From e798a631cd499ced693f42895bc524c760c7e482 Mon Sep 17 00:00:00 2001 From: Nikita Prokopov Date: Mon, 17 Jul 2023 23:18:04 +0200 Subject: [PATCH] subslice benchmarks, roam dataset --- bench/datascript/bench/datascript.cljc | 177 +++++++++++++++++++++---- dev/roam.clj | 56 ++++++++ dev/roam.cljs | 68 ++++++++++ project.clj | 16 ++- script/test_cljs.js | 29 +--- src/datascript/db.cljc | 6 + 6 files changed, 302 insertions(+), 50 deletions(-) create mode 100644 dev/roam.clj create mode 100644 dev/roam.cljs diff --git a/bench/datascript/bench/datascript.cljc b/bench/datascript/bench/datascript.cljc index 1bc280da..abc269dd 100644 --- a/bench/datascript/bench/datascript.cljc +++ b/bench/datascript/bench/datascript.cljc @@ -1,11 +1,19 @@ (ns datascript.bench.datascript (:require - [datascript.core :as d] - [datascript.bench.bench :as bench] - #?(:clj [jsonista.core :as jsonista]))) + [datascript.core :as d] + [datascript.db :as db] + [datascript.bench.bench :as bench] + [datascript.test] + [me.tonsky.persistent-sorted-set :as pss] + [me.tonsky.persistent-sorted-set.arrays :as arrays] + #?(:clj [jsonista.core :as jsonista]) + #?(:cljs [goog.object :as gobj]))) #?(:cljs (enable-console-print!)) +#?(:cljs + (def fs (js/require "fs"))) + (def schema {:id {:db/unique :db.unique/identity} :follows {:db/valueType :db.type/ref @@ -77,31 +85,31 @@ (bench/bench (d/q '[:find ?e ?a :where [?e :name "Ivan"] - [?e :age ?a]] + [?e :age ?a]] @*db100k))) (defn bench-q3 [] (bench/bench (d/q '[:find ?e ?a :where [?e :name "Ivan"] - [?e :age ?a] - [?e :sex :male]] + [?e :age ?a] + [?e :sex :male]] @*db100k))) (defn bench-q4 [] (bench/bench (d/q '[:find ?e ?l ?a :where [?e :name "Ivan"] - [?e :last-name ?l] - [?e :age ?a] - [?e :sex :male]] + [?e :last-name ?l] + [?e :age ?a] + [?e :sex :male]] @*db100k))) (defn bench-qpred1 [] (bench/bench (d/q '[:find ?e ?s :where [?e :salary ?s] - [(> ?s 50000)]] + [(> ?s 50000)]] @*db100k))) (defn bench-qpred2 [] @@ -109,7 +117,7 @@ (d/q '[:find ?e ?s :in $ ?min_s :where [?e :salary ?s] - [(> ?s ?min_s)]] + [(> ?s ?min_s)]] @*db100k 50000))) (def *pull-db @@ -148,12 +156,12 @@ (d/q '[:find ?e ?e2 :in $ % :where (follows ?e ?e2)] - db - '[[(follows ?x ?y) - [?x :follows ?y]] - [(follows ?x ?y) - [?x :follows ?t] - (follows ?t ?y)]])) + db + '[[(follows ?x ?y) + [?x :follows ?y]] + [(follows ?x ?y) + [?x :follows ?t] + (follows ?t ?y)]])) (defn bench-rules-wide-3x3 [] (let [db (wide-db 3 3)] @@ -192,16 +200,129 @@ (take 10000) (vec)))) -(defn bench-subslice-eavt [] +(defn bench-slice-eavt [] (bench/bench (doseq [id @*ids10k] (-> (d/datoms @*db100k :eavt id :namespaced/full-name) first :v)))) -(defn bench-subslice-aevt [] +(defn bench-slice-aevt [] (bench/bench (doseq [id @*ids10k] (-> (d/datoms @*db100k :aevt :namespaced/full-name id) first :v)))) +(defn bench-subslice-aevt-cp [] + (bench/bench + (doseq [:let [db @*db100k + slice (d/datoms db :aevt :namespaced/full-name)] + id @*ids10k + :let [from (db/components->pattern db :aevt :namespaced/full-name id nil nil db/e0 db/tx0) + to (db/components->pattern db :aevt :namespaced/full-name id nil nil db/emax db/txmax)]] + (-> (pss/slice slice from to db/cmp-datoms-evt) first :v)))) + +(defn bench-seek [] + (bench/bench + (doseq [:let [db @*db100k] + id @*ids10k + :let [from (db/datom id :namespaced/full-name nil db/tx0) + to (db/datom id :namespaced/full-name nil db/txmax)]]))) + +(defn bench-seek-rd [] + (bench/bench + (doseq [:let [db @*db100k] + id @*ids10k + :let [from (db/resolve-datom db id :namespaced/full-name nil nil db/e0 db/tx0) + to (db/resolve-datom db id :namespaced/full-name nil nil db/emax db/txmax)]]))) + +(defn bench-seek-cp [] + (bench/bench + (doseq [:let [db @*db100k] + id @*ids10k + :let [from (db/components->pattern db :aevt :namespaced/full-name id nil nil db/e0 db/tx0) + to (db/components->pattern db :aevt :namespaced/full-name id nil nil db/emax db/txmax)]]))) + +(defn bench-subslice-aevt [] + (bench/bench + (doseq [:let [db @*db100k + slice (d/datoms db :aevt :namespaced/full-name)] + id @*ids10k + :let [from (db/datom id :namespaced/full-name nil db/tx0) + to (db/datom id :namespaced/full-name nil db/txmax)]] + (-> (pss/slice slice from to db/cmp-datoms-evt) first :v)))) + +(defn map-to [ks val-fn] + (persistent! + (reduce + (fn [m k] + (assoc! m k (val-fn k))) + (transient {}) + ks))) + +#?(:cljs + (def *roam-db + (delay + (let [fun (gobj/get fs "readFileSync") + file (fun "/Users/tonsky/ws/roam/db_3M.json") + parsed (js/JSON.parse file)] + (d/from-serializable parsed))))) + +#?(:cljs + (def *roam-ids + (delay + (let [index (vec (d/datoms @*roam-db :aevt :block/parents))] + (->> #(rand-nth index) + (repeatedly 10000) + (map :e) + (into #{})))))) + +#?(:cljs + (defn bench-roam-index [] + (bench/bench + (let [index (js/Object.)] + (doseq [d (d/datoms @*roam-db :aevt :block/parents)] + (gobj/set index (.-e d) (.-v d))) + (map-to @*roam-ids #(gobj/get index %)))))) + +#?(:cljs + (defn bench-roam-eavt [] + (bench/bench + (map-to @*roam-ids + #(-> (d/datoms @*roam-db :eavt % :block/parents) + first + :v))))) + +#?(:cljs + (defn bench-roam-aevt [] + (bench/bench + (map-to @*roam-ids + #(-> (d/datoms @*roam-db :aevt :block/parents %) + first + :v))))) + +#?(:cljs + (defn bench-roam-aevt-slice [] + (bench/bench + (let [slice (d/datoms @*roam-db :aevt :block/parents)] + (map-to @*roam-ids + (fn [id] + (let [from (db/datom id :block/parents nil db/tx0) + to (db/datom id :block/parents nil db/txmax)] + (some-> (pss/slice slice from to db/cmp-datoms-evt) + first + (.-v))))))))) + +#?(:cljs + (defn bench-roam-aevt-slice-cp [] + (bench/bench + (let [db @*roam-db + slice (d/datoms db :aevt :block/parents)] + (map-to @*roam-ids + (fn [id] + (let [from (db/components->pattern db :aevt :block/parents id nil nil db/e0 db/tx0) + to (db/components->pattern db :aevt :block/parents id nil nil db/emax db/txmax)] + (some-> (pss/slice slice from to db/cmp-datoms-evt) + first + (.-v))))))))) + (def *serialize-db (delay (d/db-with empty-db @@ -244,15 +365,25 @@ "rules-long-10x3" bench-rules-long-10x3 "rules-long-30x3" bench-rules-long-30x3 "rules-long-30x5" bench-rules-long-30x5 - "subslice-eavt" bench-subslice-eavt + "seek" bench-seek + "seek-cp" bench-seek-cp + "seek-rd" bench-seek-rd + "slice-eavt" bench-slice-eavt + "slice-aevt" bench-slice-aevt "subslice-aevt" bench-subslice-aevt + "subslice-aevt-cp" bench-subslice-aevt-cp "freeze" bench-freeze - "thaw" bench-thaw}) + "thaw" bench-thaw + #?@(:cljs ["roam-index" bench-roam-index + "roam-eavt" bench-roam-eavt + "roam-aevt" bench-roam-aevt + "roam-aevt-slice" bench-roam-aevt-slice + "roam-aevt-slice-cp" bench-roam-aevt-slice-cp])}) (defn ^:export -main "clj -A:bench -M -m datascript.bench.datascript [--profile] (add-1 | add-5 | ...)*" [& args] - (let [profile? (.contains (or args ()) "--profile") + (let [profile? (contains? (set (or args ())) "--profile") args (remove #{"--profile"} args) names (or (not-empty args) (sort (keys benches))) _ (apply println #?(:clj "CLJ:" :cljs "CLJS:") names) @@ -303,4 +434,4 @@ (bench-rules-long-30x3) (bench-rules-long-30x5) (bench-freeze) - (bench-thaw)) \ No newline at end of file + (bench-thaw)) diff --git a/dev/roam.clj b/dev/roam.clj new file mode 100644 index 00000000..3e138665 --- /dev/null +++ b/dev/roam.clj @@ -0,0 +1,56 @@ +(ns roam + (:require + [clojure.java.io :as io] + [cognitect.transit :as transit] + [datascript.core :as d] + [datascript.db :as db])) + +(def read-handlers + {"datascript/DB" (transit/read-handler db/db-from-reader) + "datascript/Datom" (transit/read-handler db/datom-from-reader)}) + +(defn transit-read [s type] + (with-open [is (java.io.ByteArrayInputStream. s)] + (transit/read (transit/reader is type {:handlers read-handlers})))) + +(defn transit-read-str [s] + (transit-read (.getBytes ^String s "UTF-8") :json)) + +(defonce db + (with-open [is (io/input-stream "../roam/db_3M.transit")] + (transit/read (transit/reader is :json {:handlers read-handlers})))) + +(def ids + (let [index (vec (d/datoms db :aevt :block/parents))] + (->> #(rand-nth index) + (repeatedly 10000) + (map :e) + (into #{})))) + +(comment + (time + (persistent! + (reduce + (fn [m eid] + (let [parents (d/datoms db :aevt :block/parents eid)] + (assoc! m eid (:v (first parents))))) + (transient {}) + ids))) + + (count (d/datoms db :aevt :block/parents)) + + (time + (let [index (persistent! + (reduce + (fn [m d] + (assoc! m (:e d) (:v d))) + (transient {}) + (d/datoms db :aevt :block/parents)))] + (persistent! + (reduce + (fn [m eid] + (assoc! m eid (get index eid))) + (transient {}) + ids)))) + ) + diff --git a/dev/roam.cljs b/dev/roam.cljs new file mode 100644 index 00000000..ee8c3d67 --- /dev/null +++ b/dev/roam.cljs @@ -0,0 +1,68 @@ +(ns roam + (:require + [fs] + [cognitect.transit :as transit] + [datascript.bench.bench :refer-macros [dotime measure]] + [datascript.core :as d] + [datascript.db :as db])) + +(def read-handlers + {"datascript/DB" (transit/read-handler db/db-from-reader) + "datascript/Datom" (transit/read-handler db/datom-from-reader)}) + +(defn transit-read [s type] + (transit/read (transit/reader type {:handlers read-handlers}) s)) + +(defn transit-read-str [s] + (transit-read s :json)) + +(def file + (fs/readFileSync "/Users/tonsky/ws/roam/db_3M.json")) + +(def parsed + (js/JSON.parse file)) + +(def db + (d/from-serializable parsed)) + +(def ids + (let [index (vec (d/datoms db :aevt :block/parents))] + (->> #(rand-nth index) + (repeatedly 10000) + (map :e) + (into #{})))) + +(defn map-to [ks val-fn] + (persistent! + (reduce + (fn [m k] + (assoc! m k (val-fn k))) + (transient {}) + ks))) + +(comment + (count db) + (count ids) + (count (d/datoms db :aevt :block/parents)) + + (dotime 10000 + (let [index (js/Object.)] + (doseq [d (d/datoms db :aevt :block/parents)] + (aset index (.-e d) (.-v d))) + (map-to ids #(aget index %)))) + + (dotime 10000 + (map-to ids + #(-> (d/datoms db :eavt % :block/parents) + (first) + (.-v)))) + + (dotime 10000 + (map-to ids + #(-> (d/datoms db :aevt :block/parents %) + (first) + (.-v)))) + ) + +(defn -main [& args] + (println "Loaded" (count db) "datoms")) diff --git a/project.clj b/project.clj index ab5a4ab3..4fd69e7d 100644 --- a/project.clj +++ b/project.clj @@ -6,10 +6,17 @@ :url "http://www.eclipse.org/legal/epl-v10.html"} :url "https://github.com/tonsky/datascript" + + :source-paths [ + "src" + "/Users/tonsky/ws/persistent-sorted-set/src-clojure" + "/Users/tonsky/ws/persistent-sorted-set/target/classes" + ] + :dependencies [ [org.clojure/clojure "1.10.2" :scope "provided"] [org.clojure/clojurescript "1.10.844" :scope "provided"] - [persistent-sorted-set "0.2.3"] + #_[persistent-sorted-set "0.2.3"] ] :plugins [ @@ -52,19 +59,20 @@ }} { :id "bench" - :source-paths ["src" "bench"] + :source-paths ["src" "bench" "/Users/tonsky/ws/persistent-sorted-set/src-clojure"] :compiler { :main datascript.bench.datascript :output-to "target/datascript.js" :optimizations :advanced ; :source-map "target/datascript.js.map" - ; :pretty-print true + :pretty-print true :recompile-dependents false :parallel-build true :checked-arrays :warn - ; :pseudo-names true + :pseudo-names true :fn-invoke-direct true :elide-asserts true + :target :nodejs }} { :id "none" diff --git a/script/test_cljs.js b/script/test_cljs.js index 3d1f0780..e151bbbc 100644 --- a/script/test_cljs.js +++ b/script/test_cljs.js @@ -1,30 +1,13 @@ #! node -var fs = require('fs'), - vm = require('vm'); - -global.performance = { now: function () { - var t = process.hrtime(); - return t[0] * 1000 + t[1] / 1000000; -} } - -global.goog = {}; - -global.CLOSURE_IMPORT_SCRIPT = function(src) { - require('./target/none/goog/' + src); - return true; -}; - -function nodeGlobalRequire(file) { - vm.runInThisContext.call(global, fs.readFileSync(file), file); +global.performance = { + now: function () { + var t = process.hrtime(); + return t[0] * 1000 + t[1] / 1000000; + } } -if (fs.existsSync("./target/none")) { - nodeGlobalRequire('./target/none/goog/base.js'); - nodeGlobalRequire('./target/none/cljs_deps.js'); - goog.require('datascript.test'); -} else - nodeGlobalRequire('./target/datascript.js'); +require("../target/datascript.js"); var res = datascript.test.test_cljs(); diff --git a/src/datascript/db.cljc b/src/datascript/db.cljc index 3a8672e5..f3e26bda 100644 --- a/src/datascript/db.cljc +++ b/src/datascript/db.cljc @@ -503,6 +503,12 @@ (value-cmp (.-v d1) (.-v d2)) (int-compare (datom-tx d1) (datom-tx d2)))) +(defcomp cmp-datoms-evt ^long [^Datom d1, ^Datom d2] + (combine-cmp + (int-compare (.-e d1) (.-e d2)) + (value-cmp (.-v d1) (.-v d2)) + (int-compare (datom-tx d1) (datom-tx d2)))) + (defcomp cmp-datoms-avet ^long [^Datom d1, ^Datom d2] (combine-cmp (cmp (.-a d1) (.-a d2))