Skip to content

Commit

Permalink
use dns:source instead of dc:source
Browse files Browse the repository at this point in the history
  • Loading branch information
simongray committed Nov 27, 2023
1 parent 255c1ec commit 532f10d
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 4 deletions.
2 changes: 1 addition & 1 deletion examples/ddo_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
g.parse('../export/rdf/dannet.ttl')

# define a SPARQL query to retrieve DDO source URLs
q = "SELECT * WHERE { ?resource <http://purl.org/dc/terms/source> ?source }"
q = "SELECT * WHERE { ?resource dns:source ?source }"

# reuse input prefixes for output, mostly for aesthetics
nm = NamespaceManager(g)
Expand Down
4 changes: 2 additions & 2 deletions resources/schemas/internal/dannet-schema.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@
rdfs:isDefinedBy <https://wordnet.dk/dannet/schema> .

:source a owl:ObjectProperty ;
rdfs:comment "A direct reference to the source of a specific lexical unit."@en ;
rdfs:comment "En direkte reference til kilden for en specifik leksikalsk enhed."@da ;
rdfs:comment "A direct reference to the source of a specific lexical unit, e.g. a dictionary entry."@en ;
rdfs:comment "En direkte reference til kilden for en specifik leksikalsk enhed, f.eks. et ordbogsopslag."@da ;
rdfs:label "source"@en ;
rdfs:label "kilde"@da ;
rdfs:isDefinedBy <https://wordnet.dk/dannet/schema> .
27 changes: 26 additions & 1 deletion src/main/dk/cst/dannet/db/bootstrap.clj
Original file line number Diff line number Diff line change
Expand Up @@ -268,13 +268,15 @@
(map row->triples)
(doall))))

;; TODO: remove
(defn ->freq-triples
  "Convert a row of DDO frequency data into a set of triples.

  The row is destructured positionally: the first element is the DDO entry ID
  and the third is the frequency weight as a string (the second is ignored).
  Returns a set holding one [word :dns/ddoFrequency value] triple, or nil when
  the weight is nil or empty."
  [[ddo_entryid _ ddo_artikeltyngde]]
  (when (seq ddo_artikeltyngde)
    ;; The word URI is built first and the weight parsed second, so a
    ;; non-numeric weight still throws NumberFormatException at the same point.
    #{[(shared/word-uri ddo_entryid)
       :dns/ddoFrequency
       (Integer/parseUnsignedInt ddo_artikeltyngde)]}))

;; TODO: remove
(defn add-word-frequency!
"Add word frequency data from DDO; useful for ranking/selecting labels."
[dataset]
Expand All @@ -299,6 +301,29 @@
(doseq [word unlabeled]
(db/remove! model [word :dns/ddoFrequency '_]))))))

(defn fix-source-relations!
  "Replace the dc:source relations of the DanNet graph with dns:source.

  Uses a custom source relation that is less strict than dc:source and less
  prone to prefix mix-up (dc can default to a different IRI, e.g. in rdflib).

  Looks up every <http://purl.org/dc/terms/source> triple in the graph named
  by prefix/dn-uri in `dataset`, removes those triples in one transaction and
  then adds the equivalent :dns/source triples in a second transaction."
  [dataset]
  (println "... finding existing source relations")
  (let [graph     (db/get-graph dataset prefix/dn-uri)
        model     (db/get-model dataset prefix/dn-uri)
        triples   (->> (op/sparql "SELECT ?resource ?source
                                   WHERE {
                                     ?resource <http://purl.org/dc/terms/source> ?source .
                                   }")
                       (q/run-basic graph)
                       (map (fn [{:syms [?resource ?source]}]
                              [?resource :dns/source ?source]))
                       (set))
        ;; A resource may have several sources; the wildcard removal below
        ;; clears all of them at once, so each resource is visited only once.
        resources (distinct (map first triples))]
    (txn/transact-exec model
      ;; The count is of triples, not resources: one dc:source triple is
      ;; removed for every dns:source triple that is about to be added.
      (println "... removing" (count triples) "dc:source relations")
      (doseq [resource resources]
        (db/remove! model [resource "<http://purl.org/dc/terms/source>" '_])))
    (txn/transact-exec graph
      (println "... adding" (count triples) "dns:source relations")
      (db/safe-add! graph triples))))

(defn fix-sense-label-lang!
"Add word frequency data from DDO; useful for ranking/selecting labels."
[dataset]
Expand Down Expand Up @@ -369,7 +394,7 @@
(let [expected-release "2023-09-28-SNAPSHOT"]
(assert (= current-release expected-release)) ; another check
(println "Applying release changes for" expected-release "...")
(add-word-frequency! dataset)
(fix-source-relations! dataset)
(fix-sense-label-lang! dataset)
(add-abridged-labels! dataset)
(println "Release changes applied!")))
Expand Down

0 comments on commit 532f10d

Please sign in to comment.