Skip to content

Commit

Permalink
Move sparql term parsing to wikidata_value
Browse files Browse the repository at this point in the history
  • Loading branch information
dseomn committed Nov 9, 2023
1 parent c947ba0 commit d2ba707
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 63 deletions.
25 changes: 7 additions & 18 deletions rock_paper_sand/wikidata.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,6 @@ def requests_session() -> Generator[requests.Session, None, None]:
yield session


def _parse_sparql_result_item(term: Any) -> wikidata_value.ItemRef:
if term["type"] != "uri":
raise ValueError(f"Cannot parse non-uri term as an item: {term}")
return wikidata_value.ItemRef.from_uri(term["value"])


def _parse_sparql_result_string(term: Any) -> str:
if term["type"] != "literal":
raise ValueError(f"Cannot parse non-literal term as a string: {term}")
if term.keys() & {"datatype", "xml:lang"}:
raise ValueError(f"Cannot parse non-plain literal as a string: {term}")
return term["value"]


@dataclasses.dataclass(frozen=True, kw_only=True)
class RelatedMedia:
"""Media or media groups related to a media item.
Expand Down Expand Up @@ -200,7 +186,8 @@ def transitive_subclasses(
"}"
)
self._transitive_subclasses[class_id] = frozenset(
_parse_sparql_result_item(result["class"]) for result in results
wikidata_value.parse_sparql_term_item(result["class"])
for result in results
)
return self._transitive_subclasses[class_id]

Expand Down Expand Up @@ -255,14 +242,16 @@ def related_media(self, item_id: wikidata_value.ItemRef) -> RelatedMedia:
collections.defaultdict[str, set[wikidata_value.ItemRef]]
) = collections.defaultdict(set)
for result in results:
related_item = _parse_sparql_result_item(result["item"])
related_item = wikidata_value.parse_sparql_term_item(
result["item"]
)
related_item_classes = item_classes[related_item]
if "class" in result:
related_item_classes.add(
_parse_sparql_result_item(result["class"])
wikidata_value.parse_sparql_term_item(result["class"])
)
items_by_relation[
_parse_sparql_result_string(result["relation"])
wikidata_value.parse_sparql_term_string(result["relation"])
].add(related_item)
for related_item, classes in item_classes.items():
self._entity_classes.setdefault(
Expand Down
45 changes: 0 additions & 45 deletions rock_paper_sand/wikidata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,51 +312,6 @@ def test_related_media_error(self) -> None:
self._api.related_media(wikidata_value.ItemRef("Q1"))


class WikidataUtilsTest(parameterized.TestCase):
    """Tests for the private SPARQL result parsing helpers in wikidata."""

    # pylint: disable=protected-access

    def test_parse_sparql_result_item_error(self) -> None:
        """A term of type "literal" is rejected when parsed as an item."""
        non_uri_term = {"type": "literal"}
        with self.assertRaisesRegex(ValueError, "non-uri"):
            wikidata._parse_sparql_result_item(non_uri_term)

    def test_parse_sparql_result_item(self) -> None:
        """The term built by _sparql_item("Q1") parses to ItemRef("Q1")."""
        expected = wikidata_value.ItemRef("Q1")
        actual = wikidata._parse_sparql_result_item(_sparql_item("Q1"))
        self.assertEqual(expected, actual)

    @parameterized.named_parameters(
        {
            "testcase_name": "not_literal",
            "term": {"type": "uri"},
            "error_regex": r"non-literal",
        },
        {
            "testcase_name": "not_plain",
            "term": {
                "type": "literal",
                "value": "Alice",
                "datatype": "https://example.com/person",
            },
            "error_regex": r"non-plain",
        },
    )
    def test_parse_sparql_result_string_error(
        self,
        *,
        term: Any,
        error_regex: str,
    ) -> None:
        """Each invalid term raises ValueError matching error_regex."""
        with self.assertRaisesRegex(ValueError, error_regex):
            wikidata._parse_sparql_result_string(term)

    def test_parse_sparql_result_string(self) -> None:
        """The term built by _sparql_string("foo") parses to "foo"."""
        actual = wikidata._parse_sparql_result_string(_sparql_string("foo"))
        self.assertEqual("foo", actual)


class WikidataFilterTest(parameterized.TestCase):
def setUp(self) -> None:
super().setUp()
Expand Down
20 changes: 20 additions & 0 deletions rock_paper_sand/wikidata_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,3 +356,23 @@ def truthy_statements(
for statement in statements
if statement["rank"] == "normal"
)


# A single term from a SPARQL query result, as a JSON-style mapping. See the
# "encoding RDF terms" section of the SPARQL 1.1 Query Results JSON Format:
# https://www.w3.org/TR/2013/REC-sparql11-results-json-20130321/#select-encode-terms
SparqlTerm = Mapping[str, Any]


def parse_sparql_term_item(term: SparqlTerm) -> ItemRef:
    """Returns an item value from a term.

    Args:
        term: SPARQL result term; its "type" and "value" entries are read.

    Returns:
        The item reference built by ItemRef.from_uri() from the term's "value"
        entry.

    Raises:
        ValueError: The term's "type" entry is not "uri".
    """
    if term["type"] == "uri":
        uri = term["value"]
        return ItemRef.from_uri(uri)
    raise ValueError(f"Cannot parse non-uri term as an item: {term}")


def parse_sparql_term_string(term: SparqlTerm) -> str:
    """Returns a string value from a term.

    Args:
        term: SPARQL result term; its "type" and "value" entries are read.

    Returns:
        The term's "value" entry.

    Raises:
        ValueError: The term's "type" entry is not "literal", or the term has
            a "datatype" or "xml:lang" key.
    """
    is_literal = term["type"] == "literal"
    if not is_literal:
        raise ValueError(f"Cannot parse non-literal term as a string: {term}")
    # Either of these keys marks the literal as typed or language-tagged.
    non_plain_keys = {"datatype", "xml:lang"}
    if not non_plain_keys.isdisjoint(term.keys()):
        raise ValueError(f"Cannot parse non-plain literal as a string: {term}")
    return term["value"]
45 changes: 45 additions & 0 deletions rock_paper_sand/wikidata_value_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,51 @@ def test_truthy_statements(
wikidata_value.Entity(json_full=entity).truthy_statements(prop),
)

def test_parse_sparql_term_item_error(self) -> None:
    """A term of type "literal" is rejected when parsed as an item."""
    non_uri_term = {"type": "literal"}
    with self.assertRaisesRegex(ValueError, "non-uri"):
        wikidata_value.parse_sparql_term_item(non_uri_term)

def test_parse_sparql_term_item(self) -> None:
    """A uri term for the Q1 entity URI parses to ItemRef("Q1")."""
    expected = wikidata_value.ItemRef("Q1")
    uri_term = {
        "type": "uri",
        "value": "http://www.wikidata.org/entity/Q1",
    }
    actual = wikidata_value.parse_sparql_term_item(uri_term)
    self.assertEqual(expected, actual)

@parameterized.named_parameters(
    {
        "testcase_name": "not_literal",
        "term": {"type": "uri"},
        "error_regex": r"non-literal",
    },
    {
        "testcase_name": "not_plain",
        "term": {
            "type": "literal",
            "value": "Alice",
            "datatype": "https://example.com/person",
        },
        "error_regex": r"non-plain",
    },
)
def test_parse_sparql_term_string_error(
    self,
    *,
    term: wikidata_value.SparqlTerm,
    error_regex: str,
) -> None:
    """Each invalid term raises ValueError matching error_regex."""
    with self.assertRaisesRegex(ValueError, error_regex):
        wikidata_value.parse_sparql_term_string(term)

def test_parse_sparql_term_string(self) -> None:
    """A literal term with only "type" and "value" parses to its value."""
    plain_literal = {"type": "literal", "value": "foo"}
    actual = wikidata_value.parse_sparql_term_string(plain_literal)
    self.assertEqual("foo", actual)


# Run the tests in this module via absltest when executed as a script.
if __name__ == "__main__":
    absltest.main()

0 comments on commit d2ba707

Please sign in to comment.