Skip to content

Commit

Permalink
Update expected scores
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbothma committed Sep 19, 2024
1 parent 4544687 commit 5d59c8d
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 14 deletions.
Binary file modified nomenklatura/data/regression-v3.pkl
Binary file not shown.
1 change: 0 additions & 1 deletion nomenklatura/xref.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def xref(

if scored:
result = algorithm.compare(left, right)
#print("xref", result)
score = result.score

scores.append(score)
Expand Down
15 changes: 8 additions & 7 deletions tests/matching/test_regression_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_name_country():
data["id"] = "mike2"
e2 = Entity.from_dict(model, data)
res = RegressionV3.compare(e1, e2)
assert 0.89 < res.score < 0.93, res
assert 0.92 < res.score < 0.95, res


def test_name_match():
Expand Down Expand Up @@ -171,7 +171,7 @@ def test_name_address():
"id": "a",
"schema": "Company",
"properties": {
"name": ["The AAA Weapons and Munitions Factory Joint Stock Company"],
"name": ["The AAA Weapons and MunitionS Factory Joint Stock Company"],
"address": ["Moscow"],
},
},
Expand All @@ -182,7 +182,7 @@ def test_name_address():
"id": "b",
"schema": "Company",
"properties": {
"name": ["The BBB Weapons and Munitions Factory Joint Stock Company"],
"name": ["The BBB Weapons and MunitionS Factory Joint Stock Company"],
"address": ["Moscow"],
},
},
Expand All @@ -193,16 +193,17 @@ def test_name_address():
"id": "c",
"schema": "Company",
"properties": {
"name": ["The AAA Weapons and Ammunition Factory Joint Stock Company"],
"name": ["The AAA Weapons and MunitioN Factory Joint Stock Company"],
"address": ["Moscow"],
},
},
)
ac = RegressionV3.compare(a, c)
assert 0.5 < ac.score < 0.9
assert 0.87 < ac.score < 0.93
ab = RegressionV3.compare(a, b)
assert 0.5 < ab.score < 0.9

assert 0.87 < ab.score < 0.93
bc = RegressionV3.compare(b, c)
assert 0.84 < bc.score < 0.93

def test_isin():
"""name and country together shouldn't be too strong"""
Expand Down
10 changes: 4 additions & 6 deletions tests/test_xref.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_xref_potential_conflicts(
"id": "a",
"schema": "Company",
"properties": {
"name": ["The AAA Weapons and Munitions Factory Joint Stock Company"],
"name": ["The AAA Weapons and MunitionS Factory Joint Stock Company"],
"address": ["Moscow"],
},
},
Expand All @@ -54,7 +54,7 @@ def test_xref_potential_conflicts(
"id": "b",
"schema": "Company",
"properties": {
"name": ["The BBB Weapons and Munitions Factory Joint Stock Company"],
"name": ["The BBB Weapons and MunitionS Factory Joint Stock Company"],
"address": ["Moscow"],
},
},
Expand All @@ -65,7 +65,7 @@ def test_xref_potential_conflicts(
"id": "c",
"schema": "Company",
"properties": {
"name": ["The AAA Weapons and Ammunition Factory Joint Stock Company"],
"name": ["The AAA Weapons and MunitioN Factory Joint Stock Company"],
"address": ["Moscow"],
},
},
Expand All @@ -83,7 +83,7 @@ def test_xref_potential_conflicts(
store,
index_path,
algorithm=RegressionV3,
conflicting_match_threshold=0.9,
conflicting_match_threshold=0.8,
)
stdout = capsys.readouterr().out

Expand All @@ -95,5 +95,3 @@ def test_xref_potential_conflicts(
assert a.get("name")[0] in flat, stdout
assert b.get("name")[0] in flat, stdout
assert c.get("name")[0] in flat, stdout
print(stdout)
assert False

0 comments on commit 5d59c8d

Please sign in to comment.