From 5d59c8da2c6f78cf91c03cb94794b236e402fbc2 Mon Sep 17 00:00:00 2001
From: JD Bothma <jbothma@gmail.com>
Date: Thu, 19 Sep 2024 09:12:13 +0100
Subject: [PATCH] Update expected scores

---
 nomenklatura/data/regression-v3.pkl  | Bin 2603 -> 2603 bytes
 nomenklatura/xref.py                 |   1 -
 tests/matching/test_regression_v3.py |  15 ++++++++-------
 tests/test_xref.py                   |  10 ++++------
 4 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/nomenklatura/data/regression-v3.pkl b/nomenklatura/data/regression-v3.pkl
index 39078a8cc25b30f546c93b377e7cb5a1b677b895..7a95c01cc2de40065f8102901b3ff4600ab7fa80 100644
GIT binary patch
delta 984
zcma)&c}$XV7{?XRmp5mS({439@u-=(a=P|pmYLcC%bKp#UeeJ9QV!QKgUX}q#^1YI
zF3YsmA8dG*>6l<61WFL-HZsejOYLwSv$9QI|8T3V|G#^l&*!7`R(jVrxXO>&*Ig90
zr2wm8>nxS>19W!c5wp2G7(P?n0W3cB^>N3nGtF=<=^5fK9EQ@`V!ml)1P0uWcD1IC
z!YAWFTa%~)Zg9n3l1=Gw&vlU{>(DUduHCQL{8I#LcD0?p5LW~JtVGlE#@ArI_|)Fs
z(*oB>nSHS#8b#5pG=e~M`r`jaiHUWhJZyKO7-)Y-Iq?sa-?YD@)X_ua)xJE7y*UL`
z%WLfc#s=8N3)L3|@<4W;5gs~&542H<%`u?|wT@>b`fEX_ef0fSmI4O(^Abt}O2ATh
zve(C44huX#pV^w64ja>~^i9XLFlMC#>y-!!l2$D(Ag@9ZJ19f6s~p6(6;X`zk1)zD
zWq0YL(G~_*{w-+U+OGI2_}(7ED0|TjB?{S!m1nEr$M|XjepSQi-OI+@IXz&m>5lD*
zGs3=`jg=irEku*|X+cCQ_*4cDiq&c`MBaJnCar-n<C<^FT^@r+OJ%+#PX&*jOlW#r
zIw7H}F@F0IHC(8<9+MLO5%iA0#;9~f(>O!#I+Nw%;zCME;*^^}lADKe7~XQ%RCXCD
z^es<3+^o{`AMDU+IPX~{{sTkswpg!tRwUEZC0yE@kyotom&S&vo84FSBwOQ_yy*wM
zsba28W&jgb<wa~XgRY~yHQ1<yJ+s-Nv4m3N$h4$AzdHyHf&5ri=2MM;@QGmBOb~E8
zdxKX{gJuZ-@-}1!ryd01!|(E~1|-5F-R~{EvLGiOUB=d96b~A~{CHDLWEsgPPkHPS
zO7e5F#XF?}At{U(`X(}`V5vc_c;{}>w84~xbHGOD@Hy^IBXp4Ihfz}K3ez8>#L!*l
zB8-wlADHtoN)UPCeqmFR&U2muNnyUU;J~zxPJgro55uSgs00ths0>JjFT|)6$cp=6
VlQ}r`=qnz~riy4LuNM|B`2%FK$?X6D

delta 984
zcma)&Ye-XZ7>7MNcQ&shT~_22XO}@*Rz#HbYFZg8>6&IlnAvKA)1@1wg_pGH%G>%M
zOGzzP%hIxG*veVQQm2z-ji8a;BvWdcZBqK73xPX+5d6^hAMg7-Pm6bpx26rt*C*_8
zHe!)r_6l>x@+LrS4Dp#keBhckH-~$sK+;=l$+g!*5NG;&pO~2dlK$qnr)3hXjKu;&
z$anCaS*q@fxd@$=Yd^;d62Yct^_IqdgO!dymtlG7(3Q&HcN0{Q*Ryx(UD7B#GzPd`
zy3hu@GnyU;KjNWvjwZ)l$B>g#(0`-c9<igG_pzho(iTRM^e&8|7+V<Sp3};Fe}+=p
z`ZoeZ+O!Si>2~m2%#mNL;KPh1NX(<Bz;tMMzj9O$M)92oF;Qy3vInY5R7#L!)wuC(
za$uZG>lc<PAfy--6~!cidi6lz4dYWN2!3}<bu=AzSWMc$+C~Wfz35N-B?XAgvh4$_
zY%o-~d~8(9Ljv0B{Q41_?rd}|H0@?rr>!?Z@it8?Z)+oD<UZ4L_BVm!lujDYdIRMw
zG~K;q5VmcKatnIi4c_cmq7zGWU}SnF$*a4d{0cX|U{nP|EmNU)KbRpWxN)#JuoW)H
z^*nLW^~3D|pBj067hoeBAx>$9QwLsFcxuY-|Dx$J54MAY10f}jlsTtn3Xh?~H1B*Y
znk6ShzKV!~ouRSg!`r*bPwjZ*NwB~(t~LIPR!JUaZ#n7Ni%9Ink3f|dJ*+xAx$j`z
zIEXuk&Fl^Xa2paD6K|S8b4}t=*VhT$9$TgMwT5)feqU~G9*2%BNpx3>i9CJ?8>Y2A
zg{El(5q-U#6zJmq2wj8*$g_O-^|7rNrQ>VpRW+N=^h;3#z6Ymx&?L@@pJO8A1fM8#
zm54GkgvsI*X{v}2Z58=OFy>&Xfl!>1mOgJVXJH?(P!-dMZZ|@?3=U37p{oo(oDxHQ
z41b)GLlcZ;I3<YOnH=7nr2U+GYNjkhnw2x}qun0~m^_?HfQp%1oXUV&nago11u`)?
V_-qcL0ZlVku&5&B>~)ze_#b8e$T9!`

diff --git a/nomenklatura/xref.py b/nomenklatura/xref.py
index b3d8447e..da0a845c 100644
--- a/nomenklatura/xref.py
+++ b/nomenklatura/xref.py
@@ -78,7 +78,6 @@ def xref(
 
             if scored:
                 result = algorithm.compare(left, right)
-                #print("xref", result)
                 score = result.score
 
             scores.append(score)
diff --git a/tests/matching/test_regression_v3.py b/tests/matching/test_regression_v3.py
index d98c38e3..3407111e 100644
--- a/tests/matching/test_regression_v3.py
+++ b/tests/matching/test_regression_v3.py
@@ -132,7 +132,7 @@ def test_name_country():
     data["id"] = "mike2"
     e2 = Entity.from_dict(model, data)
     res = RegressionV3.compare(e1, e2)
-    assert 0.89 < res.score < 0.93, res
+    assert 0.92 < res.score < 0.95, res
 
 
 def test_name_match():
@@ -171,7 +171,7 @@ def test_name_address():
             "id": "a",
             "schema": "Company",
             "properties": {
-                "name": ["The AAA Weapons and Munitions Factory Joint Stock Company"],
+                "name": ["The AAA Weapons and MunitionS Factory Joint Stock Company"],
                 "address": ["Moscow"],
             },
         },
@@ -182,7 +182,7 @@ def test_name_address():
             "id": "b",
             "schema": "Company",
             "properties": {
-                "name": ["The BBB Weapons and Munitions Factory Joint Stock Company"],
+                "name": ["The BBB Weapons and MunitionS Factory Joint Stock Company"],
                 "address": ["Moscow"],
             },
         },
@@ -193,16 +193,17 @@ def test_name_address():
             "id": "c",
             "schema": "Company",
             "properties": {
-                "name": ["The AAA Weapons and Ammunition Factory Joint Stock Company"],
+                "name": ["The AAA Weapons and MunitioN Factory Joint Stock Company"],
                 "address": ["Moscow"],
             },
         },
     )
     ac = RegressionV3.compare(a, c)
-    assert 0.5 < ac.score < 0.9
+    assert 0.87 < ac.score < 0.93
     ab = RegressionV3.compare(a, b)
-    assert 0.5 < ab.score < 0.9
-
+    assert 0.87 < ab.score < 0.93
+    bc = RegressionV3.compare(b, c)
+    assert 0.84 < bc.score < 0.93
 
 def test_isin():
     """name and country together shouldn't be too strong"""
diff --git a/tests/test_xref.py b/tests/test_xref.py
index 29281433..4a99e504 100644
--- a/tests/test_xref.py
+++ b/tests/test_xref.py
@@ -43,7 +43,7 @@ def test_xref_potential_conflicts(
             "id": "a",
             "schema": "Company",
             "properties": {
-                "name": ["The AAA Weapons and Munitions Factory Joint Stock Company"],
+                "name": ["The AAA Weapons and MunitionS Factory Joint Stock Company"],
                 "address": ["Moscow"],
             },
         },
@@ -54,7 +54,7 @@ def test_xref_potential_conflicts(
             "id": "b",
             "schema": "Company",
             "properties": {
-                "name": ["The BBB Weapons and Munitions Factory Joint Stock Company"],
+                "name": ["The BBB Weapons and MunitionS Factory Joint Stock Company"],
                 "address": ["Moscow"],
             },
         },
@@ -65,7 +65,7 @@ def test_xref_potential_conflicts(
             "id": "c",
             "schema": "Company",
             "properties": {
-                "name": ["The AAA Weapons and Ammunition Factory Joint Stock Company"],
+                "name": ["The AAA Weapons and MunitioN Factory Joint Stock Company"],
                 "address": ["Moscow"],
             },
         },
@@ -83,7 +83,7 @@ def test_xref_potential_conflicts(
         store,
         index_path,
         algorithm=RegressionV3,
-        conflicting_match_threshold=0.9,
+        conflicting_match_threshold=0.8,
     )
     stdout = capsys.readouterr().out
 
@@ -95,5 +95,3 @@ def test_xref_potential_conflicts(
     assert a.get("name")[0] in flat, stdout
     assert b.get("name")[0] in flat, stdout
     assert c.get("name")[0] in flat, stdout
-    print(stdout)
-    assert False