From e5c3997a9059e3196d31afcd82c8d23b5956f69a Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@pudo.org>
Date: Tue, 11 Jul 2023 13:17:20 +0200
Subject: [PATCH 1/3] adopt pydantic 2

---
 contrib/test_bench.py   |  2 +-
 setup.py                |  4 +-
 yente/data/common.py    | 88 ++++++++++++++++-------------------------
 yente/data/freebase.py  | 30 +++++++-------
 yente/data/manifest.py  |  2 +-
 yente/routers/admin.py  |  2 +-
 yente/routers/match.py  |  2 +-
 yente/routers/search.py |  4 +-
 yente/routers/util.py   |  2 +-
 yente/search/search.py  |  2 +
 yente/util.py           |  4 +-
 11 files changed, 62 insertions(+), 80 deletions(-)

diff --git a/contrib/test_bench.py b/contrib/test_bench.py
index 5c969b0b..3fa77d46 100644
--- a/contrib/test_bench.py
+++ b/contrib/test_bench.py
@@ -22,7 +22,7 @@
 
 async def test_example():
     ds = await get_dataset("default")
-    example = EntityExample.parse_obj(EXAMPLE)
+    example = EntityExample.model_validate(EXAMPLE)
     entity = Entity.from_example(example)
     query = entity_query(ds, entity)
     pprint(query)
diff --git a/setup.py b/setup.py
index 7705523c..ae544a9f 100644
--- a/setup.py
+++ b/setup.py
@@ -25,14 +25,14 @@
         "types-aiofiles==23.1.0.4",
         "aiohttp[speedups]==3.8.4",
         "elasticsearch[async]==8.8.0",
-        "fastapi==0.99.1",
+        "fastapi==0.100.0",
         "uvicorn[standard]==0.22.0",
         "python-multipart==0.0.6",
         "email-validator==2.0.0.post2",
         "structlog==23.1.0",
         "pyicu==2.11",
         "jellyfish==1.0.0",
-        "orjson==3.9.1",
+        "orjson==3.9.2",
         "text-unidecode==1.3",
         "click==8.0.4",
         "normality==2.4.0",
diff --git a/yente/data/common.py b/yente/data/common.py
index fc285cbe..d3461ffd 100644
--- a/yente/data/common.py
+++ b/yente/data/common.py
@@ -10,38 +10,27 @@
 
 
 class ErrorResponse(BaseModel):
-    detail: str = Field(..., example="Detailed error message")
+    detail: str = Field(..., examples=["Detailed error message"])
 
 
 class EntityResponse(BaseModel):
-    id: str = Field(..., example="NK-A7z....")
-    caption: str = Field(..., example="John Doe")
-    schema_: str = Field(..., example="LegalEntity", alias="schema")
-    properties: EntityProperties = Field(..., example={"name": ["John Doe"]})
-    datasets: List[str] = Field([], example=["us_ofac_sdn"])
-    referents: List[str] = Field([], example=["ofac-1234"])
+    id: str = Field(..., examples=["NK-A7z...."])
+    caption: str = Field(..., examples=["John Doe"])
+    schema_: str = Field(..., examples=["LegalEntity"], alias="schema")
+    properties: EntityProperties = Field(..., examples=[{"name": ["John Doe"]}])
+    datasets: List[str] = Field([], examples=[["us_ofac_sdn"]])
+    referents: List[str] = Field([], examples=[["ofac-1234"]])
     target: bool = Field(False)
-    first_seen: Optional[datetime] = Field(..., example=datetime.utcnow())
-    last_seen: Optional[datetime] = Field(..., example=datetime.utcnow())
-    last_change: Optional[datetime] = Field(..., example=datetime.utcnow())
+    first_seen: Optional[datetime] = Field(..., examples=[datetime.utcnow()])
+    last_seen: Optional[datetime] = Field(..., examples=[datetime.utcnow()])
+    last_change: Optional[datetime] = Field(..., examples=[datetime.utcnow()])
 
     @classmethod
     def from_entity(cls, entity: Entity) -> "EntityResponse":
-        return cls.construct(
-            id=entity.id,
-            caption=entity._caption,
-            schema=entity.schema.name,
-            properties=dict(entity.properties),
-            datasets=list(entity.datasets),
-            referents=list(entity.referents),
-            target=entity.target,
-            first_seen=entity.first_seen,
-            last_seen=entity.last_seen,
-            last_change=entity.last_change,
-        )
+        return cls.model_validate(entity.to_dict())
 
 
-EntityResponse.update_forward_refs()
+EntityResponse.model_rebuild()
 
 
 class ScoredEntityResponse(EntityResponse):
@@ -53,21 +42,10 @@ class ScoredEntityResponse(EntityResponse):
     def from_entity_result(
         cls, entity: Entity, result: MatchingResult, threshold: float
     ) -> "ScoredEntityResponse":
-        return cls.construct(
-            id=entity.id,
-            caption=entity._caption,
-            schema=entity.schema.name,
-            properties=entity.properties,
-            datasets=list(entity.datasets),
-            referents=list(entity.referents),
-            target=entity.target,
-            first_seen=entity.first_seen,
-            last_seen=entity.last_seen,
-            last_change=entity.last_change,
-            score=result["score"],
-            match=result["score"] >= threshold,
-            features=result["features"],
-        )
+        data = entity.to_dict()
+        data.update(result)
+        data["match"] = result["score"] >= threshold
+        return cls.model_validate(data)
 
 
 class StatusResponse(BaseModel):
@@ -75,24 +53,24 @@ class StatusResponse(BaseModel):
 
 
 class SearchFacetItem(BaseModel):
-    name: str = Field(..., example="ru")
-    label: str = Field(..., example="Russia")
-    count: int = Field(1, example=42)
+    name: str = Field(..., examples=["ru"])
+    label: str = Field(..., examples=["Russia"])
+    count: int = Field(1, examples=[42])
 
 
 class SearchFacet(BaseModel):
-    label: str = Field(..., example="Countries")
+    label: str = Field(..., examples=["Countries"])
     values: List[SearchFacetItem]
 
 
 class TotalSpec(BaseModel):
-    value: int = Field(..., example=42)
-    relation: str = Field("eq", example="eq")
+    value: int = Field(..., examples=[42])
+    relation: str = Field("eq", examples=["eq"])
 
 
 class ResultsResponse(BaseModel):
-    limit: int = Field(..., example=20)
-    offset: int = Field(0, example=0)
+    limit: int = Field(..., examples=[20])
+    offset: int = Field(0, examples=[0])
     total: TotalSpec
 
 
@@ -102,10 +80,10 @@ class SearchResponse(ResultsResponse):
 
 
 class EntityExample(BaseModel):
-    id: Optional[str] = Field(None, example="my-entity-id")
-    schema_: str = Field(..., example=settings.BASE_SCHEMA, alias="schema")
+    id: Optional[str] = Field(None, examples=["my-entity-id"])
+    schema_: str = Field(..., examples=[settings.BASE_SCHEMA], alias="schema")
     properties: Dict[str, Union[str, List[str]]] = Field(
-        ..., example={"name": ["John Doe"]}
+        ..., examples=[{"name": ["John Doe"]}]
     )
 
 
@@ -114,7 +92,7 @@ class EntityMatchQuery(BaseModel):
 
 
 class EntityMatches(BaseModel):
-    status: int = Field(200, example=200)
+    status: int = Field(200, examples=[200])
     results: List[ScoredEntityResponse]
     total: TotalSpec
     query: EntityExample
@@ -123,16 +101,16 @@ class EntityMatches(BaseModel):
 class EntityMatchResponse(BaseModel):
     responses: Dict[str, EntityMatches]
     matcher: FeatureDocs
-    limit: int = Field(..., example=5)
+    limit: int = Field(..., examples=[5])
 
 
 class DatasetModel(BaseModel):
     name: str
     title: str
-    summary: Optional[str]
-    url: Optional[str]
+    summary: Optional[str] = None
+    url: Optional[str] = None
     load: bool
-    entities_url: Optional[str]
+    entities_url: Optional[str] = None
     version: str
     children: List[str]
 
@@ -143,7 +121,7 @@ class DataCatalogModel(BaseModel):
 
 class Algorithm(BaseModel):
     name: str
-    description: Optional[str]
+    description: Optional[str] = None
     features: FeatureDocs
 
 
diff --git a/yente/data/freebase.py b/yente/data/freebase.py
index b6f8be43..4eedd1d8 100644
--- a/yente/data/freebase.py
+++ b/yente/data/freebase.py
@@ -11,9 +11,9 @@
 
 
 class FreebaseType(BaseModel):
-    id: str = Field(..., example="Person")
-    name: str = Field(..., example="People")
-    description: Optional[str] = Field(None, example="...")
+    id: str = Field(..., examples=["Person"])
+    name: str = Field(..., examples=["People"])
+    description: Optional[str] = None
 
     @classmethod
     def from_schema(cls, schema: Schema) -> "FreebaseType":
@@ -22,9 +22,9 @@ def from_schema(cls, schema: Schema) -> "FreebaseType":
 
 
 class FreebaseProperty(BaseModel):
-    id: str = Field(..., example="birthDate")
-    name: str = Field(..., example="Date of birth")
-    description: Optional[str] = Field(None, example="...")
+    id: str = Field(..., examples=["birthDate"])
+    name: str = Field(..., examples=["Date of birth"])
+    description: Optional[str] = None
 
     @classmethod
     def from_prop(cls, prop: Property) -> "FreebaseProperty":
@@ -32,9 +32,9 @@ def from_prop(cls, prop: Property) -> "FreebaseProperty":
 
 
 class FreebaseEntity(BaseModel):
-    id: str = Field(..., example="NK-A7z....")
-    name: str = Field(..., example="John Doe")
-    description: Optional[str] = Field(None, example="...")
+    id: str = Field(..., examples=["NK-A7z...."])
+    name: str = Field(..., examples=["John Doe"])
+    description: Optional[str] = None
     type: List[FreebaseType]
 
     @classmethod
@@ -49,8 +49,8 @@ def from_proxy(cls, proxy: EntityProxy) -> "FreebaseEntity":
 
 
 class FreebaseScoredEntity(FreebaseEntity):
-    score: Optional[float] = Field(..., example=0.99)
-    match: Optional[bool] = Field(..., example=False)
+    score: Optional[float] = Field(..., examples=[0.99])
+    match: Optional[bool] = Field(..., examples=[False])
 
     @classmethod
     def from_scored(cls, data: ScoredEntityResponse) -> "FreebaseScoredEntity":
@@ -89,11 +89,11 @@ class FreebasePropertySuggestResponse(FreebaseSuggestResponse):
 
 
 class FreebaseManifestView(BaseModel):
-    url: str
+    url: AnyHttpUrl
 
 
 class FreebaseManifestPreview(BaseModel):
-    url: str
+    url: AnyHttpUrl
     width: int
     height: int
 
@@ -110,8 +110,8 @@ class FreebaseManifestSuggest(BaseModel):
 
 
 class FreebaseManifest(BaseModel):
-    versions: List[str] = Field(..., example=["0.2"])
-    name: str = Field(..., example=settings.TITLE)
+    versions: List[str] = Field(..., examples=[["0.2"]])
+    name: str = Field(..., examples=[settings.TITLE])
     identifierSpace: AnyHttpUrl
     schemaSpace: AnyHttpUrl
     view: FreebaseManifestView
diff --git a/yente/data/manifest.py b/yente/data/manifest.py
index 6192c542..3639a1af 100644
--- a/yente/data/manifest.py
+++ b/yente/data/manifest.py
@@ -42,7 +42,7 @@ class Manifest(BaseModel):
     @classmethod
     async def load(cls) -> "Manifest":
         data = await load_yaml_url(settings.MANIFEST)
-        manifest = cls.parse_obj(data)
+        manifest = cls.model_validate(data)
         for catalog in manifest.catalogs:
             await catalog.fetch(manifest)
         # TODO: load remote metadata from a `metadata_url` on each dataset?
diff --git a/yente/routers/admin.py b/yente/routers/admin.py
index a61698a5..401c7470 100644
--- a/yente/routers/admin.py
+++ b/yente/routers/admin.py
@@ -89,7 +89,7 @@ async def catalog() -> DataCatalogModel:
     data sources are included, and how often they should be loaded.
     """
     catalog = await get_catalog()
-    return DataCatalogModel.parse_obj(catalog.to_dict())
+    return DataCatalogModel.model_validate(catalog.to_dict())
 
 
 @router.get(
diff --git a/yente/routers/match.py b/yente/routers/match.py
index 0a29b9c7..d8a8335c 100644
--- a/yente/routers/match.py
+++ b/yente/routers/match.py
@@ -158,7 +158,7 @@ async def match(
             status=200,
             results=scored,
             total=total,
-            query=EntityExample.parse_obj(entity.to_dict()),
+            query=EntityExample.model_validate(entity.to_dict()),
         )
     response.headers["x-batch-size"] = str(len(responses))
     return EntityMatchResponse(
diff --git a/yente/routers/search.py b/yente/routers/search.py
index 9ff1ffe5..b061abd0 100644
--- a/yente/routers/search.py
+++ b/yente/routers/search.py
@@ -115,7 +115,9 @@ async def search(
 )
 async def fetch_entity(
     response: Response,
-    entity_id: str = Path(description="ID of the entity to retrieve", example="Q7747"),
+    entity_id: str = Path(
+        description="ID of the entity to retrieve", examples=["Q7747"]
+    ),
     nested: bool = Query(
         True,
         title="Include adjacent entities (e.g. addresses, family, subsidiaries) in response",
diff --git a/yente/routers/util.py b/yente/routers/util.py
index 0c43783f..130281e3 100644
--- a/yente/routers/util.py
+++ b/yente/routers/util.py
@@ -7,7 +7,7 @@
 
 PATH_DATASET = Path(
     description="Data source or collection name to be queries",
-    example="default",
+    examples=["default"],
 )
 QUERY_PREFIX = Query("", min_length=1, description="Search prefix")
 
diff --git a/yente/search/search.py b/yente/search/search.py
index b29f7f85..f6469a5d 100644
--- a/yente/search/search.py
+++ b/yente/search/search.py
@@ -54,6 +54,8 @@ def result_facets(
         buckets: List[Dict[str, Any]] = agg.get("buckets", [])
         for bucket in buckets:
             key: Optional[str] = bucket.get("key")
+            if key is not None:
+                key = str(key)
             count: Optional[int] = bucket.get("doc_count")
             if key is None or count is None:
                 continue
diff --git a/yente/util.py b/yente/util.py
index 05326b40..74da988f 100644
--- a/yente/util.py
+++ b/yente/util.py
@@ -1,6 +1,6 @@
 from typing import Any, Optional, Tuple, cast
 from pydantic import AnyHttpUrl
-from pydantic.tools import parse_obj_as
+from pydantic.type_adapter import TypeAdapter
 
 from yente import settings
 
@@ -11,7 +11,7 @@ def __init__(self, canonical_id: str) -> None:
 
 
 def typed_url(url: Any) -> AnyHttpUrl:
-    return cast(AnyHttpUrl, parse_obj_as(AnyHttpUrl, url))
+    return TypeAdapter(AnyHttpUrl).validate_python(url)
 
 
 def match_prefix(prefix: str, *labels: Optional[str]) -> bool:

From 282e8a7e9dcc1d1c8295f7ac8d8a9249d0fbc5df Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@pudo.org>
Date: Tue, 11 Jul 2023 13:19:14 +0200
Subject: [PATCH 2/3] compat work in nomenklatura for pydantic 2

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index ae544a9f..4d687c96 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
     namespace_packages=[],
     install_requires=[
         "followthemoney==3.4.3",
-        "nomenklatura==3.2.0",
+        "nomenklatura==3.2.1",
         "asyncstdlib==3.10.8",
         "aiocron==1.8",
         "aiocsv==1.2.4",

From 327cc7cf4a0a29f2a6712e8440382512dbdc0842 Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@pudo.org>
Date: Thu, 13 Jul 2023 21:21:03 +0200
Subject: [PATCH 3/3] simplify kubernetes example

---
 kubernetes.example.yml | 58 +++++-------------------------------------
 1 file changed, 7 insertions(+), 51 deletions(-)

diff --git a/kubernetes.example.yml b/kubernetes.example.yml
index b7627e89..206011a8 100644
--- a/kubernetes.example.yml
+++ b/kubernetes.example.yml
@@ -1,19 +1,4 @@
 ---
-apiVersion: cert-manager.io/v1
-kind: ClusterIssuer
-metadata:
-  name: letsencrypt-prod
-spec:
-  acme:
-    server: https://acme-v02.api.letsencrypt.org/directory
-    email: info@opensanctions.org
-    privateKeySecretRef:
-      name: letsencrypt-prod
-    solvers:
-      - http01:
-          ingress:
-            class: nginx
----
 apiVersion: v1
 kind: Service
 metadata:
@@ -28,37 +13,6 @@ spec:
       targetPort: 8000
       name: http
 ---
-# Supposes you have an ingress, and ideally cert-manager installed on your
-# cluster. You should also consider running the service internally to the
-# cluster without exposing it on an ingress.
-apiVersion: networking.k8s.io/v1
-kind: Ingress
-metadata:
-  name: yente-ingress
-  annotations:
-    # you need to configure letsencrypt for your cluster:
-    # cert-manager.io/cluster-issuer: letsencrypt-prod
-    acme.cert-manager.io/http01-edit-in-place: "true"
-  labels:
-    app: opensanctions
-spec:
-  ingressClassName: nginx
-  tls:
-    - hosts:
-        - api.opensanctions.org
-      secretName: tls-api.opensanctions.org
-  rules:
-    - host: api.opensanctions.org
-      http:
-        paths:
-          - path: /
-            pathType: Prefix
-            backend:
-              service:
-                name: yente
-                port:
-                  number: 8000
----
 apiVersion: v1
 kind: ConfigMap
 metadata:
@@ -67,7 +21,7 @@ data:
   manifest.yml: |
     catalogs:
       - url: "https://data.opensanctions.org/datasets/latest/index.json"
-        scope: all
+        scope: default
         resource_name: entities.ftm.json
       # - url: "https://data.opensanctions.org/graph/catalog.json"
       #   resource_name: entities.ftm.json
@@ -107,10 +61,10 @@ spec:
               name: http
           resources:
             requests:
-              memory: 300Mi
+              memory: 600Mi
               cpu: 200m
             limits:
-              memory: 300Mi
+              memory: 600Mi
               cpu: 200m
           securityContext:
             readOnlyRootFilesystem: true
@@ -122,6 +76,8 @@ spec:
             - mountPath: /tmp
               name: tmp-volume
           env:
+            - name: YENTE_PORT
+              value: "8000" # quoted: Kubernetes env var values must be strings
             - name: YENTE_TITLE
               value: "OpenSanctions API"
             - name: YENTE_LOG_JSON
@@ -205,10 +161,10 @@ spec:
                 - reindex
               resources:
                 requests:
-                  memory: 300M
+                  memory: 600M
                   cpu: 400m
                 limits:
-                  memory: 300M
+                  memory: 600M
                   cpu: 400m
               securityContext:
                 readOnlyRootFilesystem: true