From e5c3997a9059e3196d31afcd82c8d23b5956f69a Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Tue, 11 Jul 2023 13:17:20 +0200 Subject: [PATCH 1/3] adopt pydantic 2 --- contrib/test_bench.py | 2 +- setup.py | 4 +- yente/data/common.py | 88 ++++++++++++++++------------------------- yente/data/freebase.py | 30 +++++++------- yente/data/manifest.py | 2 +- yente/routers/admin.py | 2 +- yente/routers/match.py | 2 +- yente/routers/search.py | 4 +- yente/routers/util.py | 2 +- yente/search/search.py | 2 + yente/util.py | 4 +- 11 files changed, 62 insertions(+), 80 deletions(-) diff --git a/contrib/test_bench.py b/contrib/test_bench.py index 5c969b0b..3fa77d46 100644 --- a/contrib/test_bench.py +++ b/contrib/test_bench.py @@ -22,7 +22,7 @@ async def test_example(): ds = await get_dataset("default") - example = EntityExample.parse_obj(EXAMPLE) + example = EntityExample.model_validate(EXAMPLE) entity = Entity.from_example(example) query = entity_query(ds, entity) pprint(query) diff --git a/setup.py b/setup.py index 7705523c..ae544a9f 100644 --- a/setup.py +++ b/setup.py @@ -25,14 +25,14 @@ "types-aiofiles==23.1.0.4", "aiohttp[speedups]==3.8.4", "elasticsearch[async]==8.8.0", - "fastapi==0.99.1", + "fastapi==0.100.0", "uvicorn[standard]==0.22.0", "python-multipart==0.0.6", "email-validator==2.0.0.post2", "structlog==23.1.0", "pyicu==2.11", "jellyfish==1.0.0", - "orjson==3.9.1", + "orjson==3.9.2", "text-unidecode==1.3", "click==8.0.4", "normality==2.4.0", diff --git a/yente/data/common.py b/yente/data/common.py index fc285cbe..d3461ffd 100644 --- a/yente/data/common.py +++ b/yente/data/common.py @@ -10,38 +10,27 @@ class ErrorResponse(BaseModel): - detail: str = Field(..., example="Detailed error message") + detail: str = Field(..., examples=["Detailed error message"]) class EntityResponse(BaseModel): - id: str = Field(..., example="NK-A7z....") - caption: str = Field(..., example="John Doe") - schema_: str = Field(..., example="LegalEntity", alias="schema") - 
properties: EntityProperties = Field(..., example={"name": ["John Doe"]}) - datasets: List[str] = Field([], example=["us_ofac_sdn"]) - referents: List[str] = Field([], example=["ofac-1234"]) + id: str = Field(..., examples=["NK-A7z...."]) + caption: str = Field(..., examples=["John Doe"]) + schema_: str = Field(..., examples=["LegalEntity"], alias="schema") + properties: EntityProperties = Field(..., examples=[{"name": ["John Doe"]}]) + datasets: List[str] = Field([], examples=[["us_ofac_sdn"]]) + referents: List[str] = Field([], examples=[["ofac-1234"]]) target: bool = Field(False) - first_seen: Optional[datetime] = Field(..., example=datetime.utcnow()) - last_seen: Optional[datetime] = Field(..., example=datetime.utcnow()) - last_change: Optional[datetime] = Field(..., example=datetime.utcnow()) + first_seen: Optional[datetime] = Field(..., examples=[datetime.utcnow()]) + last_seen: Optional[datetime] = Field(..., examples=[datetime.utcnow()]) + last_change: Optional[datetime] = Field(..., examples=[datetime.utcnow()]) @classmethod def from_entity(cls, entity: Entity) -> "EntityResponse": - return cls.construct( - id=entity.id, - caption=entity._caption, - schema=entity.schema.name, - properties=dict(entity.properties), - datasets=list(entity.datasets), - referents=list(entity.referents), - target=entity.target, - first_seen=entity.first_seen, - last_seen=entity.last_seen, - last_change=entity.last_change, - ) + return cls.model_validate(entity.to_dict()) -EntityResponse.update_forward_refs() +EntityResponse.model_rebuild() class ScoredEntityResponse(EntityResponse): @@ -53,21 +42,10 @@ class ScoredEntityResponse(EntityResponse): def from_entity_result( cls, entity: Entity, result: MatchingResult, threshold: float ) -> "ScoredEntityResponse": - return cls.construct( - id=entity.id, - caption=entity._caption, - schema=entity.schema.name, - properties=entity.properties, - datasets=list(entity.datasets), - referents=list(entity.referents), - target=entity.target, - 
first_seen=entity.first_seen, - last_seen=entity.last_seen, - last_change=entity.last_change, - score=result["score"], - match=result["score"] >= threshold, - features=result["features"], - ) + data = entity.to_dict() + data.update(result) + data["match"] = result["score"] >= threshold + return cls.model_validate(data) class StatusResponse(BaseModel): @@ -75,24 +53,24 @@ class StatusResponse(BaseModel): class SearchFacetItem(BaseModel): - name: str = Field(..., example="ru") - label: str = Field(..., example="Russia") - count: int = Field(1, example=42) + name: str = Field(..., examples=["ru"]) + label: str = Field(..., examples=["Russia"]) + count: int = Field(1, examples=[42]) class SearchFacet(BaseModel): - label: str = Field(..., example="Countries") + label: str = Field(..., examples=["Countries"]) values: List[SearchFacetItem] class TotalSpec(BaseModel): - value: int = Field(..., example=42) - relation: str = Field("eq", example="eq") + value: int = Field(..., examples=[42]) + relation: str = Field("eq", examples=["eq"]) class ResultsResponse(BaseModel): - limit: int = Field(..., example=20) - offset: int = Field(0, example=0) + limit: int = Field(..., examples=[20]) + offset: int = Field(0, examples=[0]) total: TotalSpec @@ -102,10 +80,10 @@ class SearchResponse(ResultsResponse): class EntityExample(BaseModel): - id: Optional[str] = Field(None, example="my-entity-id") - schema_: str = Field(..., example=settings.BASE_SCHEMA, alias="schema") + id: Optional[str] = Field(None, examples=["my-entity-id"]) + schema_: str = Field(..., examples=[settings.BASE_SCHEMA], alias="schema") properties: Dict[str, Union[str, List[str]]] = Field( - ..., example={"name": ["John Doe"]} + ..., examples=[{"name": ["John Doe"]}] ) @@ -114,7 +92,7 @@ class EntityMatchQuery(BaseModel): class EntityMatches(BaseModel): - status: int = Field(200, example=200) + status: int = Field(200, examples=[200]) results: List[ScoredEntityResponse] total: TotalSpec query: EntityExample @@ -123,16 
+101,16 @@ class EntityMatches(BaseModel): class EntityMatchResponse(BaseModel): responses: Dict[str, EntityMatches] matcher: FeatureDocs - limit: int = Field(..., example=5) + limit: int = Field(..., examples=[5]) class DatasetModel(BaseModel): name: str title: str - summary: Optional[str] - url: Optional[str] + summary: Optional[str] = None + url: Optional[str] = None load: bool - entities_url: Optional[str] + entities_url: Optional[str] = None version: str children: List[str] @@ -143,7 +121,7 @@ class DataCatalogModel(BaseModel): class Algorithm(BaseModel): name: str - description: Optional[str] + description: Optional[str] = None features: FeatureDocs diff --git a/yente/data/freebase.py b/yente/data/freebase.py index b6f8be43..4eedd1d8 100644 --- a/yente/data/freebase.py +++ b/yente/data/freebase.py @@ -11,9 +11,9 @@ class FreebaseType(BaseModel): - id: str = Field(..., example="Person") - name: str = Field(..., example="People") - description: Optional[str] = Field(None, example="...") + id: str = Field(..., examples=["Person"]) + name: str = Field(..., examples=["People"]) + description: Optional[str] = None @classmethod def from_schema(cls, schema: Schema) -> "FreebaseType": @@ -22,9 +22,9 @@ def from_schema(cls, schema: Schema) -> "FreebaseType": class FreebaseProperty(BaseModel): - id: str = Field(..., example="birthDate") - name: str = Field(..., example="Date of birth") - description: Optional[str] = Field(None, example="...") + id: str = Field(..., examples=["birthDate"]) + name: str = Field(..., examples=["Date of birth"]) + description: Optional[str] = None @classmethod def from_prop(cls, prop: Property) -> "FreebaseProperty": @@ -32,9 +32,9 @@ def from_prop(cls, prop: Property) -> "FreebaseProperty": class FreebaseEntity(BaseModel): - id: str = Field(..., example="NK-A7z....") - name: str = Field(..., example="John Doe") - description: Optional[str] = Field(None, example="...") + id: str = Field(..., examples=["NK-A7z...."]) + name: str = Field(..., 
examples=["John Doe"]) + description: Optional[str] = None type: List[FreebaseType] @classmethod @@ -49,8 +49,8 @@ def from_proxy(cls, proxy: EntityProxy) -> "FreebaseEntity": class FreebaseScoredEntity(FreebaseEntity): - score: Optional[float] = Field(..., example=0.99) - match: Optional[bool] = Field(..., example=False) + score: Optional[float] = Field(..., examples=[0.99]) + match: Optional[bool] = Field(..., examples=[False]) @classmethod def from_scored(cls, data: ScoredEntityResponse) -> "FreebaseScoredEntity": @@ -89,11 +89,11 @@ class FreebasePropertySuggestResponse(FreebaseSuggestResponse): class FreebaseManifestView(BaseModel): - url: str + url: AnyHttpUrl class FreebaseManifestPreview(BaseModel): - url: str + url: AnyHttpUrl width: int height: int @@ -110,8 +110,8 @@ class FreebaseManifestSuggest(BaseModel): class FreebaseManifest(BaseModel): - versions: List[str] = Field(..., example=["0.2"]) - name: str = Field(..., example=settings.TITLE) + versions: List[str] = Field(..., examples=[["0.2"]]) + name: str = Field(..., examples=[settings.TITLE]) identifierSpace: AnyHttpUrl schemaSpace: AnyHttpUrl view: FreebaseManifestView diff --git a/yente/data/manifest.py b/yente/data/manifest.py index 6192c542..3639a1af 100644 --- a/yente/data/manifest.py +++ b/yente/data/manifest.py @@ -42,7 +42,7 @@ class Manifest(BaseModel): @classmethod async def load(cls) -> "Manifest": data = await load_yaml_url(settings.MANIFEST) - manifest = cls.parse_obj(data) + manifest = cls.model_validate(data) for catalog in manifest.catalogs: await catalog.fetch(manifest) # TODO: load remote metadata from a `metadata_url` on each dataset? diff --git a/yente/routers/admin.py b/yente/routers/admin.py index a61698a5..401c7470 100644 --- a/yente/routers/admin.py +++ b/yente/routers/admin.py @@ -89,7 +89,7 @@ async def catalog() -> DataCatalogModel: data sources are included, and how often they should be loaded. 
""" catalog = await get_catalog() - return DataCatalogModel.parse_obj(catalog.to_dict()) + return DataCatalogModel.model_validate(catalog.to_dict()) @router.get( diff --git a/yente/routers/match.py b/yente/routers/match.py index 0a29b9c7..d8a8335c 100644 --- a/yente/routers/match.py +++ b/yente/routers/match.py @@ -158,7 +158,7 @@ async def match( status=200, results=scored, total=total, - query=EntityExample.parse_obj(entity.to_dict()), + query=EntityExample.model_validate(entity.to_dict()), ) response.headers["x-batch-size"] = str(len(responses)) return EntityMatchResponse( diff --git a/yente/routers/search.py b/yente/routers/search.py index 9ff1ffe5..b061abd0 100644 --- a/yente/routers/search.py +++ b/yente/routers/search.py @@ -115,7 +115,9 @@ async def search( ) async def fetch_entity( response: Response, - entity_id: str = Path(description="ID of the entity to retrieve", example="Q7747"), + entity_id: str = Path( + description="ID of the entity to retrieve", examples=["Q7747"] + ), nested: bool = Query( True, title="Include adjacent entities (e.g. 
addresses, family, subsidiaries) in response", diff --git a/yente/routers/util.py b/yente/routers/util.py index 0c43783f..130281e3 100644 --- a/yente/routers/util.py +++ b/yente/routers/util.py @@ -7,7 +7,7 @@ PATH_DATASET = Path( description="Data source or collection name to be queries", - example="default", + examples=["default"], ) QUERY_PREFIX = Query("", min_length=1, description="Search prefix") diff --git a/yente/search/search.py b/yente/search/search.py index b29f7f85..f6469a5d 100644 --- a/yente/search/search.py +++ b/yente/search/search.py @@ -54,6 +54,8 @@ def result_facets( buckets: List[Dict[str, Any]] = agg.get("buckets", []) for bucket in buckets: key: Optional[str] = bucket.get("key") + if key is not None: + key = str(key) count: Optional[int] = bucket.get("doc_count") if key is None or count is None: continue diff --git a/yente/util.py b/yente/util.py index 05326b40..74da988f 100644 --- a/yente/util.py +++ b/yente/util.py @@ -1,6 +1,6 @@ from typing import Any, Optional, Tuple, cast from pydantic import AnyHttpUrl -from pydantic.tools import parse_obj_as +from pydantic.type_adapter import TypeAdapter from yente import settings @@ -11,7 +11,7 @@ def __init__(self, canonical_id: str) -> None: def typed_url(url: Any) -> AnyHttpUrl: - return cast(AnyHttpUrl, parse_obj_as(AnyHttpUrl, url)) + return TypeAdapter(AnyHttpUrl).validate_python(url) def match_prefix(prefix: str, *labels: Optional[str]) -> bool: From 282e8a7e9dcc1d1c8295f7ac8d8a9249d0fbc5df Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Tue, 11 Jul 2023 13:19:14 +0200 Subject: [PATCH 2/3] compat work in nomenklatura for pydantic 2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ae544a9f..4d687c96 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ namespace_packages=[], install_requires=[ "followthemoney==3.4.3", - "nomenklatura==3.2.0", + "nomenklatura==3.2.1", "asyncstdlib==3.10.8", "aiocron==1.8", "aiocsv==1.2.4", From 
327cc7cf4a0a29f2a6712e8440382512dbdc0842 Mon Sep 17 00:00:00 2001 From: Friedrich Lindenberg Date: Thu, 13 Jul 2023 21:21:03 +0200 Subject: [PATCH 3/3] simplify kubernetes example --- kubernetes.example.yml | 58 +++++------------------------------------- 1 file changed, 7 insertions(+), 51 deletions(-) diff --git a/kubernetes.example.yml b/kubernetes.example.yml index b7627e89..206011a8 100644 --- a/kubernetes.example.yml +++ b/kubernetes.example.yml @@ -1,19 +1,4 @@ --- -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - name: letsencrypt-prod -spec: - acme: - server: https://acme-v02.api.letsencrypt.org/directory - email: info@opensanctions.org - privateKeySecretRef: - name: letsencrypt-prod - solvers: - - http01: - ingress: - class: nginx ---- apiVersion: v1 kind: Service metadata: @@ -28,37 +13,6 @@ spec: targetPort: 8000 name: http --- -# Supposes you have an ingress, and ideally cert-manager installed on your -# cluster. You should also consider running the service internally to the -# cluster without exposing it on an ingress. 
-apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: yente-ingress - annotations: - # you need to configure letsencrypt for your cluster: - # cert-manager.io/cluster-issuer: letsencrypt-prod - acme.cert-manager.io/http01-edit-in-place: "true" - labels: - app: opensanctions -spec: - ingressClassName: nginx - tls: - - hosts: - - api.opensanctions.org - secretName: tls-api.opensanctions.org - rules: - - host: api.opensanctions.org - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: yente - port: - number: 8000 ---- apiVersion: v1 kind: ConfigMap metadata: @@ -67,7 +21,7 @@ data: manifest.yml: | catalogs: - url: "https://data.opensanctions.org/datasets/latest/index.json" - scope: all + scope: default resource_name: entities.ftm.json # - url: "https://data.opensanctions.org/graph/catalog.json" # resource_name: entities.ftm.json @@ -107,10 +61,10 @@ spec: name: http resources: requests: - memory: 300Mi + memory: 600Mi cpu: 200m limits: - memory: 300Mi + memory: 600Mi cpu: 200m securityContext: readOnlyRootFilesystem: true @@ -122,6 +76,8 @@ spec: - mountPath: /tmp name: tmp-volume env: + - name: YENTE_PORT + value: "8000" - name: YENTE_TITLE value: "OpenSanctions API" - name: YENTE_LOG_JSON @@ -205,10 +161,10 @@ spec: - reindex resources: requests: - memory: 300M + memory: 600M cpu: 400m limits: - memory: 300M + memory: 600M cpu: 400m securityContext: readOnlyRootFilesystem: true