Skip to content

Commit

Permalink
Include ftm version in index version prefix
Browse files Browse the repository at this point in the history
  • Loading branch information
pudo committed Aug 10, 2024
1 parent 9edbce7 commit ab7c7c7
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 14 deletions.
4 changes: 2 additions & 2 deletions yente/search/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
NAME_PHONETIC_FIELD,
)
from yente.provider import SearchProvider, with_provider
from yente.search.util import parse_index_name
from yente.search.util import construct_index_name
from yente.search.versions import parse_index_name
from yente.search.versions import construct_index_name
from yente.data.util import expand_dates, phonetic_names
from yente.data.util import index_name_parts, index_name_keys

Expand Down
2 changes: 1 addition & 1 deletion yente/search/status.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from yente import settings
from yente.logs import get_logger
from yente.provider import SearchProvider
from yente.search.util import parse_index_name
from yente.search.versions import parse_index_name
from yente.data.manifest import Catalog

log = get_logger(__name__)
Expand Down
32 changes: 22 additions & 10 deletions yente/search/util.py → yente/search/versions.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
from functools import cache
from typing import Tuple
from normality import slugify
import followthemoney

from yente import settings


@cache
def system_version() -> str:
    """Return the prefix that marks index versions built by this system.

    The prefix is ``settings.INDEX_VERSION`` followed by a digest of the
    installed followthemoney version and a trailing ``-``. The digest is
    made by left-padding each dot-separated component of
    ``followthemoney.__version__`` with zeros to at least two characters,
    concatenating the components and keeping the first six characters.

    The result is memoized with ``functools.cache``, so it is computed only
    on the first call.
    """
    padded_segments = (
        segment.rjust(2, "0") for segment in followthemoney.__version__.split(".")
    )
    # Only the leading six characters of the padded version are kept.
    ftm_digest = "".join(padded_segments)[:6]
    return f"{settings.INDEX_VERSION}{ftm_digest}-"


def parse_index_name(index: str) -> Tuple[str, str]:
"""
Parse a given index name.
Expand All @@ -19,31 +29,33 @@ def parse_index_name(index: str) -> Tuple[str, str]:
if "-" not in index_end:
raise ValueError("Index name does not contain a version.")
dataset, index_version = index_end.split("-", 1)
if not index_version.startswith(settings.INDEX_VERSION):
sys_version = system_version()
if not index_version.startswith(sys_version):
raise ValueError("Index version does not start with the correct prefix.")
dataset_version = index_version[len(settings.INDEX_VERSION) :]
dataset_version = index_version[len(sys_version) :]
if len(dataset_version) < 1:
raise ValueError("Index version must be at least one character long.")
return (dataset, dataset_version)


def construct_index_name(ds_name: str, ds_version: str | None = None) -> str:
def construct_index_name(dataset: str, version: str | None = None) -> str:
"""
Given a dataset and optionally a version construct a properly versioned index name.
"""
if len(str(ds_name)) < 1:
if len(str(dataset)) < 1:
raise ValueError("Dataset name must be at least one character long.")
base = f"{settings.ENTITY_INDEX}-{ds_name}"
if ds_version is None:
base = f"{settings.ENTITY_INDEX}-{dataset}"
if version is None:
return base
return f"{base}-{construct_index_version(ds_version)}"
return f"{base}-{construct_index_version(version)}"


def construct_index_version(version: str) -> str:
"""Given a version ID, return a version string with the version prefix."""
if len(version) < 1:
raise ValueError("Version must be at least one character long.")
combined = slugify(f"{settings.INDEX_VERSION}{version}", "-")
if combined is None or len(combined) < len(settings.INDEX_VERSION) + 1:
raise ValueError("Invalid version: %s%s." % (settings.INDEX_VERSION, version))
sys_version = system_version()
combined = slugify(f"{sys_version}{version}", "-")
if combined is None or len(combined) < len(sys_version) + 1:
raise ValueError("Invalid version: %s%s." % (sys_version, version))
return combined
2 changes: 1 addition & 1 deletion yente/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def random_cron() -> str:
INDEX_SHARDS = int(env_legacy("YENTE_INDEX_SHARDS", "YENTE_ELASTICSEARCH_SHARDS", "1"))
INDEX_NAME = env_legacy("YENTE_INDEX_NAME", "YENTE_ELASTICSEARCH_INDEX", "yente")
ENTITY_INDEX = f"{INDEX_NAME}-entities"
INDEX_VERSION = env_str("YENTE_INDEX_VERSION", "009")
INDEX_VERSION = env_str("YENTE_INDEX_VERSION", "011")
assert len(INDEX_VERSION) == 3, "Index version must be 3 characters long."

# ElasticSearch-only options:
Expand Down

0 comments on commit ab7c7c7

Please sign in to comment.