From 34a8917ef6b16acdbbe62be667d36e07962550e4 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 11 Jul 2024 11:28:26 -0400 Subject: [PATCH] wip: initial work for updating vrs version --- .../source/normalizing_data/normalization.rst | 14 +- docs/source/normalizing_data/sources.rst | 6 +- pyproject.toml | 2 +- src/gene/query.py | 22 ++-- src/gene/schemas.py | 13 +- tests/conftest.py | 3 + tests/unit/test_ensembl_source.py | 5 - tests/unit/test_ncbi_source.py | 10 -- tests/unit/test_query.py | 120 +++++++----------- tests/unit/test_schemas.py | 2 +- 10 files changed, 74 insertions(+), 123 deletions(-) diff --git a/docs/source/normalizing_data/normalization.rst b/docs/source/normalizing_data/normalization.rst index 624fcd26..20c2250c 100644 --- a/docs/source/normalizing_data/normalization.rst +++ b/docs/source/normalizing_data/normalization.rst @@ -73,26 +73,23 @@ Normalized records are structured as `Genes `_ is a service prov "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.uNBZoxhjhohl24VlIut-JxPJAGfJ7EQE", + "id": "ga4gh:SL.0nPwKHYNnTmJ06G-gSmz8BEhB_NTp-0B", + "digest": "0nPwKHYNnTmJ06G-gSmz8BEhB_NTp-0B", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", diff --git a/pyproject.toml b/pyproject.toml index 89c1b634..a167a189 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "uvicorn", "click", "boto3", - "ga4gh.vrs~=2.0.0a1", + # "ga4gh.vrs~=2.0.0a1", ] dynamic = ["version"] diff --git a/src/gene/query.py b/src/gene/query.py index 4b81d89d..93adafba 100644 --- a/src/gene/query.py +++ b/src/gene/query.py @@ -4,7 +4,7 @@ from datetime import datetime from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypeVar -from ga4gh.core import core_models, ga4gh_identify +from ga4gh.core import domain_models, entity_models, ga4gh_identify from ga4gh.vrs import models from gene import ITEM_TYPES, NAMESPACE_LOOKUP, PREFIX_LOOKUP @@ -420,7 +420,7 @@ def _add_gene( :param possible_concepts: List of other normalized concepts found :return: Response with core Gene """ - gene_obj = core_models.Gene( + gene_obj = domain_models.Gene( id=f"normalize.gene.{record['concept_id']}", label=record["symbol"], ) @@ -431,11 +431,11 @@ def _add_gene( for source_id in source_ids: system, code = source_id.split(":") mappings.append( - core_models.Mapping( - coding=core_models.Coding( - code=core_models.Code(code), system=system.lower() + entity_models.ConceptMapping( + coding=entity_models.Coding( + code=entity_models.Code(code), system=system.lower() ), - relation=core_models.Relation.RELATED_MATCH, + relation=entity_models.Relation.RELATED_MATCH, ) ) if mappings: @@ -450,7 +450,7 @@ def _add_gene( val = [val] aliases.update(val) if aliases: - gene_obj.aliases = list(aliases) + gene_obj.alternativeLabels = list(aliases) # extensions extensions = [] @@ -464,7 +464,7 @@ def _add_gene( for ext_label, record_label in extension_and_record_labels: if record_label in record and record[record_label]: extensions.append( - core_models.Extension(name=ext_label, value=record[record_label]) + entity_models.Extension(name=ext_label, value=record[record_label]) ) record_locations = {} @@ -485,7 +485,7 @@ def _add_gene( if transformed_locs: extensions.append( - core_models.Extension(name=loc_name, value=transformed_locs) + entity_models.Extension(name=loc_name, value=transformed_locs) ) # handle gene types separately because they're wonky @@ -493,7 +493,7 @@ def _add_gene( gene_type = record.get("gene_type") if gene_type: extensions.append( - core_models.Extension( + entity_models.Extension( name=GeneTypeFieldName[record["src_name"].upper()].value, value=gene_type, ) @@ -504,7 +504,7 @@ def _add_gene( values = record.get(field_name, []) for value in values: extensions.append( - core_models.Extension(name=field_name, value=value) + entity_models.Extension(name=field_name, value=value) ) if extensions: gene_obj.extensions = extensions diff --git a/src/gene/schemas.py b/src/gene/schemas.py index 6f85b1bc..d79afd7e 100644 --- a/src/gene/schemas.py +++ b/src/gene/schemas.py @@ -2,7 +2,7 @@ from enum import Enum, IntEnum from typing import Dict, List, Literal, Optional, Union -from ga4gh.core import core_models +from ga4gh.core import domain_models from ga4gh.vrs import models from pydantic import ( BaseModel, @@ -321,7 +321,7 @@ class NormalizeService(BaseNormalizationService): """Define model for returning normalized concept.""" normalized_id: Optional[str] = None - gene: Optional[core_models.Gene] = None + gene: Optional[domain_models.Gene] = None source_meta_: Dict[SourceName, SourceMeta] = {} model_config = ConfigDict( @@ -402,12 +402,10 @@ class NormalizeService(BaseNormalizationService): { "name": "approved_name", "value": "B-Raf proto-oncogene, serine/threonine kinase", - "type": "Extension", }, { "name": "symbol_status", "value": "approved", - "type": "Extension", }, # { # "name": "chromosome_location", @@ -419,7 +417,6 @@ class NormalizeService(BaseNormalizationService): # "end": "q34", # "start": "q34", # }, - # "type": "Extension" # } ], }, @@ -578,7 +575,8 @@ class UnmergedNormalizationService(BaseNormalizationService): "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.dnydHb2Bnv5pwXjI4MpJmrZUADf5QLe1", # noqa: E501 + "id": "ga4gh:SL.4taOKYezIxUvFozs6c6OC0bJAQ2zwjxu", # noqa: E501 + "digest": "4taOKYezIxUvFozs6c6OC0bJAQ2zwjxu", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -630,7 +628,8 @@ class UnmergedNormalizationService(BaseNormalizationService): # "end": "q22.1" }, { - "id": "ga4gh:SL.U7vPSlX8eyCKdFSiROIsc9om0Y7pCm2g", # noqa: E501 + "id": "ga4gh:SL.OWr9DoyBhr2zpf4uLLcZSvsTSIDElU6R", # noqa: E501 + "digest": "OWr9DoyBhr2zpf4uLLcZSvsTSIDElU6R", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", diff --git a/tests/conftest.py b/tests/conftest.py index ad1a14a2..c9f8f204 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,6 +45,9 @@ def _compare_records(normalized_gene, test_gene, match_type): assert normalized_gene.symbol == test_gene.symbol assert len(normalized_gene.locations) == len(test_gene.locations) for loc in normalized_gene.locations: + assert loc.id.split("ga4gh:SL.")[-1] == loc.digest + loc.id = None + loc.digest = None assert loc in test_gene.locations assert set(normalized_gene.location_annotations) == set( test_gene.location_annotations diff --git a/tests/unit/test_ensembl_source.py b/tests/unit/test_ensembl_source.py index 7660be3e..c4c8d2fa 100644 --- a/tests/unit/test_ensembl_source.py +++ b/tests/unit/test_ensembl_source.py @@ -36,7 +36,6 @@ def ddx11l1(): "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.Ihi0T86UoFIEbH0DHttX2nIw_BdOkI5L", "end": 14409, "start": 11868, "sequenceReference": { @@ -68,7 +67,6 @@ def tp53(): "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.TlGoA-JmP3Xky3RhJ6_UU3eJKq8EpEp9", "end": 7687538, "start": 7661778, "sequenceReference": { @@ -100,7 +98,6 @@ def ATP6AP1_DT(): # noqa: N802 "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.bPbeeEGSqjlZJ1Ddmg5T9ptJz9tKxYi3", "end": 154428526, "start": 154424377, "sequenceReference": { @@ -132,7 +129,6 @@ def hsa_mir_1253(): "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.x4kOE6ZXG-xY7nm6bu2W7lvm6ljaJXzR", "end": 2748182, "start": 2748077, "sequenceReference": { @@ -164,7 +160,6 @@ def spry3(): "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.fxU7Axal2_GbyOfW8NQf0plM-SUWFCB0", "end": 155782459, "start": 155612571, "sequenceReference": { diff --git a/tests/unit/test_ncbi_source.py b/tests/unit/test_ncbi_source.py index d0083a43..9b245c5f 100644 --- a/tests/unit/test_ncbi_source.py +++ b/tests/unit/test_ncbi_source.py @@ -66,7 +66,6 @@ def dpf1(): # "type": "ChromosomeLocation" # }, { - "id": "ga4gh:SL.0bmpLh_dlBRrzfviiQY9Vg4iEH0XeR20", "end": 38229695, "start": 38211005, "sequenceReference": { @@ -106,7 +105,6 @@ def pdp1_symbol(): # "type": "ChromosomeLocation" # }, { - "id": "ga4gh:SL.-455M-S51D8nXPFoGH0dYNFVFAJxm5dG", "end": 93926068, "start": 93916922, "sequenceReference": { @@ -146,7 +144,6 @@ def pdp1_alias(): # "type": "ChromosomeLocation" # }, { - "id": "ga4gh:SL.VI_0P0-ei90MDsLjAeUrDfeXBlZVJtJY", "end": 4665258, "start": 4662293, "sequenceReference": { @@ -195,7 +192,6 @@ def spry3(): # "type": "ChromosomeLocation" # }, { - "id": "ga4gh:SL.2N5aguRIvBdGemRgABZFutmLTV925dsV", "end": 155782459, "start": 155612585, "sequenceReference": { @@ -205,7 +201,6 @@ def spry3(): "type": "SequenceLocation", }, { - "id": "ga4gh:SL.U9E9WtQdzFc4elR3t1qw48nueHgfWFWL", "end": 56968979, "start": 56954315, "sequenceReference": { @@ -290,7 +285,6 @@ def znf84(): # "type": "ChromosomeLocation" # }, { - "id": "ga4gh:SL.IRsls9vud2-CiA7Jq4L3ry2VVK7LoNud", "end": 133063299, "start": 133037508, "sequenceReference": { @@ -339,7 +333,6 @@ def slc25a6(): # "end": "p11.2" # }, { - "id": "ga4gh:SL.dvD-ZopQGZkVWx4Z-vFpP9ateicPHgQ6", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -349,7 +342,6 @@ def slc25a6(): "end": 1392113, }, { - "id": "ga4gh:SL.bv3LobZZ-sERq5cIthyS4w_tmSwV2QSg", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -484,7 +476,6 @@ def prkrap1(): # "type": "ChromosomeLocation" # }, { - "id": "ga4gh:SL.LwWy5JYncZVnOM9hWiLWW_z0n2eY-peb", "end": 3941874, "start": 3940269, "sequenceReference": { @@ -494,7 +485,6 @@ def prkrap1(): "type": "SequenceLocation", }, { - "id": "ga4gh:SL.q36ql_fX4HrZy_G2EXX_SGWl-7X5Bq6c", "end": 3932085, "start": 3930480, "sequenceReference": { diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index f767ced1..cedcb29f 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1,6 +1,6 @@ """Module to test the query module.""" import pytest -from ga4gh.core import core_models +from ga4gh.core import domain_models from gene.query import InvalidParameterException, QueryHandler from gene.schemas import BaseGene, MatchType, SourceName @@ -87,15 +87,14 @@ def normalized_ache(): "relation": "relatedMatch", }, ], - "aliases": ["3.1.1.7", "YT", "N-ACHE", "ARACHE", "ACEE"], + "alternativeLabels": ["3.1.1.7", "YT", "N-ACHE", "ARACHE", "ACEE"], "extensions": [ - {"name": "previous_symbols", "value": ["ACEE", "YT"], "type": "Extension"}, + {"name": "previous_symbols", "value": ["ACEE", "YT"]}, { "name": "approved_name", "value": "acetylcholinesterase (Cartwright blood group)", - "type": "Extension", }, - {"name": "symbol_status", "value": "approved", "type": "Extension"}, + {"name": "symbol_status", "value": "approved"}, { "name": "ncbi_locations", "value": [ @@ -108,7 +107,6 @@ def normalized_ache(): # "start": "q22.1" # }, { - "id": "ga4gh:SL.U7vPSlX8eyCKdFSiROIsc9om0Y7pCm2g", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -118,7 +116,6 @@ def normalized_ache(): "end": 100896994, } ], - "type": "Extension", }, # { # "name": "hgnc_locations", @@ -132,13 +129,11 @@ def normalized_ache(): # "end": "q22.1" # } # ], - # "type": "Extension" # }, { "name": "ensembl_locations", "value": [ { - "id": "ga4gh:SL.dnydHb2Bnv5pwXjI4MpJmrZUADf5QLe1", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -148,19 +143,17 @@ def normalized_ache(): "end": 100896974, } ], - "type": "Extension", }, - {"name": "ncbi_gene_type", "type": "Extension", "value": "protein-coding"}, + {"name": "ncbi_gene_type", "value": "protein-coding"}, { "name": "hgnc_locus_type", - "type": "Extension", "value": "gene with protein product", }, - {"name": "ensembl_biotype", "type": "Extension", "value": "protein_coding"}, - {"name": "strand", "type": "Extension", "value": "-"}, + {"name": "ensembl_biotype", "value": "protein_coding"}, + {"name": "strand", "value": "-"}, ], } - return core_models.Gene(**params) + return domain_models.Gene(**params) @pytest.fixture(scope="module") @@ -240,12 +233,11 @@ def normalized_braf(): "relation": "relatedMatch", }, ], - "aliases": ["BRAF1", "BRAF-1", "RAFB1", "NS7", "B-RAF1", "B-raf"], + "alternativeLabels": ["BRAF1", "BRAF-1", "RAFB1", "NS7", "B-RAF1", "B-raf"], "extensions": [ { "name": "approved_name", "value": "B-Raf proto-oncogene, serine/threonine kinase", - "type": "Extension", }, # { # "name": "hgnc_locations", @@ -259,13 +251,11 @@ def normalized_braf(): # "start": "q34", # } # ], - # "type": "Extension" # }, { "name": "ensembl_locations", "value": [ { - "id": "ga4gh:SL.WJ0hsPzXuK54mQyVysTqUNV5jaCATnRf", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -275,7 +265,6 @@ def normalized_braf(): "end": 140924929, } ], - "type": "Extension", }, { "name": "ncbi_locations", @@ -289,7 +278,6 @@ def normalized_braf(): # "end": "q34" # }, { - "id": "ga4gh:SL.uNBZoxhjhohl24VlIut-JxPJAGfJ7EQE", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -299,20 +287,18 @@ def normalized_braf(): "end": 140924929, } ], - "type": "Extension", }, - {"name": "ncbi_gene_type", "type": "Extension", "value": "protein-coding"}, + {"name": "ncbi_gene_type", "value": "protein-coding"}, { "name": "hgnc_locus_type", - "type": "Extension", "value": "gene with protein product", }, - {"name": "ensembl_biotype", "type": "Extension", "value": "protein_coding"}, - {"name": "strand", "type": "Extension", "value": "-"}, - {"name": "symbol_status", "type": "Extension", "value": "approved"}, + {"name": "ensembl_biotype", "value": "protein_coding"}, + {"name": "strand", "value": "-"}, + {"name": "symbol_status", "value": "approved"}, ], } - return core_models.Gene(**params) + return domain_models.Gene(**params) @pytest.fixture(scope="module") @@ -384,7 +370,7 @@ def normalized_abl1(): "relation": "relatedMatch", }, ], - "aliases": [ + "alternativeLabels": [ "c-ABL", "JTK7", "p150", @@ -401,12 +387,10 @@ def normalized_abl1(): { "name": "previous_symbols", "value": ["LOC116063", "LOC112779", "ABL"], - "type": "Extension", }, { "name": "approved_name", "value": "ABL proto-oncogene 1, non-receptor tyrosine kinase", - "type": "Extension", }, # { # "name": "hgnc_locations", @@ -420,7 +404,6 @@ def normalized_abl1(): # "start": "q34.12" # } # ], - # "type": "Extension" # }, { "name": "ncbi_locations", @@ -434,7 +417,6 @@ def normalized_abl1(): # "end": "q34.12" # }, { - "id": "ga4gh:SL.F1QUtInXQaBEjAJNR1sYHXdp0XC000Qi", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -444,13 +426,11 @@ def normalized_abl1(): "end": 130887675, } ], - "type": "Extension", }, { "name": "ensembl_locations", "value": [ { - "id": "ga4gh:SL.P9Qu87GYxoWPYh1BdAQC5bTLorjvvye7", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -460,20 +440,18 @@ def normalized_abl1(): "end": 130887675, } ], - "type": "Extension", }, - {"name": "ncbi_gene_type", "type": "Extension", "value": "protein-coding"}, + {"name": "ncbi_gene_type", "value": "protein-coding"}, { "name": "hgnc_locus_type", - "type": "Extension", "value": "gene with protein product", }, - {"name": "ensembl_biotype", "type": "Extension", "value": "protein_coding"}, - {"name": "strand", "type": "Extension", "value": "+"}, - {"name": "symbol_status", "type": "Extension", "value": "approved"}, + {"name": "ensembl_biotype", "value": "protein_coding"}, + {"name": "strand", "value": "+"}, + {"name": "symbol_status", "value": "approved"}, ], } - return core_models.Gene(**params) + return domain_models.Gene(**params) @pytest.fixture(scope="module") @@ -525,7 +503,7 @@ def normalized_p150(): "relation": "relatedMatch", }, ], - "aliases": [ + "alternativeLabels": [ "CAF1P150", "MGC71229", "CAF-1", @@ -538,7 +516,6 @@ def normalized_p150(): { "name": "approved_name", "value": "chromatin assembly factor 1 subunit A", - "type": "Extension", }, # { # "name": "hgnc_locations", @@ -552,13 +529,11 @@ def normalized_p150(): # "start": "p13.3" # } # ], - # "type": "Extension" # }, { "name": "ensembl_locations", "value": [ { - "id": "ga4gh:SL.tLUFh2LAYq-bsMi0Vob_TIWrz-sE4HgE", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -581,7 +556,6 @@ def normalized_p150(): # "end": "p13.3" # }, { - "id": "ga4gh:SL.-3T7UXNk6nIkMKB9YGEb0RTYxbVY2TUy", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -592,23 +566,21 @@ def normalized_p150(): } ], }, - {"name": "ncbi_gene_type", "type": "Extension", "value": "protein-coding"}, + {"name": "ncbi_gene_type", "value": "protein-coding"}, { "name": "hgnc_locus_type", - "type": "Extension", "value": "gene with protein product", }, - {"name": "ensembl_biotype", "type": "Extension", "value": "protein_coding"}, + {"name": "ensembl_biotype", "value": "protein_coding"}, { "name": "previous_symbols", - "type": "Extension", "value": ["LOC107985297"], }, - {"name": "strand", "type": "Extension", "value": "+"}, - {"name": "symbol_status", "type": "Extension", "value": "approved"}, + {"name": "strand", "value": "+"}, + {"name": "symbol_status", "value": "approved"}, ], } - return core_models.Gene(**params) + return domain_models.Gene(**params) @pytest.fixture(scope="module") @@ -619,10 +591,9 @@ def normalized_loc_653303(): params = { "type": "Gene", "label": "LOC653303", - "aliases": ["LOC196266", "LOC654080", "LOC731196"], + "alternativeLabels": ["LOC196266", "LOC654080", "LOC731196"], "extensions": [ { - "type": "Extension", "name": "approved_name", "value": "proprotein convertase subtilisin/kexin type 7 pseudogene", }, @@ -638,7 +609,6 @@ def normalized_loc_653303(): # "end": "q23.3" # }, { - "id": "ga4gh:SL.hgpw5EH5q6_PFX1CTcOx5od0LKUQRuDH", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -650,16 +620,15 @@ def normalized_loc_653303(): ], }, { - "type": "Extension", "name": "previous_symbols", "value": ["LOC196266", "LOC731196", "LOC654080"], }, - {"type": "Extension", "name": "ncbi_gene_type", "value": "pseudo"}, - {"name": "strand", "type": "Extension", "value": "+"}, + {"name": "ncbi_gene_type", "value": "pseudo"}, + {"name": "strand", "value": "+"}, ], "id": "normalize.gene.ncbigene:653303", } - return core_models.Gene(**params) + return domain_models.Gene(**params) @pytest.fixture(scope="module") @@ -689,7 +658,6 @@ def normalize_unmerged_loc_653303(): # "end": "q23.3" # }, { - "id": "ga4gh:SL.hgpw5EH5q6_PFX1CTcOx5od0LKUQRuDH", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -771,7 +739,6 @@ def normalize_unmerged_chaf1a(): "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.tLUFh2LAYq-bsMi0Vob_TIWrz-sE4HgE", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -808,7 +775,6 @@ def normalize_unmerged_chaf1a(): # "end": "p13.3" # }, { - "id": "ga4gh:SL.-3T7UXNk6nIkMKB9YGEb0RTYxbVY2TUy", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -855,7 +821,6 @@ def normalize_unmerged_ache(): # "end": "q22.1" # }, { - "id": "ga4gh:SL.U7vPSlX8eyCKdFSiROIsc9om0Y7pCm2g", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -884,7 +849,6 @@ def normalize_unmerged_ache(): "location_annotations": [], "locations": [ { - "id": "ga4gh:SL.dnydHb2Bnv5pwXjI4MpJmrZUADf5QLe1", "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", @@ -970,21 +934,20 @@ def normalized_ifnr(): "relation": "relatedMatch", }, ], - "aliases": ["IFNGM", "IFNGM2"], + "alternativeLabels": ["IFNGM", "IFNGM2"], "extensions": [ { "name": "approved_name", "value": "interferon production regulator", - "type": "Extension", }, - {"name": "symbol_status", "value": "approved", "type": "Extension"}, - {"name": "symbol_status", "value": "approved", "type": "Extension"}, - {"name": "ncbi_gene_type", "type": "Extension", "value": "unknown"}, - {"name": "hgnc_locus_type", "type": "Extension", "value": "unknown"}, - {"name": "location_annotations", "type": "Extension", "value": ["16"]}, + {"name": "symbol_status", "value": "approved"}, + {"name": "symbol_status", "value": "approved"}, + {"name": "ncbi_gene_type", "value": "unknown"}, + {"name": "hgnc_locus_type", "value": "unknown"}, + {"name": "location_annotations", "value": ["16"]}, ], } - return core_models.Gene(**params) + return domain_models.Gene(**params) @pytest.fixture(scope="module") @@ -1054,6 +1017,9 @@ def compare_unmerged_record(gene, test_gene): assert gene.symbol == test_gene.symbol assert len(gene.locations) == len(test_gene.locations) for loc in gene.locations: + assert loc.id.split("ga4gh:SL.")[-1] == loc.digest + loc.id = None + loc.digest = None assert loc in test_gene.locations assert set(gene.location_annotations) == set(test_gene.location_annotations) assert gene.strand == test_gene.strand @@ -1109,7 +1075,7 @@ def compare_gene(test, actual): assert no_matches == [], no_matches assert len(actual.mappings) == len(test.mappings) - assert set(actual.aliases) == set(test.aliases), "aliases" + assert set(actual.alternativeLabels) == set(test.alternativeLabels), "alternativeLabels" extensions_present = "extensions" in test.model_fields.keys() assert ("extensions" in actual.model_fields.keys()) == extensions_present if extensions_present: @@ -1126,6 +1092,9 @@ def compare_gene(test, actual): if isinstance(test_ext.value, list): if test_ext.value: if isinstance(test_ext.value[0], dict): + if test_ext.value[0].get("type") == "SequenceLocation": + actual_digest = actual_ext.value[0].pop("id").split("ga4gh:SL.")[-1] + assert actual_ext.value[0].pop("digest") == actual_digest assert actual_ext.value == test_ext.value else: assert set(actual_ext.value) == set( @@ -1135,7 +1104,6 @@ def compare_gene(test, actual): assert actual_ext.value == test_ext.value else: assert actual_ext.value == test_ext.value - assert actual_ext.type == test_ext.type n_ext_correct += 1 assert n_ext_correct == len(test.extensions), "number of correct extensions" diff --git a/tests/unit/test_schemas.py b/tests/unit/test_schemas.py index 3d5fceed..8c183cd6 100644 --- a/tests/unit/test_schemas.py +++ b/tests/unit/test_schemas.py @@ -20,7 +20,7 @@ def sequence_location(): """Create a valid sequence location test fixture.""" return models.SequenceLocation( - sequence=models.SequenceReference( + sequenceReference=models.SequenceReference( refgetAccession="SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" ), start=140719327,