diff --git a/.gitignore b/.gitignore deleted file mode 100644 index adc8af2..0000000 --- a/.gitignore +++ /dev/null @@ -1,173 +0,0 @@ -# Ignore xlsx temp file -\~* - -# Ignore system files -.DS_Store - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# allow pytest override with pytest.ini -pytest.ini - -# ignore data dir -data/* -therapy/data/ - -# IDE materials -.idea/ -.vim/ -*.swp - -# DynamoDB Local -dynamodb_local_latest/* - -# Build files -Pipfile.lock -pyproject.toml - -# local data -data/ - -# services -therapy-normalization/ -gene-normalization/ -variant-normalization/ -disease-normalization/ -metakb/ diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..6bd1074 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.9.1 diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..90bb475 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,25 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Kuzma" + given-names: "Kori" +- family-names: "Stevenson" + given-names: "James" +- family-names: "Liu" + given-names: "Jiachen" +- family-names: "Coffman" + given-names: "Adam" +- family-names: "Henkenjohann" + given-names: "Richard" +- family-names: "Babb" + given-names: "Lawrence" +- family-names: "Liu" + given-names: "Xuelu" +- family-names: "Wagner" + given-names: "Alex H." 
+ orcid: "https://orcid.org/0000-0002-2502-8961" +doi: 10.5281/zenodo.5894937 +title: "VICC Variation Normalization Service" +version: 0.2.16dev +date-released: 2022-01-23 +url: "https://github.com/cancervariants/variation-normalization" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ddcd5fc --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +# A simple container for variant-service. +# Runs service on port 80. +# Healthchecks service up every 5m. + +FROM python:3.9 +RUN apt update ; apt install -y rsync +RUN pip install pipenv uvicorn[standard] +ENV SEQREPO_ROOT_DIR=/usr/local/share/seqrepo/2021-01-29 +#ENV GENE_NORM_DB_URL=http://localhost:8001 +ENV GENE_NORM_DB_URL=http://dynamodb:8001 +ENV AWS_ACCESS_KEY_ID = 'DUMMYIDEXAMPLE' +ENV AWS_SECRET_ACCESS_KEY = 'DUMMYEXAMPLEKEY' +ENV AWS_DEFAULT_REGION = 'us-west-2' +COPY . /app +WORKDIR /app +RUN if [ ! -f "Pipfile.lock" ] ; then pipenv lock ; else echo Pipfile.lock exists ; fi +RUN pipenv sync +EXPOSE 80 +HEALTHCHECK --interval=5m --timeout=3s \ + CMD curl -f http://localhost/variation || exit 1 + +CMD pipenv run uvicorn variation.main:app --log-level debug --port 80 --host 0.0.0.0 +#CMD pipenv run uvicorn variation.main:app --reload diff --git a/EBSampleApp-Python.iml b/EBSampleApp-Python.iml new file mode 100644 index 0000000..db04778 --- /dev/null +++ b/EBSampleApp-Python.iml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..70dd49d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018-2023 VICC + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is 
+furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..ea36fa6 --- /dev/null +++ b/Pipfile @@ -0,0 +1,27 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] +pytest = "*" +pytest-asyncio = "*" +pytest-cov = "*" +pre-commit = "*" +variation-normalizer = {editable = true, path = "."} +jupyter = "*" +ipykernel = "*" +psycopg2-binary = "*" +ruff = "*" +black = "*" + +[packages] +"biocommons.seqrepo" = "*" +fastapi = "*" +uvicorn = "*" +pydantic = "==2.*" +"ga4gh.vrs" = {version = "~=2.0.0a2", extras = ["extras"]} +gene-normalizer = "~=0.3.0.dev1" +boto3 = "*" +cool-seq-tool = "~=0.4.0.dev1" +bioutils = "*" diff --git a/Pipfile_staging.lock b/Pipfile_staging.lock new file mode 100644 index 0000000..4cf7afd --- /dev/null +++ b/Pipfile_staging.lock @@ -0,0 +1,3523 @@ +{ + "_meta": { + "hash": { + "sha256": "004e58791b50ee886136eeb85baf8d316adda6366feecae61f8fcd5a5793812f" + }, + "pipfile-spec": 6, + "requires": {}, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "aiofiles": { + "hashes": [ + "sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107", + "sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a" + ], + "markers": "python_version >= '3.7'", + 
"version": "==23.2.1" + }, + "annotated-types": { + "hashes": [ + "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43", + "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d" + ], + "markers": "python_version >= '3.8'", + "version": "==0.6.0" + }, + "anyio": { + "hashes": [ + "sha256:745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee", + "sha256:e1875bb4b4e2de1669f4bc7869b6d3f54231cdced71605e6e64c9be77e3be50f" + ], + "markers": "python_version >= '3.8'", + "version": "==4.2.0" + }, + "appdirs": { + "hashes": [ + "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", + "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" + ], + "version": "==1.4.4" + }, + "asttokens": { + "hashes": [ + "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24", + "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0" + ], + "version": "==2.4.1" + }, + "async-timeout": { + "hashes": [ + "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f", + "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028" + ], + "markers": "python_full_version < '3.12.0'", + "version": "==4.0.3" + }, + "asyncpg": { + "hashes": [ + "sha256:0009a300cae37b8c525e5b449233d59cd9868fd35431abc470a3e364d2b85cb9", + "sha256:000c996c53c04770798053e1730d34e30cb645ad95a63265aec82da9093d88e7", + "sha256:012d01df61e009015944ac7543d6ee30c2dc1eb2f6b10b62a3f598beb6531548", + "sha256:039a261af4f38f949095e1e780bae84a25ffe3e370175193174eb08d3cecab23", + "sha256:103aad2b92d1506700cbf51cd8bb5441e7e72e87a7b3a2ca4e32c840f051a6a3", + "sha256:1e186427c88225ef730555f5fdda6c1812daa884064bfe6bc462fd3a71c4b675", + "sha256:2245be8ec5047a605e0b454c894e54bf2ec787ac04b1cb7e0d3c67aa1e32f0fe", + "sha256:37a2ec1b9ff88d8773d3eb6d3784dc7e3fee7756a5317b67f923172a4748a175", + "sha256:48e7c58b516057126b363cec8ca02b804644fd012ef8e6c7e23386b7d5e6ce83", + 
"sha256:52e8f8f9ff6e21f9b39ca9f8e3e33a5fcdceaf5667a8c5c32bee158e313be385", + "sha256:5340dd515d7e52f4c11ada32171d87c05570479dc01dc66d03ee3e150fb695da", + "sha256:54858bc25b49d1114178d65a88e48ad50cb2b6f3e475caa0f0c092d5f527c106", + "sha256:5b52e46f165585fd6af4863f268566668407c76b2c72d366bb8b522fa66f1870", + "sha256:5bbb7f2cafd8d1fa3e65431833de2642f4b2124be61a449fa064e1a08d27e449", + "sha256:5cad1324dbb33f3ca0cd2074d5114354ed3be2b94d48ddfd88af75ebda7c43cc", + "sha256:6011b0dc29886ab424dc042bf9eeb507670a3b40aece3439944006aafe023178", + "sha256:642a36eb41b6313ffa328e8a5c5c2b5bea6ee138546c9c3cf1bffaad8ee36dd9", + "sha256:6feaf2d8f9138d190e5ec4390c1715c3e87b37715cd69b2c3dfca616134efd2b", + "sha256:72fd0ef9f00aeed37179c62282a3d14262dbbafb74ec0ba16e1b1864d8a12169", + "sha256:746e80d83ad5d5464cfbf94315eb6744222ab00aa4e522b704322fb182b83610", + "sha256:76c3ac6530904838a4b650b2880f8e7af938ee049e769ec2fba7cd66469d7772", + "sha256:797ab8123ebaed304a1fad4d7576d5376c3a006a4100380fb9d517f0b59c1ab2", + "sha256:8d36c7f14a22ec9e928f15f92a48207546ffe68bc412f3be718eedccdf10dc5c", + "sha256:97eb024685b1d7e72b1972863de527c11ff87960837919dac6e34754768098eb", + "sha256:a65c1dcd820d5aea7c7d82a3fdcb70e096f8f70d1a8bf93eb458e49bfad036ac", + "sha256:a921372bbd0aa3a5822dd0409da61b4cd50df89ae85150149f8c119f23e8c408", + "sha256:a9e6823a7012be8b68301342ba33b4740e5a166f6bbda0aee32bc01638491a22", + "sha256:b544ffc66b039d5ec5a7454667f855f7fec08e0dfaf5a5490dfafbb7abbd2cfb", + "sha256:bb1292d9fad43112a85e98ecdc2e051602bce97c199920586be83254d9dafc02", + "sha256:bde17a1861cf10d5afce80a36fca736a86769ab3579532c03e45f83ba8a09c59", + "sha256:cce08a178858b426ae1aa8409b5cc171def45d4293626e7aa6510696d46decd8", + "sha256:cfe73ffae35f518cfd6e4e5f5abb2618ceb5ef02a2365ce64f132601000587d3", + "sha256:d1c49e1f44fffafd9a55e1a9b101590859d881d639ea2922516f5d9c512d354e", + "sha256:d4900ee08e85af01adb207519bb4e14b1cae8fd21e0ccf80fac6aa60b6da37b4", + "sha256:d84156d5fb530b06c493f9e7635aa18f518fa1d1395ef240d211cb563c4e2364", 
+ "sha256:dc600ee8ef3dd38b8d67421359779f8ccec30b463e7aec7ed481c8346decf99f", + "sha256:e0bfe9c4d3429706cf70d3249089de14d6a01192d617e9093a8e941fea8ee775", + "sha256:e17b52c6cf83e170d3d865571ba574577ab8e533e7361a2b8ce6157d02c665d3", + "sha256:f100d23f273555f4b19b74a96840aa27b85e99ba4b1f18d4ebff0734e78dc090", + "sha256:f9ea3f24eb4c49a615573724d88a48bd1b7821c890c2effe04f05382ed9e8810", + "sha256:ff8e8109cd6a46ff852a5e6bab8b0a047d7ea42fcb7ca5ae6eaae97d8eacf397" + ], + "markers": "python_full_version >= '3.8.0'", + "version": "==0.29.0" + }, + "attrs": { + "hashes": [ + "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30", + "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2.0" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", + "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==4.12.3" + }, + "biocommons.seqrepo": { + "hashes": [ + "sha256:0a4361770978350ea35f71357a7aafc0bacb7200bf87a80677d3a6fd7cfde981", + "sha256:655eb8a2b6e3d4564ef2cbfaa2db6415ccb066f9fd786bd4b3303bbba00e752a" + ], + "markers": "python_version >= '3.9'", + "version": "==0.6.6" + }, + "bioutils": { + "hashes": [ + "sha256:f58de493260042bff78aef484a3caf84e40987b663075f8573022df6f4c2a2ac" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.5.8.post1" + }, + "boto3": { + "hashes": [ + "sha256:33a8b6d9136fa7427160edb92d2e50f2035f04e9d63a2d1027349053e12626aa", + "sha256:b2f321e20966f021ec800b7f2c01287a3dd04fc5965acdfbaa9c505a24ca45d1" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==1.34.34" + }, + "botocore": { + "hashes": [ + "sha256:54093dc97372bb7683f5c61a279aa8240408abf3b2cc494ae82a9a90c1b784b5", + "sha256:cd060b0d88ebb2b893f1411c1db7f2ba66cc18e52dcc57ad029564ef5fec437b" + ], + 
"markers": "python_version >= '3.8'", + "version": "==1.34.34" + }, + "bs4": { + "hashes": [ + "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", + "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc" + ], + "version": "==0.0.2" + }, + "canonicaljson": { + "hashes": [ + "sha256:c38a315de3b5a0532f1ec1f9153cd3d716abfc565a558d00a4835428a34fca5b", + "sha256:e2fdaef1d7fadc5d9cb59bd3d0d41b064ddda697809ac4325dced721d12f113f" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, + "certifi": { + "hashes": [ + "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", + "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" + ], + "markers": "python_version >= '3.6'", + "version": "==2024.2.2" + }, + "charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + 
"sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", 
+ "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + 
"sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "click": { + "hashes": [ + 
"sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" + }, + "coloredlogs": { + "hashes": [ + "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", + "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==15.0.1" + }, + "configparser": { + "hashes": [ + "sha256:900ea2bb01b2540b1a644ad3d5351e9b961a4a012d4732f619375fb8f641ee19", + "sha256:ec914ab1e56c672de1f5c3483964e68f71b34e457904b7b76e06b922aec067a8" + ], + "markers": "python_version >= '3.8'", + "version": "==6.0.0" + }, + "cool-seq-tool": { + "hashes": [ + "sha256:3b2f58210c9d365d0ad03ce5d12a1ca949e0bc47a9c97c7a9261e46b2fcd0c59", + "sha256:cae1becc85c228e7479b310546c060c4cb39cfd8f89180e6eb02dd699be275dd" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.3.0.dev1" + }, + "cssselect": { + "hashes": [ + "sha256:666b19839cfaddb9ce9d36bfe4c969132c647b92fc9088c4e23f786b30f1b3dc", + "sha256:da1885f0c10b60c03ed5eccbb6b68d6eff248d91976fcde348f395d54c9fd35e" + ], + "markers": "python_version >= '3.7'", + "version": "==1.2.0" + }, + "decorator": { + "hashes": [ + "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", + "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" + ], + "markers": "python_version >= '3.5'", + "version": "==5.1.1" + }, + "dill": { + "hashes": [ + "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", + "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7" + ], + "version": "==0.3.8" + }, + "exceptiongroup": { + "hashes": [ + "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", + "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68" + 
], + "markers": "python_version < '3.11'", + "version": "==1.2.0" + }, + "executing": { + "hashes": [ + "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147", + "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc" + ], + "markers": "python_version >= '3.5'", + "version": "==2.0.1" + }, + "fake-useragent": { + "hashes": [ + "sha256:5426e4015d8ccc5bb25f64d3dfcfd3915eba30ffebd31b86b60dc7a4c5d65528", + "sha256:9acce439ee2c6cf9c3772fa6c200f62dc8d56605063327a4d8c5d0e47f414b85" + ], + "version": "==1.4.0" + }, + "fastapi": { + "hashes": [ + "sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d", + "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.109.2" + }, + "ga4gh.vrs": { + "extras": [ + "extras" + ], + "hashes": [ + "sha256:60239da2e1c6dcac3022cc073fe82759dd787c140d589781da9150f85b859b08", + "sha256:95e49ca1ad2e3a7574f5b832ac64e1a75989eaa0b3f7d5d0163b0db8d9293177" + ], + "markers": "python_version >= '3.8'", + "version": "==2.0.0a2" + }, + "gene-normalizer": { + "hashes": [ + "sha256:4d4acad6c96dc7cf2015de0d758ab63022585a629db2436b54db19b89d4be983", + "sha256:87215320599cff7bc84a78041d857d626061792de7a049febe5e9482b0cae501" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.3.0.dev1" + }, + "h11": { + "hashes": [ + "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" + }, + "hgvs": { + "hashes": [ + "sha256:06abb6363bb0c8ef9f3f8f9dc333d3a346ab5f9ebcb20a5bb56c69256262559f", + "sha256:598640bae0de34ff29c58440904fc9156d7a1bc750ddef5894edd415c772b957" + ], + "markers": "python_version >= '3.6'", + "version": "==1.5.4" + }, + "humanfriendly": { + "hashes": [ + 
"sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", + "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==10.0" + }, + "idna": { + "hashes": [ + "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", + "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f" + ], + "markers": "python_version >= '3.5'", + "version": "==3.6" + }, + "importlib-metadata": { + "hashes": [ + "sha256:4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e", + "sha256:f238736bb06590ae52ac1fab06a3a9ef1d8dce2b7a35b5ab329371d6c8f5d2cc" + ], + "markers": "python_version >= '3.8'", + "version": "==7.0.1" + }, + "importlib-resources": { + "hashes": [ + "sha256:3893a00122eafde6894c59914446a512f728a0c1a45f9bb9b63721b6bacf0b4a", + "sha256:e8bf90d8213b486f428c9c39714b920041cb02c184686a3dee24905aaa8105d6" + ], + "markers": "python_version < '3.10'", + "version": "==6.1.1" + }, + "ipython": { + "hashes": [ + "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27", + "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397" + ], + "markers": "python_version >= '3.9'", + "version": "==8.18.1" + }, + "jedi": { + "hashes": [ + "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd", + "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0" + ], + "markers": "python_version >= '3.6'", + "version": "==0.19.1" + }, + "jmespath": { + "hashes": [ + "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", + "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, + "lxml": { + "hashes": [ + "sha256:13521a321a25c641b9ea127ef478b580b5ec82aa2e9fc076c86169d161798b01", + 
"sha256:14deca1460b4b0f6b01f1ddc9557704e8b365f55c63070463f6c18619ebf964f", + "sha256:16018f7099245157564d7148165132c70adb272fb5a17c048ba70d9cc542a1a1", + "sha256:16dd953fb719f0ffc5bc067428fc9e88f599e15723a85618c45847c96f11f431", + "sha256:19a1bc898ae9f06bccb7c3e1dfd73897ecbbd2c96afe9095a6026016e5ca97b8", + "sha256:1ad17c20e3666c035db502c78b86e58ff6b5991906e55bdbef94977700c72623", + "sha256:22b7ee4c35f374e2c20337a95502057964d7e35b996b1c667b5c65c567d2252a", + "sha256:24ef5a4631c0b6cceaf2dbca21687e29725b7c4e171f33a8f8ce23c12558ded1", + "sha256:25663d6e99659544ee8fe1b89b1a8c0aaa5e34b103fab124b17fa958c4a324a6", + "sha256:262bc5f512a66b527d026518507e78c2f9c2bd9eb5c8aeeb9f0eb43fcb69dc67", + "sha256:280f3edf15c2a967d923bcfb1f8f15337ad36f93525828b40a0f9d6c2ad24890", + "sha256:2ad3a8ce9e8a767131061a22cd28fdffa3cd2dc193f399ff7b81777f3520e372", + "sha256:2befa20a13f1a75c751f47e00929fb3433d67eb9923c2c0b364de449121f447c", + "sha256:2f37c6d7106a9d6f0708d4e164b707037b7380fcd0b04c5bd9cae1fb46a856fb", + "sha256:304128394c9c22b6569eba2a6d98392b56fbdfbad58f83ea702530be80d0f9df", + "sha256:342e95bddec3a698ac24378d61996b3ee5ba9acfeb253986002ac53c9a5f6f84", + "sha256:3aeca824b38ca78d9ee2ab82bd9883083d0492d9d17df065ba3b94e88e4d7ee6", + "sha256:3d184e0d5c918cff04cdde9dbdf9600e960161d773666958c9d7b565ccc60c45", + "sha256:3e3898ae2b58eeafedfe99e542a17859017d72d7f6a63de0f04f99c2cb125936", + "sha256:3eea6ed6e6c918e468e693c41ef07f3c3acc310b70ddd9cc72d9ef84bc9564ca", + "sha256:3f14a4fb1c1c402a22e6a341a24c1341b4a3def81b41cd354386dcb795f83897", + "sha256:436a943c2900bb98123b06437cdd30580a61340fbdb7b28aaf345a459c19046a", + "sha256:4946e7f59b7b6a9e27bef34422f645e9a368cb2be11bf1ef3cafc39a1f6ba68d", + "sha256:49a9b4af45e8b925e1cd6f3b15bbba2c81e7dba6dce170c677c9cda547411e14", + "sha256:4f8b0c78e7aac24979ef09b7f50da871c2de2def043d468c4b41f512d831e912", + "sha256:52427a7eadc98f9e62cb1368a5079ae826f94f05755d2d567d93ee1bc3ceb354", + "sha256:5e53d7e6a98b64fe54775d23a7c669763451340c3d44ad5e3a3b48a1efbdc96f", 
+ "sha256:5fcfbebdb0c5d8d18b84118842f31965d59ee3e66996ac842e21f957eb76138c", + "sha256:601f4a75797d7a770daed8b42b97cd1bb1ba18bd51a9382077a6a247a12aa38d", + "sha256:61c5a7edbd7c695e54fca029ceb351fc45cd8860119a0f83e48be44e1c464862", + "sha256:6a2a2c724d97c1eb8cf966b16ca2915566a4904b9aad2ed9a09c748ffe14f969", + "sha256:6d48fc57e7c1e3df57be5ae8614bab6d4e7b60f65c5457915c26892c41afc59e", + "sha256:6f11b77ec0979f7e4dc5ae081325a2946f1fe424148d3945f943ceaede98adb8", + "sha256:704f5572ff473a5f897745abebc6df40f22d4133c1e0a1f124e4f2bd3330ff7e", + "sha256:725e171e0b99a66ec8605ac77fa12239dbe061482ac854d25720e2294652eeaa", + "sha256:7cfced4a069003d8913408e10ca8ed092c49a7f6cefee9bb74b6b3e860683b45", + "sha256:7ec465e6549ed97e9f1e5ed51c657c9ede767bc1c11552f7f4d022c4df4a977a", + "sha256:82bddf0e72cb2af3cbba7cec1d2fd11fda0de6be8f4492223d4a268713ef2147", + "sha256:82cd34f1081ae4ea2ede3d52f71b7be313756e99b4b5f829f89b12da552d3aa3", + "sha256:843b9c835580d52828d8f69ea4302537337a21e6b4f1ec711a52241ba4a824f3", + "sha256:877efb968c3d7eb2dad540b6cabf2f1d3c0fbf4b2d309a3c141f79c7e0061324", + "sha256:8b9f19df998761babaa7f09e6bc169294eefafd6149aaa272081cbddc7ba4ca3", + "sha256:8cf5877f7ed384dabfdcc37922c3191bf27e55b498fecece9fd5c2c7aaa34c33", + "sha256:8d2900b7f5318bc7ad8631d3d40190b95ef2aa8cc59473b73b294e4a55e9f30f", + "sha256:8d7b4beebb178e9183138f552238f7e6613162a42164233e2bda00cb3afac58f", + "sha256:8f52fe6859b9db71ee609b0c0a70fea5f1e71c3462ecf144ca800d3f434f0764", + "sha256:98f3f020a2b736566c707c8e034945c02aa94e124c24f77ca097c446f81b01f1", + "sha256:9aa543980ab1fbf1720969af1d99095a548ea42e00361e727c58a40832439114", + "sha256:9b99f564659cfa704a2dd82d0684207b1aadf7d02d33e54845f9fc78e06b7581", + "sha256:9bcf86dfc8ff3e992fed847c077bd875d9e0ba2fa25d859c3a0f0f76f07f0c8d", + "sha256:9bd0ae7cc2b85320abd5e0abad5ccee5564ed5f0cc90245d2f9a8ef330a8deae", + "sha256:9d3c0f8567ffe7502d969c2c1b809892dc793b5d0665f602aad19895f8d508da", + 
"sha256:9e5ac3437746189a9b4121db2a7b86056ac8786b12e88838696899328fc44bb2", + "sha256:a36c506e5f8aeb40680491d39ed94670487ce6614b9d27cabe45d94cd5d63e1e", + "sha256:a5ab722ae5a873d8dcee1f5f45ddd93c34210aed44ff2dc643b5025981908cda", + "sha256:a96f02ba1bcd330807fc060ed91d1f7a20853da6dd449e5da4b09bfcc08fdcf5", + "sha256:acb6b2f96f60f70e7f34efe0c3ea34ca63f19ca63ce90019c6cbca6b676e81fa", + "sha256:ae15347a88cf8af0949a9872b57a320d2605ae069bcdf047677318bc0bba45b1", + "sha256:af8920ce4a55ff41167ddbc20077f5698c2e710ad3353d32a07d3264f3a2021e", + "sha256:afd825e30f8d1f521713a5669b63657bcfe5980a916c95855060048b88e1adb7", + "sha256:b21b4031b53d25b0858d4e124f2f9131ffc1530431c6d1321805c90da78388d1", + "sha256:b4b68c961b5cc402cbd99cca5eb2547e46ce77260eb705f4d117fd9c3f932b95", + "sha256:b66aa6357b265670bb574f050ffceefb98549c721cf28351b748be1ef9577d93", + "sha256:b9e240ae0ba96477682aa87899d94ddec1cc7926f9df29b1dd57b39e797d5ab5", + "sha256:bc64d1b1dab08f679fb89c368f4c05693f58a9faf744c4d390d7ed1d8223869b", + "sha256:bf8443781533b8d37b295016a4b53c1494fa9a03573c09ca5104550c138d5c05", + "sha256:c26aab6ea9c54d3bed716b8851c8bfc40cb249b8e9880e250d1eddde9f709bf5", + "sha256:c3cd1fc1dc7c376c54440aeaaa0dcc803d2126732ff5c6b68ccd619f2e64be4f", + "sha256:c7257171bb8d4432fe9d6fdde4d55fdbe663a63636a17f7f9aaba9bcb3153ad7", + "sha256:d42e3a3fc18acc88b838efded0e6ec3edf3e328a58c68fbd36a7263a874906c8", + "sha256:d74fcaf87132ffc0447b3c685a9f862ffb5b43e70ea6beec2fb8057d5d2a1fea", + "sha256:d8c1d679df4361408b628f42b26a5d62bd3e9ba7f0c0e7969f925021554755aa", + "sha256:e856c1c7255c739434489ec9c8aa9cdf5179785d10ff20add308b5d673bed5cd", + "sha256:eac68f96539b32fce2c9b47eb7c25bb2582bdaf1bbb360d25f564ee9e04c542b", + "sha256:ed7326563024b6e91fef6b6c7a1a2ff0a71b97793ac33dbbcf38f6005e51ff6e", + "sha256:ed8c3d2cd329bf779b7ed38db176738f3f8be637bb395ce9629fc76f78afe3d4", + "sha256:f4c9bda132ad108b387c33fabfea47866af87f4ea6ffb79418004f0521e63204", + "sha256:f643ffd2669ffd4b5a3e9b41c909b72b2a1d5e4915da90a77e119b8d48ce867a" 
+ ], + "markers": "python_version >= '3.6'", + "version": "==5.1.0" + }, + "matplotlib-inline": { + "hashes": [ + "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311", + "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304" + ], + "markers": "python_version >= '3.5'", + "version": "==0.1.6" + }, + "numpy": { + "hashes": [ + "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd", + "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b", + "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e", + "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f", + "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f", + "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178", + "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3", + "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4", + "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e", + "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0", + "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00", + "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419", + "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4", + "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6", + "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166", + "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b", + "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3", + "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf", + "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2", + "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2", + "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36", + 
"sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03", + "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce", + "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6", + "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13", + "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5", + "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e", + "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485", + "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137", + "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374", + "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58", + "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b", + "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb", + "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b", + "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda", + "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511" + ], + "markers": "python_version >= '3.9'", + "version": "==1.26.3" + }, + "parse": { + "hashes": [ + "sha256:09002ca350ad42e76629995f71f7b518670bcf93548bdde3684fd55d2be51975", + "sha256:76ddd5214255ae711db4c512be636151fbabaa948c6f30115aecc440422ca82c" + ], + "version": "==1.20.1" + }, + "parsley": { + "hashes": [ + "sha256:9444278d47161d5f2be76a767809a3cbe6db4db822f46a4fd7481d4057208d41", + "sha256:c3bc417b8c7e3a96c87c0f2f751bfd784ed5156ffccebe2f84330df5685f8dc3" + ], + "version": "==1.3" + }, + "parso": { + "hashes": [ + "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0", + "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75" + ], + "markers": "python_version >= '3.6'", + "version": "==0.8.3" + }, + "pexpect": { + "hashes": [ + 
"sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", + "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f" + ], + "markers": "sys_platform != 'win32'", + "version": "==4.9.0" + }, + "polars": { + "hashes": [ + "sha256:359d556fafcb533bb0caa34ddfbd5161ee23b8a43817c7a2b80189720a1f42f6", + "sha256:3cfc71d4569818548c9bf4285c497d62a3a542363c86940593a41dd731e69d7f", + "sha256:ddff2fa419f15aa64ee23a94655fcb24b3e1b5c3eb30d124e3315deca2039a92", + "sha256:ec742fdf41e16ff699c043259ba94a11bbc2f7dcb978d768495db1ff2b3c5c20", + "sha256:f0928576a52eca47e14a8b98f4da22025b4b2fa32549f80f4d92c5187fd3f461", + "sha256:fd6100df0ca53614c3fa7136251e030fb70dee8833023edf7a3ac380f8e2dce5" + ], + "markers": "python_version >= '3.8'", + "version": "==0.20.7" + }, + "prompt-toolkit": { + "hashes": [ + "sha256:3527b7af26106cbc65a040bcc84839a3566ec1b051bb0bfe953631e704b0ff7d", + "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.0.43" + }, + "psycopg2": { + "hashes": [ + "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981", + "sha256:38a8dcc6856f569068b47de286b472b7c473ac7977243593a288ebce0dc89516", + "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3", + "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa", + "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a", + "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693", + "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372", + "sha256:bac58c024c9922c23550af2a581998624d6e02350f4ae9c5f0bc642c633a2d5e", + "sha256:c92811b2d4c9b6ea0285942b2e7cac98a59e166d59c588fe5cfe1eda58e72d59", + "sha256:d1454bde93fb1e224166811694d600e746430c006fbb031ea06ecc2ea41bf156", + "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024", + 
"sha256:de80739447af31525feddeb8effd640782cf5998e1a4e9192ebdf829717e3913", + "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c" + ], + "markers": "python_version >= '3.7'", + "version": "==2.9.9" + }, + "psycopg2-binary": { + "hashes": [ + "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9", + "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77", + "sha256:0c009475ee389757e6e34611d75f6e4f05f0cf5ebb76c6037508318e1a1e0d7e", + "sha256:0ef4854e82c09e84cc63084a9e4ccd6d9b154f1dbdd283efb92ecd0b5e2b8c84", + "sha256:1236ed0952fbd919c100bc839eaa4a39ebc397ed1c08a97fc45fee2a595aa1b3", + "sha256:143072318f793f53819048fdfe30c321890af0c3ec7cb1dfc9cc87aa88241de2", + "sha256:15208be1c50b99203fe88d15695f22a5bed95ab3f84354c494bcb1d08557df67", + "sha256:1873aade94b74715be2246321c8650cabf5a0d098a95bab81145ffffa4c13876", + "sha256:18d0ef97766055fec15b5de2c06dd8e7654705ce3e5e5eed3b6651a1d2a9a152", + "sha256:1ea665f8ce695bcc37a90ee52de7a7980be5161375d42a0b6c6abedbf0d81f0f", + "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a", + "sha256:246b123cc54bb5361588acc54218c8c9fb73068bf227a4a531d8ed56fa3ca7d6", + "sha256:275ff571376626195ab95a746e6a04c7df8ea34638b99fc11160de91f2fef503", + "sha256:281309265596e388ef483250db3640e5f414168c5a67e9c665cafce9492eda2f", + "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493", + "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996", + "sha256:30dcc86377618a4c8f3b72418df92e77be4254d8f89f14b8e8f57d6d43603c0f", + "sha256:31a34c508c003a4347d389a9e6fcc2307cc2150eb516462a7a17512130de109e", + "sha256:323ba25b92454adb36fa425dc5cf6f8f19f78948cbad2e7bc6cdf7b0d7982e59", + "sha256:34eccd14566f8fe14b2b95bb13b11572f7c7d5c36da61caf414d23b91fcc5d94", + "sha256:3a58c98a7e9c021f357348867f537017057c2ed7f77337fd914d0bedb35dace7", + "sha256:3f78fd71c4f43a13d342be74ebbc0666fe1f555b8837eb113cb7416856c79682", + 
"sha256:4154ad09dac630a0f13f37b583eae260c6aa885d67dfbccb5b02c33f31a6d420", + "sha256:420f9bbf47a02616e8554e825208cb947969451978dceb77f95ad09c37791dae", + "sha256:4686818798f9194d03c9129a4d9a702d9e113a89cb03bffe08c6cf799e053291", + "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe", + "sha256:60989127da422b74a04345096c10d416c2b41bd7bf2a380eb541059e4e999980", + "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93", + "sha256:68fc1f1ba168724771e38bee37d940d2865cb0f562380a1fb1ffb428b75cb692", + "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119", + "sha256:729177eaf0aefca0994ce4cffe96ad3c75e377c7b6f4efa59ebf003b6d398716", + "sha256:72dffbd8b4194858d0941062a9766f8297e8868e1dd07a7b36212aaa90f49472", + "sha256:75723c3c0fbbf34350b46a3199eb50638ab22a0228f93fb472ef4d9becc2382b", + "sha256:77853062a2c45be16fd6b8d6de2a99278ee1d985a7bd8b103e97e41c034006d2", + "sha256:78151aa3ec21dccd5cdef6c74c3e73386dcdfaf19bced944169697d7ac7482fc", + "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c", + "sha256:804d99b24ad523a1fe18cc707bf741670332f7c7412e9d49cb5eab67e886b9b5", + "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab", + "sha256:8359bf4791968c5a78c56103702000105501adb557f3cf772b2c207284273984", + "sha256:83791a65b51ad6ee6cf0845634859d69a038ea9b03d7b26e703f94c7e93dbcf9", + "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf", + "sha256:876801744b0dee379e4e3c38b76fc89f88834bb15bf92ee07d94acd06ec890a0", + "sha256:8dbf6d1bc73f1d04ec1734bae3b4fb0ee3cb2a493d35ede9badbeb901fb40f6f", + "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212", + "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb", + "sha256:977646e05232579d2e7b9c59e21dbe5261f403a88417f6a6512e70d3f8a046be", + "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90", + "sha256:a148c5d507bb9b4f2030a2025c545fccb0e1ef317393eaba42e7eabd28eb6041", 
+ "sha256:a6cdcc3ede532f4a4b96000b6362099591ab4a3e913d70bcbac2b56c872446f7", + "sha256:ac05fb791acf5e1a3e39402641827780fe44d27e72567a000412c648a85ba860", + "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d", + "sha256:b58b4710c7f4161b5e9dcbe73bb7c62d65670a87df7bcce9e1faaad43e715245", + "sha256:b6356793b84728d9d50ead16ab43c187673831e9d4019013f1402c41b1db9b27", + "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417", + "sha256:bc7bb56d04601d443f24094e9e31ae6deec9ccb23581f75343feebaf30423359", + "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202", + "sha256:c332c8d69fb64979ebf76613c66b985414927a40f8defa16cf1bc028b7b0a7b0", + "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7", + "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba", + "sha256:ca08decd2697fdea0aea364b370b1249d47336aec935f87b8bbfd7da5b2ee9c1", + "sha256:ca49a8119c6cbd77375ae303b0cfd8c11f011abbbd64601167ecca18a87e7cdd", + "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07", + "sha256:d2997c458c690ec2bc6b0b7ecbafd02b029b7b4283078d3b32a852a7ce3ddd98", + "sha256:d3f82c171b4ccd83bbaf35aa05e44e690113bd4f3b7b6cc54d2219b132f3ae55", + "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d", + "sha256:ead20f7913a9c1e894aebe47cccf9dc834e1618b7aa96155d2091a626e59c972", + "sha256:ebdc36bea43063116f0486869652cb2ed7032dbc59fbcb4445c4862b5c1ecf7f", + "sha256:ed1184ab8f113e8d660ce49a56390ca181f2981066acc27cf637d5c1e10ce46e", + "sha256:ee825e70b1a209475622f7f7b776785bd68f34af6e7a46e2e42f27b659b5bc26", + "sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957", + "sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53", + "sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52" + ], + "version": "==2.9.9" + }, + "ptyprocess": { + "hashes": [ + "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", + 
"sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" + ], + "version": "==0.7.0" + }, + "pure-eval": { + "hashes": [ + "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", + "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + ], + "version": "==0.2.2" + }, + "pydantic": { + "hashes": [ + "sha256:1440966574e1b5b99cf75a13bec7b20e3512e8a61b894ae252f56275e2c465ae", + "sha256:ae887bd94eb404b09d86e4d12f93893bdca79d766e738528c6fa1c849f3c6bcf" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==2.6.0" + }, + "pydantic-core": { + "hashes": [ + "sha256:06f0d5a1d9e1b7932477c172cc720b3b23c18762ed7a8efa8398298a59d177c7", + "sha256:07982b82d121ed3fc1c51faf6e8f57ff09b1325d2efccaa257dd8c0dd937acca", + "sha256:0f478ec204772a5c8218e30eb813ca43e34005dff2eafa03931b3d8caef87d51", + "sha256:102569d371fadc40d8f8598a59379c37ec60164315884467052830b28cc4e9da", + "sha256:10dca874e35bb60ce4f9f6665bfbfad050dd7573596608aeb9e098621ac331dc", + "sha256:150ba5c86f502c040b822777e2e519b5625b47813bd05f9273a8ed169c97d9ae", + "sha256:1661c668c1bb67b7cec96914329d9ab66755911d093bb9063c4c8914188af6d4", + "sha256:1a2fe7b00a49b51047334d84aafd7e39f80b7675cad0083678c58983662da89b", + "sha256:1ae8048cba95f382dba56766525abca438328455e35c283bb202964f41a780b0", + "sha256:20f724a023042588d0f4396bbbcf4cffd0ddd0ad3ed4f0d8e6d4ac4264bae81e", + "sha256:2133b0e412a47868a358713287ff9f9a328879da547dc88be67481cdac529118", + "sha256:21e3298486c4ea4e4d5cc6fb69e06fb02a4e22089304308817035ac006a7f506", + "sha256:21ebaa4bf6386a3b22eec518da7d679c8363fb7fb70cf6972161e5542f470798", + "sha256:23632132f1fd608034f1a56cc3e484be00854db845b3a4a508834be5a6435a6f", + "sha256:2d5bea8012df5bb6dda1e67d0563ac50b7f64a5d5858348b5c8cb5043811c19d", + "sha256:300616102fb71241ff477a2cbbc847321dbec49428434a2f17f37528721c4948", + "sha256:30a8259569fbeec49cfac7fda3ec8123486ef1b729225222f0d41d5f840b476f", + 
"sha256:399166f24c33a0c5759ecc4801f040dbc87d412c1a6d6292b2349b4c505effc9", + "sha256:3fac641bbfa43d5a1bed99d28aa1fded1984d31c670a95aac1bf1d36ac6ce137", + "sha256:42c29d54ed4501a30cd71015bf982fa95e4a60117b44e1a200290ce687d3e640", + "sha256:462d599299c5971f03c676e2b63aa80fec5ebc572d89ce766cd11ca8bcb56f3f", + "sha256:4eebbd049008eb800f519578e944b8dc8e0f7d59a5abb5924cc2d4ed3a1834ff", + "sha256:502c062a18d84452858f8aea1e520e12a4d5228fc3621ea5061409d666ea1706", + "sha256:5317c04349472e683803da262c781c42c5628a9be73f4750ac7d13040efb5d2d", + "sha256:5511f962dd1b9b553e9534c3b9c6a4b0c9ded3d8c2be96e61d56f933feef9e1f", + "sha256:561be4e3e952c2f9056fba5267b99be4ec2afadc27261505d4992c50b33c513c", + "sha256:601d3e42452cd4f2891c13fa8c70366d71851c1593ed42f57bf37f40f7dca3c8", + "sha256:644904600c15816a1f9a1bafa6aab0d21db2788abcdf4e2a77951280473f33e1", + "sha256:653a5dfd00f601a0ed6654a8b877b18d65ac32c9d9997456e0ab240807be6cf7", + "sha256:694a5e9f1f2c124a17ff2d0be613fd53ba0c26de588eb4bdab8bca855e550d95", + "sha256:71b4a48a7427f14679f0015b13c712863d28bb1ab700bd11776a5368135c7d60", + "sha256:72bf9308a82b75039b8c8edd2be2924c352eda5da14a920551a8b65d5ee89253", + "sha256:735dceec50fa907a3c314b84ed609dec54b76a814aa14eb90da31d1d36873a5e", + "sha256:73802194f10c394c2bedce7a135ba1d8ba6cff23adf4217612bfc5cf060de34c", + "sha256:780daad9e35b18d10d7219d24bfb30148ca2afc309928e1d4d53de86822593dc", + "sha256:8655f55fe68c4685673265a650ef71beb2d31871c049c8b80262026f23605ee3", + "sha256:877045a7969ace04d59516d5d6a7dee13106822f99a5d8df5e6822941f7bedc8", + "sha256:87bce04f09f0552b66fca0c4e10da78d17cb0e71c205864bab4e9595122cb9d9", + "sha256:8d4dfc66abea3ec6d9f83e837a8f8a7d9d3a76d25c9911735c76d6745950e62c", + "sha256:8ec364e280db4235389b5e1e6ee924723c693cbc98e9d28dc1767041ff9bc388", + "sha256:8fa00fa24ffd8c31fac081bf7be7eb495be6d248db127f8776575a746fa55c95", + "sha256:920c4897e55e2881db6a6da151198e5001552c3777cd42b8a4c2f72eedc2ee91", + "sha256:920f4633bee43d7a2818e1a1a788906df5a17b7ab6fe411220ed92b42940f818", 
+ "sha256:9795f56aa6b2296f05ac79d8a424e94056730c0b860a62b0fdcfe6340b658cc8", + "sha256:98f0edee7ee9cc7f9221af2e1b95bd02810e1c7a6d115cfd82698803d385b28f", + "sha256:99c095457eea8550c9fa9a7a992e842aeae1429dab6b6b378710f62bfb70b394", + "sha256:99d3a433ef5dc3021c9534a58a3686c88363c591974c16c54a01af7efd741f13", + "sha256:99f9a50b56713a598d33bc23a9912224fc5d7f9f292444e6664236ae471ddf17", + "sha256:9c46e556ee266ed3fb7b7a882b53df3c76b45e872fdab8d9cf49ae5e91147fd7", + "sha256:9f5d37ff01edcbace53a402e80793640c25798fb7208f105d87a25e6fcc9ea06", + "sha256:a0b4cfe408cd84c53bab7d83e4209458de676a6ec5e9c623ae914ce1cb79b96f", + "sha256:a497be217818c318d93f07e14502ef93d44e6a20c72b04c530611e45e54c2196", + "sha256:ac89ccc39cd1d556cc72d6752f252dc869dde41c7c936e86beac5eb555041b66", + "sha256:adf28099d061a25fbcc6531febb7a091e027605385de9fe14dd6a97319d614cf", + "sha256:afa01d25769af33a8dac0d905d5c7bb2d73c7c3d5161b2dd6f8b5b5eea6a3c4c", + "sha256:b1fc07896fc1851558f532dffc8987e526b682ec73140886c831d773cef44b76", + "sha256:b49c604ace7a7aa8af31196abbf8f2193be605db6739ed905ecaf62af31ccae0", + "sha256:b9f3e0bffad6e238f7acc20c393c1ed8fab4371e3b3bc311020dfa6020d99212", + "sha256:ba07646f35e4e49376c9831130039d1b478fbfa1215ae62ad62d2ee63cf9c18f", + "sha256:bd88f40f2294440d3f3c6308e50d96a0d3d0973d6f1a5732875d10f569acef49", + "sha256:c0be58529d43d38ae849a91932391eb93275a06b93b79a8ab828b012e916a206", + "sha256:c45f62e4107ebd05166717ac58f6feb44471ed450d07fecd90e5f69d9bf03c48", + "sha256:c56da23034fe66221f2208c813d8aa509eea34d97328ce2add56e219c3a9f41c", + "sha256:c94b5537bf6ce66e4d7830c6993152940a188600f6ae044435287753044a8fe2", + "sha256:cebf8d56fee3b08ad40d332a807ecccd4153d3f1ba8231e111d9759f02edfd05", + "sha256:d0bf6f93a55d3fa7a079d811b29100b019784e2ee6bc06b0bb839538272a5610", + "sha256:d195add190abccefc70ad0f9a0141ad7da53e16183048380e688b466702195dd", + "sha256:d25ef0c33f22649b7a088035fd65ac1ce6464fa2876578df1adad9472f918a76", + 
"sha256:d6cbdf12ef967a6aa401cf5cdf47850559e59eedad10e781471c960583f25aa1", + "sha256:d8c032ccee90b37b44e05948b449a2d6baed7e614df3d3f47fe432c952c21b60", + "sha256:daff04257b49ab7f4b3f73f98283d3dbb1a65bf3500d55c7beac3c66c310fe34", + "sha256:e83ebbf020be727d6e0991c1b192a5c2e7113eb66e3def0cd0c62f9f266247e4", + "sha256:ed3025a8a7e5a59817b7494686d449ebfbe301f3e757b852c8d0d1961d6be864", + "sha256:f1936ef138bed2165dd8573aa65e3095ef7c2b6247faccd0e15186aabdda7f66", + "sha256:f5247a3d74355f8b1d780d0f3b32a23dd9f6d3ff43ef2037c6dcd249f35ecf4c", + "sha256:fa496cd45cda0165d597e9d6f01e36c33c9508f75cf03c0a650018c5048f578e", + "sha256:fb4363e6c9fc87365c2bc777a1f585a22f2f56642501885ffc7942138499bf54", + "sha256:fb4370b15111905bf8b5ba2129b926af9470f014cb0493a67d23e9d7a48348e8", + "sha256:fbec2af0ebafa57eb82c18c304b37c86a8abddf7022955d1742b3d5471a6339e" + ], + "markers": "python_version >= '3.8'", + "version": "==2.16.1" + }, + "pyee": { + "hashes": [ + "sha256:5c7e60f8df95710dbe17550e16ce0153f83990c00ef744841b43f371ed53ebea", + "sha256:c09f56e36eb10bf23aa2aacf145f690ded75b990a3d9523fd478b005940303d2" + ], + "version": "==8.2.2" + }, + "pygments": { + "hashes": [ + "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c", + "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367" + ], + "markers": "python_version >= '3.7'", + "version": "==2.17.2" + }, + "pyliftover": { + "hashes": [ + "sha256:72bcfb7de907569b0eb75e86c817840365297d63ba43a961da394187e399da41" + ], + "version": "==0.4" + }, + "pyppeteer": { + "hashes": [ + "sha256:11a734d8f02c6b128035aba8faf32748f2016310a6a1cbc6aa5b1e2580742e8f", + "sha256:ddb0d15cb644720160d49abb1ad0d97e87a55581febf1b7531be9e983aad7742" + ], + "markers": "python_version >= '3.7' and python_version < '4.0'", + "version": "==1.0.2" + }, + "pyquery": { + "hashes": [ + "sha256:8dfc9b4b7c5f877d619bbae74b1898d5743f6ca248cfd5d72b504dd614da312f", + "sha256:963e8d4e90262ff6d8dec072ea97285dc374a2f69cad7776f4082abcf6a1d8ae" + ], + 
"version": "==2.0.0" + }, + "pysam": { + "hashes": [ + "sha256:021fbf6874ad998aba19be33828ad9d23d52273643793488ac4b12917d714c68", + "sha256:116278a7caa122b2b8acc56d13b3599be9b1236f27a12488bffc306858ff0d57", + "sha256:1b84f99aa04e30bd1cc35c01bd41c2b7680131f56c71a740805aff8086f24b56", + "sha256:26199e403855b9da45341d25682e0df27013687d9cb1b4fd328136fbd506292b", + "sha256:32042e0bf3c5dd8554769442c2e1f7b6ada902c33ee44c616d0403e7acd12ee3", + "sha256:34f5653a82138d28a8e86205785a0398eb6c89f776b4145ff42783168757323c", + "sha256:4779a99d1ece17a98724d87a5c10c455cf212b3baa3a8399d3d072e4d0ae5ba0", + "sha256:481e4efbfbc07b6b92194a005cb9a98006c8378024f41c7b66c58b14f6e77f9c", + "sha256:4f6657a09c81333adb5545cf9a20d4c2ca1686acf8609ad58f13b3ec1b52a9cf", + "sha256:6d6aa2346b11ad35e88c65eb0067321318c25c7f35f75c98061173eabefcf8b0", + "sha256:6ffe5c98725fea54b1b2aa8f14a60ee9ceaed32c04460d1b861a62603dcd7153", + "sha256:83776ba587eb9575a209efed1cedb49d69c5fa6cc520dd722a0a09d0bb4e9b87", + "sha256:87dbf72f3e61fd6d3f92b1b683d9a9e797b6cc213ffcd971899f24a16f9f6e8f", + "sha256:93eb12be3822fb387e5438811f62a0f5e56c1edd5c830aaa316fb50d3d0bc181", + "sha256:942dd4a2263996bc2daa21200886e9fde027f32ce8820e7832b20bbdb97eb393", + "sha256:9af1cd3d07fd4c84e9b3d8a46c65b25f95278185bc6d44c4a48951679d5189ac", + "sha256:9b8e18520e7a79bad91b44cf9199c7fa42cec5c3020024d7ef9a7161d0099bf8", + "sha256:9ba53f9b0b2c5cb57908855cdb35a31b34c5211d215aa01bdb3e9b3d05c659cc", + "sha256:9bfebf89b1dc2ff6f88d64b5f05d8630deb89562b22764f8ee7f6fa9e677bb91", + "sha256:9d3ebb1515c2fd9b11823469e5b211ca3cc89e976c00c284a2190804c9f11726", + "sha256:a98d1ddca64943f3ead507721e52466aea2f7303e549d4960a2eb1d9fff8e3d7", + "sha256:ab7a46973cf0ab8c6ac327f4c3fb67698d7ccbeef8631a716898c6ba01ef3e45", + "sha256:bb61bf30c15f6767403b423b04c293e96fd7635457b506c849aafcf48fc13242", + "sha256:cfd2b858c7405cf38c730cba779ddf9f8cff28b4842c6440e64781650dcb9a52", + "sha256:da2f1af461e44d5c2c7210d458ee216f8ab98486adf1eea6c88eea5c1058a62f", + 
"sha256:f23b2f47528b94e8abe3b700103fb1214c623ae1c1b8125ecf22d4d33d76720f", + "sha256:f73d7923c89618fb7024875ed8eddc5fb0c911f430e3495de482fcee48143e45" + ], + "markers": "python_version >= '3.6'", + "version": "==0.22.0" + }, + "python-dateutil": { + "hashes": [ + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" + }, + "pyyaml": { + "hashes": [ + "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", + "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", + "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", + "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", + "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", + "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", + "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", + "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", + "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", + "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", + "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", + "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", + "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", + "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", + "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", + "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", + "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", + "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", + 
"sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", + "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", + "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", + "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", + "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", + "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", + "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", + "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", + "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", + "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", + "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", + "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", + "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", + "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", + "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", + "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", + "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", + "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", + "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", + "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", + "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", + "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", + "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", + "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", + "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", + "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", 
+ "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", + "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", + "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", + "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", + "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", + "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", + "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" + ], + "markers": "python_version >= '3.6'", + "version": "==6.0.1" + }, + "requests": { + "hashes": [ + "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", + "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" + ], + "markers": "python_version >= '3.7'", + "version": "==2.31.0" + }, + "requests-html": { + "hashes": [ + "sha256:7e929ecfed95fb1d0994bb368295d6d7c4d06b03fcb900c33d7d0b17e6003947", + "sha256:cb8a78cf829c4eca9d6233f28524f65dd2bfaafb4bdbbc407f0a0b8f487df6e2" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==0.10.0" + }, + "s3transfer": { + "hashes": [ + "sha256:3cdb40f5cfa6966e812209d0994f2a4709b561c88e90cf00c2696d2df4e56b2e", + "sha256:d0c8bbf672d5eebbe4e57945e23b972d963f07d82f661cabf678a5c88831595b" + ], + "markers": "python_version >= '3.8'", + "version": "==0.10.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "sniffio": { + "hashes": [ + "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", + "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.0" + }, + "soupsieve": { + "hashes": [ + 
"sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", + "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7" + ], + "markers": "python_version >= '3.8'", + "version": "==2.5" + }, + "sqlparse": { + "hashes": [ + "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3", + "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c" + ], + "markers": "python_version >= '3.5'", + "version": "==0.4.4" + }, + "stack-data": { + "hashes": [ + "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", + "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695" + ], + "version": "==0.6.3" + }, + "starlette": { + "hashes": [ + "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044", + "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080" + ], + "markers": "python_version >= '3.8'", + "version": "==0.36.3" + }, + "tabulate": { + "hashes": [ + "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", + "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f" + ], + "markers": "python_version >= '3.7'", + "version": "==0.9.0" + }, + "tqdm": { + "hashes": [ + "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386", + "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7" + ], + "markers": "python_version >= '3.7'", + "version": "==4.66.1" + }, + "traitlets": { + "hashes": [ + "sha256:2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74", + "sha256:8585105b371a04b8316a43d5ce29c098575c2e477850b62b848b964f1444527e" + ], + "markers": "python_version >= '3.8'", + "version": "==5.14.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783", + "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd" + ], + "markers": "python_version >= '3.8'", + "version": "==4.9.0" + }, + "urllib3": 
{ + "hashes": [ + "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07", + "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==1.26.18" + }, + "uvicorn": { + "hashes": [ + "sha256:4b85ba02b8a20429b9b205d015cbeb788a12da527f731811b643fd739ef90d5f", + "sha256:54898fcd80c13ff1cd28bf77b04ec9dbd8ff60c5259b499b4b12bb0917f22907" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.27.0.post1" + }, + "w3lib": { + "hashes": [ + "sha256:c4432926e739caa8e3f49f5de783f336df563d9490416aebd5d39fb896d264e7", + "sha256:ed5b74e997eea2abe3c1321f916e344144ee8e9072a6f33463ee8e57f858a4b1" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.2" + }, + "wcwidth": { + "hashes": [ + "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", + "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5" + ], + "version": "==0.2.13" + }, + "websockets": { + "hashes": [ + "sha256:00213676a2e46b6ebf6045bc11d0f529d9120baa6f58d122b4021ad92adabd41", + "sha256:00c870522cdb69cd625b93f002961ffb0c095394f06ba8c48f17eef7c1541f96", + "sha256:0154f7691e4fe6c2b2bc275b5701e8b158dae92a1ab229e2b940efe11905dff4", + "sha256:05a7233089f8bd355e8cbe127c2e8ca0b4ea55467861906b80d2ebc7db4d6b72", + "sha256:09a1814bb15eff7069e51fed0826df0bc0702652b5cb8f87697d469d79c23576", + "sha256:0cff816f51fb33c26d6e2b16b5c7d48eaa31dae5488ace6aae468b361f422b63", + "sha256:185929b4808b36a79c65b7865783b87b6841e852ef5407a2fb0c03381092fa3b", + "sha256:2fc8709c00704194213d45e455adc106ff9e87658297f72d544220e32029cd3d", + "sha256:33d69ca7612f0ddff3316b0c7b33ca180d464ecac2d115805c044bf0a3b0d032", + "sha256:389f8dbb5c489e305fb113ca1b6bdcdaa130923f77485db5b189de343a179393", + "sha256:38ea7b82bfcae927eeffc55d2ffa31665dc7fec7b8dc654506b8e5a518eb4d50", + 
"sha256:3d3cac3e32b2c8414f4f87c1b2ab686fa6284a980ba283617404377cd448f631", + "sha256:40e826de3085721dabc7cf9bfd41682dadc02286d8cf149b3ad05bff89311e4f", + "sha256:4239b6027e3d66a89446908ff3027d2737afc1a375f8fd3eea630a4842ec9a0c", + "sha256:45ec8e75b7dbc9539cbfafa570742fe4f676eb8b0d3694b67dabe2f2ceed8aa6", + "sha256:47a2964021f2110116cc1125b3e6d87ab5ad16dea161949e7244ec583b905bb4", + "sha256:48c08473563323f9c9debac781ecf66f94ad5a3680a38fe84dee5388cf5acaf6", + "sha256:4c6d2264f485f0b53adf22697ac11e261ce84805c232ed5dbe6b1bcb84b00ff0", + "sha256:4f72e5cd0f18f262f5da20efa9e241699e0cf3a766317a17392550c9ad7b37d8", + "sha256:56029457f219ade1f2fc12a6504ea61e14ee227a815531f9738e41203a429112", + "sha256:5c1289596042fad2cdceb05e1ebf7aadf9995c928e0da2b7a4e99494953b1b94", + "sha256:62e627f6b6d4aed919a2052efc408da7a545c606268d5ab5bfab4432734b82b4", + "sha256:74de2b894b47f1d21cbd0b37a5e2b2392ad95d17ae983e64727e18eb281fe7cb", + "sha256:7c584f366f46ba667cfa66020344886cf47088e79c9b9d39c84ce9ea98aaa331", + "sha256:7d27a7e34c313b3a7f91adcd05134315002aaf8540d7b4f90336beafaea6217c", + "sha256:7d3f0b61c45c3fa9a349cf484962c559a8a1d80dae6977276df8fd1fa5e3cb8c", + "sha256:82ff5e1cae4e855147fd57a2863376ed7454134c2bf49ec604dfe71e446e2193", + "sha256:84bc2a7d075f32f6ed98652db3a680a17a4edb21ca7f80fe42e38753a58ee02b", + "sha256:884be66c76a444c59f801ac13f40c76f176f1bfa815ef5b8ed44321e74f1600b", + "sha256:8a5cc00546e0a701da4639aa0bbcb0ae2bb678c87f46da01ac2d789e1f2d2038", + "sha256:8dc96f64ae43dde92530775e9cb169979f414dcf5cff670455d81a6823b42089", + "sha256:8f38706e0b15d3c20ef6259fd4bc1700cd133b06c3c1bb108ffe3f8947be15fa", + "sha256:90fcf8929836d4a0e964d799a58823547df5a5e9afa83081761630553be731f9", + "sha256:931c039af54fc195fe6ad536fde4b0de04da9d5916e78e55405436348cfb0e56", + "sha256:932af322458da7e4e35df32f050389e13d3d96b09d274b22a7aa1808f292fee4", + "sha256:942de28af58f352a6f588bc72490ae0f4ccd6dfc2bd3de5945b882a078e4e179", + "sha256:9bc42e8402dc5e9905fb8b9649f57efcb2056693b7e88faa8fb029256ba9c68c", 
+ "sha256:a7a240d7a74bf8d5cb3bfe6be7f21697a28ec4b1a437607bae08ac7acf5b4882", + "sha256:a9f9a735deaf9a0cadc2d8c50d1a5bcdbae8b6e539c6e08237bc4082d7c13f28", + "sha256:ae5e95cfb53ab1da62185e23b3130e11d64431179debac6dc3c6acf08760e9b1", + "sha256:b029fb2032ae4724d8ae8d4f6b363f2cc39e4c7b12454df8df7f0f563ed3e61a", + "sha256:b0d15c968ea7a65211e084f523151dbf8ae44634de03c801b8bd070b74e85033", + "sha256:b343f521b047493dc4022dd338fc6db9d9282658862756b4f6fd0e996c1380e1", + "sha256:b627c266f295de9dea86bd1112ed3d5fafb69a348af30a2422e16590a8ecba13", + "sha256:b9968694c5f467bf67ef97ae7ad4d56d14be2751000c1207d31bf3bb8860bae8", + "sha256:ba089c499e1f4155d2a3c2a05d2878a3428cf321c848f2b5a45ce55f0d7d310c", + "sha256:bbccd847aa0c3a69b5f691a84d2341a4f8a629c6922558f2a70611305f902d74", + "sha256:bc0b82d728fe21a0d03e65f81980abbbcb13b5387f733a1a870672c5be26edab", + "sha256:c57e4c1349fbe0e446c9fa7b19ed2f8a4417233b6984277cce392819123142d3", + "sha256:c94ae4faf2d09f7c81847c63843f84fe47bf6253c9d60b20f25edfd30fb12588", + "sha256:c9b27d6c1c6cd53dc93614967e9ce00ae7f864a2d9f99fe5ed86706e1ecbf485", + "sha256:d210abe51b5da0ffdbf7b43eed0cfdff8a55a1ab17abbec4301c9ff077dd0342", + "sha256:d58804e996d7d2307173d56c297cf7bc132c52df27a3efaac5e8d43e36c21c48", + "sha256:d6a4162139374a49eb18ef5b2f4da1dd95c994588f5033d64e0bbfda4b6b6fcf", + "sha256:da39dd03d130162deb63da51f6e66ed73032ae62e74aaccc4236e30edccddbb0", + "sha256:db3c336f9eda2532ec0fd8ea49fef7a8df8f6c804cdf4f39e5c5c0d4a4ad9a7a", + "sha256:dd500e0a5e11969cdd3320935ca2ff1e936f2358f9c2e61f100a1660933320ea", + "sha256:dd9becd5fe29773d140d68d607d66a38f60e31b86df75332703757ee645b6faf", + "sha256:e0cb5cc6ece6ffa75baccfd5c02cffe776f3f5c8bf486811f9d3ea3453676ce8", + "sha256:e23173580d740bf8822fd0379e4bf30aa1d5a92a4f252d34e893070c081050df", + "sha256:e3a686ecb4aa0d64ae60c9c9f1a7d5d46cab9bfb5d91a2d303d00e2cd4c4c5cc", + "sha256:e789376b52c295c4946403bd0efecf27ab98f05319df4583d3c48e43c7342c2f", + 
"sha256:edc344de4dac1d89300a053ac973299e82d3db56330f3494905643bb68801269", + "sha256:eef610b23933c54d5d921c92578ae5f89813438fded840c2e9809d378dc765d3", + "sha256:f2c38d588887a609191d30e902df2a32711f708abfd85d318ca9b367258cfd0c", + "sha256:f55b5905705725af31ccef50e55391621532cd64fbf0bc6f4bac935f0fccec46", + "sha256:f5fc088b7a32f244c519a048c170f14cf2251b849ef0e20cbbb0fdf0fdaf556f", + "sha256:fe10ddc59b304cb19a1bdf5bd0a7719cbbc9fbdd57ac80ed436b709fcf889106", + "sha256:ff64a1d38d156d429404aaa84b27305e957fd10c30e5880d1765c9480bea490f" + ], + "markers": "python_version >= '3.7'", + "version": "==10.4" + }, + "yoyo-migrations": { + "hashes": [ + "sha256:27dabe7432859288b0bd771093f593e3dd2ff6dd4e3b8438992a07c9a7154660", + "sha256:820606a03e262cf1cd4f59e256c28fa446425224d5b82a3d1275fd78178523e4" + ], + "version": "==8.2.0" + }, + "zipp": { + "hashes": [ + "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31", + "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0" + ], + "markers": "python_version >= '3.8'", + "version": "==3.17.0" + } + }, + "develop": { + "aiofiles": { + "hashes": [ + "sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107", + "sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2.1" + }, + "annotated-types": { + "hashes": [ + "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43", + "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d" + ], + "markers": "python_version >= '3.8'", + "version": "==0.6.0" + }, + "anyio": { + "hashes": [ + "sha256:745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee", + "sha256:e1875bb4b4e2de1669f4bc7869b6d3f54231cdced71605e6e64c9be77e3be50f" + ], + "markers": "python_version >= '3.8'", + "version": "==4.2.0" + }, + "appdirs": { + "hashes": [ + "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", + 
"sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" + ], + "version": "==1.4.4" + }, + "appnope": { + "hashes": [ + "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24", + "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e" + ], + "markers": "platform_system == 'Darwin'", + "version": "==0.1.3" + }, + "argon2-cffi": { + "hashes": [ + "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", + "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea" + ], + "markers": "python_version >= '3.7'", + "version": "==23.1.0" + }, + "argon2-cffi-bindings": { + "hashes": [ + "sha256:20ef543a89dee4db46a1a6e206cd015360e5a75822f76df533845c3cbaf72670", + "sha256:2c3e3cc67fdb7d82c4718f19b4e7a87123caf8a93fde7e23cf66ac0337d3cb3f", + "sha256:3b9ef65804859d335dc6b31582cad2c5166f0c3e7975f324d9ffaa34ee7e6583", + "sha256:3e385d1c39c520c08b53d63300c3ecc28622f076f4c2b0e6d7e796e9f6502194", + "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c", + "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a", + "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082", + "sha256:6a22ad9800121b71099d0fb0a65323810a15f2e292f2ba450810a7316e128ee5", + "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f", + "sha256:93f9bf70084f97245ba10ee36575f0c3f1e7d7724d67d8e5b08e61787c320ed7", + "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d", + "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f", + "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae", + "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3", + "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86", + "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367", + "sha256:d4966ef5848d820776f5f562a7d45fdd70c2f330c961d0d745b784034bd9f48d", + 
"sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93", + "sha256:ed2937d286e2ad0cc79a7087d3c272832865f779430e0cc2b4f3718d3159b0cb", + "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e", + "sha256:f9f8b450ed0547e3d473fdc8612083fd08dd2120d6ac8f73828df9b7d45bb351" + ], + "markers": "python_version >= '3.6'", + "version": "==21.2.0" + }, + "arrow": { + "hashes": [ + "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80", + "sha256:d4540617648cb5f895730f1ad8c82a65f2dad0166f57b75f3ca54759c4d67a85" + ], + "markers": "python_version >= '3.8'", + "version": "==1.3.0" + }, + "asttokens": { + "hashes": [ + "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24", + "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0" + ], + "version": "==2.4.1" + }, + "async-lru": { + "hashes": [ + "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627", + "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224" + ], + "markers": "python_version >= '3.8'", + "version": "==2.0.4" + }, + "async-timeout": { + "hashes": [ + "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f", + "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028" + ], + "markers": "python_full_version < '3.12.0'", + "version": "==4.0.3" + }, + "asyncpg": { + "hashes": [ + "sha256:0009a300cae37b8c525e5b449233d59cd9868fd35431abc470a3e364d2b85cb9", + "sha256:000c996c53c04770798053e1730d34e30cb645ad95a63265aec82da9093d88e7", + "sha256:012d01df61e009015944ac7543d6ee30c2dc1eb2f6b10b62a3f598beb6531548", + "sha256:039a261af4f38f949095e1e780bae84a25ffe3e370175193174eb08d3cecab23", + "sha256:103aad2b92d1506700cbf51cd8bb5441e7e72e87a7b3a2ca4e32c840f051a6a3", + "sha256:1e186427c88225ef730555f5fdda6c1812daa884064bfe6bc462fd3a71c4b675", + "sha256:2245be8ec5047a605e0b454c894e54bf2ec787ac04b1cb7e0d3c67aa1e32f0fe", + 
"sha256:37a2ec1b9ff88d8773d3eb6d3784dc7e3fee7756a5317b67f923172a4748a175", + "sha256:48e7c58b516057126b363cec8ca02b804644fd012ef8e6c7e23386b7d5e6ce83", + "sha256:52e8f8f9ff6e21f9b39ca9f8e3e33a5fcdceaf5667a8c5c32bee158e313be385", + "sha256:5340dd515d7e52f4c11ada32171d87c05570479dc01dc66d03ee3e150fb695da", + "sha256:54858bc25b49d1114178d65a88e48ad50cb2b6f3e475caa0f0c092d5f527c106", + "sha256:5b52e46f165585fd6af4863f268566668407c76b2c72d366bb8b522fa66f1870", + "sha256:5bbb7f2cafd8d1fa3e65431833de2642f4b2124be61a449fa064e1a08d27e449", + "sha256:5cad1324dbb33f3ca0cd2074d5114354ed3be2b94d48ddfd88af75ebda7c43cc", + "sha256:6011b0dc29886ab424dc042bf9eeb507670a3b40aece3439944006aafe023178", + "sha256:642a36eb41b6313ffa328e8a5c5c2b5bea6ee138546c9c3cf1bffaad8ee36dd9", + "sha256:6feaf2d8f9138d190e5ec4390c1715c3e87b37715cd69b2c3dfca616134efd2b", + "sha256:72fd0ef9f00aeed37179c62282a3d14262dbbafb74ec0ba16e1b1864d8a12169", + "sha256:746e80d83ad5d5464cfbf94315eb6744222ab00aa4e522b704322fb182b83610", + "sha256:76c3ac6530904838a4b650b2880f8e7af938ee049e769ec2fba7cd66469d7772", + "sha256:797ab8123ebaed304a1fad4d7576d5376c3a006a4100380fb9d517f0b59c1ab2", + "sha256:8d36c7f14a22ec9e928f15f92a48207546ffe68bc412f3be718eedccdf10dc5c", + "sha256:97eb024685b1d7e72b1972863de527c11ff87960837919dac6e34754768098eb", + "sha256:a65c1dcd820d5aea7c7d82a3fdcb70e096f8f70d1a8bf93eb458e49bfad036ac", + "sha256:a921372bbd0aa3a5822dd0409da61b4cd50df89ae85150149f8c119f23e8c408", + "sha256:a9e6823a7012be8b68301342ba33b4740e5a166f6bbda0aee32bc01638491a22", + "sha256:b544ffc66b039d5ec5a7454667f855f7fec08e0dfaf5a5490dfafbb7abbd2cfb", + "sha256:bb1292d9fad43112a85e98ecdc2e051602bce97c199920586be83254d9dafc02", + "sha256:bde17a1861cf10d5afce80a36fca736a86769ab3579532c03e45f83ba8a09c59", + "sha256:cce08a178858b426ae1aa8409b5cc171def45d4293626e7aa6510696d46decd8", + "sha256:cfe73ffae35f518cfd6e4e5f5abb2618ceb5ef02a2365ce64f132601000587d3", + "sha256:d1c49e1f44fffafd9a55e1a9b101590859d881d639ea2922516f5d9c512d354e", 
+ "sha256:d4900ee08e85af01adb207519bb4e14b1cae8fd21e0ccf80fac6aa60b6da37b4", + "sha256:d84156d5fb530b06c493f9e7635aa18f518fa1d1395ef240d211cb563c4e2364", + "sha256:dc600ee8ef3dd38b8d67421359779f8ccec30b463e7aec7ed481c8346decf99f", + "sha256:e0bfe9c4d3429706cf70d3249089de14d6a01192d617e9093a8e941fea8ee775", + "sha256:e17b52c6cf83e170d3d865571ba574577ab8e533e7361a2b8ce6157d02c665d3", + "sha256:f100d23f273555f4b19b74a96840aa27b85e99ba4b1f18d4ebff0734e78dc090", + "sha256:f9ea3f24eb4c49a615573724d88a48bd1b7821c890c2effe04f05382ed9e8810", + "sha256:ff8e8109cd6a46ff852a5e6bab8b0a047d7ea42fcb7ca5ae6eaae97d8eacf397" + ], + "markers": "python_full_version >= '3.8.0'", + "version": "==0.29.0" + }, + "attrs": { + "hashes": [ + "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30", + "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2.0" + }, + "babel": { + "hashes": [ + "sha256:6919867db036398ba21eb5c7a0f6b28ab8cbc3ae7a73a44ebe34ae74a4e7d363", + "sha256:efb1a25b7118e67ce3a259bed20545c29cb68be8ad2c784c83689981b7a57287" + ], + "markers": "python_version >= '3.7'", + "version": "==2.14.0" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", + "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==4.12.3" + }, + "biocommons.seqrepo": { + "hashes": [ + "sha256:0a4361770978350ea35f71357a7aafc0bacb7200bf87a80677d3a6fd7cfde981", + "sha256:655eb8a2b6e3d4564ef2cbfaa2db6415ccb066f9fd786bd4b3303bbba00e752a" + ], + "markers": "python_version >= '3.9'", + "version": "==0.6.6" + }, + "bioutils": { + "hashes": [ + "sha256:f58de493260042bff78aef484a3caf84e40987b663075f8573022df6f4c2a2ac" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.5.8.post1" + }, + "black": { + "hashes": [ + 
"sha256:0269dfdea12442022e88043d2910429bed717b2d04523867a85dacce535916b8", + "sha256:07204d078e25327aad9ed2c64790d681238686bce254c910de640c7cc4fc3aa6", + "sha256:08b34e85170d368c37ca7bf81cf67ac863c9d1963b2c1780c39102187ec8dd62", + "sha256:1a95915c98d6e32ca43809d46d932e2abc5f1f7d582ffbe65a5b4d1588af7445", + "sha256:2588021038bd5ada078de606f2a804cadd0a3cc6a79cb3e9bb3a8bf581325a4c", + "sha256:2fa6a0e965779c8f2afb286f9ef798df770ba2b6cee063c650b96adec22c056a", + "sha256:34afe9da5056aa123b8bfda1664bfe6fb4e9c6f311d8e4a6eb089da9a9173bf9", + "sha256:3897ae5a21ca132efa219c029cce5e6bfc9c3d34ed7e892113d199c0b1b444a2", + "sha256:40657e1b78212d582a0edecafef133cf1dd02e6677f539b669db4746150d38f6", + "sha256:48b5760dcbfe5cf97fd4fba23946681f3a81514c6ab8a45b50da67ac8fbc6c7b", + "sha256:5242ecd9e990aeb995b6d03dc3b2d112d4a78f2083e5a8e86d566340ae80fec4", + "sha256:5cdc2e2195212208fbcae579b931407c1fa9997584f0a415421748aeafff1168", + "sha256:5d7b06ea8816cbd4becfe5f70accae953c53c0e53aa98730ceccb0395520ee5d", + "sha256:7258c27115c1e3b5de9ac6c4f9957e3ee2c02c0b39222a24dc7aa03ba0e986f5", + "sha256:854c06fb86fd854140f37fb24dbf10621f5dab9e3b0c29a690ba595e3d543024", + "sha256:a21725862d0e855ae05da1dd25e3825ed712eaaccef6b03017fe0853a01aa45e", + "sha256:a83fe522d9698d8f9a101b860b1ee154c1d25f8a82ceb807d319f085b2627c5b", + "sha256:b3d64db762eae4a5ce04b6e3dd745dcca0fb9560eb931a5be97472e38652a161", + "sha256:e298d588744efda02379521a19639ebcd314fba7a49be22136204d7ed1782717", + "sha256:e2c8dfa14677f90d976f68e0c923947ae68fa3961d61ee30976c388adc0b02c8", + "sha256:ecba2a15dfb2d97105be74bbfe5128bc5e9fa8477d8c46766505c1dda5883aac", + "sha256:fc1ec9aa6f4d98d022101e015261c056ddebe3da6a8ccfc2c792cbe0349d48b7" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==24.1.1" + }, + "bleach": { + "hashes": [ + "sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe", + "sha256:3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6" + ], + "markers": "python_version 
>= '3.8'", + "version": "==6.1.0" + }, + "boto3": { + "hashes": [ + "sha256:33a8b6d9136fa7427160edb92d2e50f2035f04e9d63a2d1027349053e12626aa", + "sha256:b2f321e20966f021ec800b7f2c01287a3dd04fc5965acdfbaa9c505a24ca45d1" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==1.34.34" + }, + "botocore": { + "hashes": [ + "sha256:54093dc97372bb7683f5c61a279aa8240408abf3b2cc494ae82a9a90c1b784b5", + "sha256:cd060b0d88ebb2b893f1411c1db7f2ba66cc18e52dcc57ad029564ef5fec437b" + ], + "markers": "python_version >= '3.8'", + "version": "==1.34.34" + }, + "bs4": { + "hashes": [ + "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", + "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc" + ], + "version": "==0.0.2" + }, + "canonicaljson": { + "hashes": [ + "sha256:c38a315de3b5a0532f1ec1f9153cd3d716abfc565a558d00a4835428a34fca5b", + "sha256:e2fdaef1d7fadc5d9cb59bd3d0d41b064ddda697809ac4325dced721d12f113f" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, + "certifi": { + "hashes": [ + "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", + "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" + ], + "markers": "python_version >= '3.6'", + "version": "==2024.2.2" + }, + "cffi": { + "hashes": [ + "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc", + "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a", + "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417", + "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab", + "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520", + "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36", + "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743", + "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8", + 
"sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed", + "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684", + "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56", + "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324", + "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d", + "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235", + "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e", + "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088", + "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000", + "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7", + "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e", + "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673", + "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c", + "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe", + "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2", + "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098", + "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8", + "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a", + "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0", + "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b", + "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896", + "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e", + "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9", + "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2", + "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b", + "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6", 
+ "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404", + "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f", + "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0", + "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4", + "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc", + "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936", + "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba", + "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872", + "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb", + "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614", + "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1", + "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d", + "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969", + "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b", + "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4", + "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627", + "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956", + "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357" + ], + "markers": "python_version >= '3.8'", + "version": "==1.16.0" + }, + "cfgv": { + "hashes": [ + "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", + "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560" + ], + "markers": "python_version >= '3.8'", + "version": "==3.4.0" + }, + "charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + 
"sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", 
+ "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + 
"sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", 
+ "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "click": { + "hashes": [ + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" + }, + "coloredlogs": { + "hashes": [ + "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", + "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==15.0.1" + }, + "comm": { + "hashes": [ + "sha256:0bc91edae1344d39d3661dcbc36937181fdaddb304790458f8b044dbc064b89a", + "sha256:87928485c0dfc0e7976fd89fc1e187023cf587e7c353e4a9b417555b44adf021" + ], + "markers": "python_version >= '3.8'", + "version": "==0.2.1" + }, + "configparser": { + "hashes": [ + "sha256:900ea2bb01b2540b1a644ad3d5351e9b961a4a012d4732f619375fb8f641ee19", + "sha256:ec914ab1e56c672de1f5c3483964e68f71b34e457904b7b76e06b922aec067a8" + ], + "markers": "python_version >= '3.8'", + "version": "==6.0.0" + }, + "cool-seq-tool": { + "hashes": [ + 
"sha256:3b2f58210c9d365d0ad03ce5d12a1ca949e0bc47a9c97c7a9261e46b2fcd0c59", + "sha256:cae1becc85c228e7479b310546c060c4cb39cfd8f89180e6eb02dd699be275dd" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.3.0.dev1" + }, + "coverage": { + "extras": [ + "toml" + ], + "hashes": [ + "sha256:0193657651f5399d433c92f8ae264aff31fc1d066deee4b831549526433f3f61", + "sha256:02f2edb575d62172aa28fe00efe821ae31f25dc3d589055b3fb64d51e52e4ab1", + "sha256:0491275c3b9971cdbd28a4595c2cb5838f08036bca31765bad5e17edf900b2c7", + "sha256:077d366e724f24fc02dbfe9d946534357fda71af9764ff99d73c3c596001bbd7", + "sha256:10e88e7f41e6197ea0429ae18f21ff521d4f4490aa33048f6c6f94c6045a6a75", + "sha256:18e961aa13b6d47f758cc5879383d27b5b3f3dcd9ce8cdbfdc2571fe86feb4dd", + "sha256:1a78b656a4d12b0490ca72651fe4d9f5e07e3c6461063a9b6265ee45eb2bdd35", + "sha256:1ed4b95480952b1a26d863e546fa5094564aa0065e1e5f0d4d0041f293251d04", + "sha256:23b27b8a698e749b61809fb637eb98ebf0e505710ec46a8aa6f1be7dc0dc43a6", + "sha256:23f5881362dcb0e1a92b84b3c2809bdc90db892332daab81ad8f642d8ed55042", + "sha256:32a8d985462e37cfdab611a6f95b09d7c091d07668fdc26e47a725ee575fe166", + "sha256:3468cc8720402af37b6c6e7e2a9cdb9f6c16c728638a2ebc768ba1ef6f26c3a1", + "sha256:379d4c7abad5afbe9d88cc31ea8ca262296480a86af945b08214eb1a556a3e4d", + "sha256:3cacfaefe6089d477264001f90f55b7881ba615953414999c46cc9713ff93c8c", + "sha256:3e3424c554391dc9ef4a92ad28665756566a28fecf47308f91841f6c49288e66", + "sha256:46342fed0fff72efcda77040b14728049200cbba1279e0bf1188f1f2078c1d70", + "sha256:536d609c6963c50055bab766d9951b6c394759190d03311f3e9fcf194ca909e1", + "sha256:5d6850e6e36e332d5511a48a251790ddc545e16e8beaf046c03985c69ccb2676", + "sha256:6008adeca04a445ea6ef31b2cbaf1d01d02986047606f7da266629afee982630", + "sha256:64e723ca82a84053dd7bfcc986bdb34af8d9da83c521c19d6b472bc6880e191a", + "sha256:6b00e21f86598b6330f0019b40fb397e705135040dbedc2ca9a93c7441178e74", + "sha256:6d224f0c4c9c98290a6990259073f496fcec1b5cc613eecbd22786d398ded3ad", 
+ "sha256:6dceb61d40cbfcf45f51e59933c784a50846dc03211054bd76b421a713dcdf19", + "sha256:7ac8f8eb153724f84885a1374999b7e45734bf93a87d8df1e7ce2146860edef6", + "sha256:85ccc5fa54c2ed64bd91ed3b4a627b9cce04646a659512a051fa82a92c04a448", + "sha256:869b5046d41abfea3e381dd143407b0d29b8282a904a19cb908fa24d090cc018", + "sha256:8bdb0285a0202888d19ec6b6d23d5990410decb932b709f2b0dfe216d031d218", + "sha256:8dfc5e195bbef80aabd81596ef52a1277ee7143fe419efc3c4d8ba2754671756", + "sha256:8e738a492b6221f8dcf281b67129510835461132b03024830ac0e554311a5c54", + "sha256:918440dea04521f499721c039863ef95433314b1db00ff826a02580c1f503e45", + "sha256:9641e21670c68c7e57d2053ddf6c443e4f0a6e18e547e86af3fad0795414a628", + "sha256:9d2f9d4cc2a53b38cabc2d6d80f7f9b7e3da26b2f53d48f05876fef7956b6968", + "sha256:a07f61fc452c43cd5328b392e52555f7d1952400a1ad09086c4a8addccbd138d", + "sha256:a3277f5fa7483c927fe3a7b017b39351610265308f5267ac6d4c2b64cc1d8d25", + "sha256:a4a3907011d39dbc3e37bdc5df0a8c93853c369039b59efa33a7b6669de04c60", + "sha256:aeb2c2688ed93b027eb0d26aa188ada34acb22dceea256d76390eea135083950", + "sha256:b094116f0b6155e36a304ff912f89bbb5067157aff5f94060ff20bbabdc8da06", + "sha256:b8ffb498a83d7e0305968289441914154fb0ef5d8b3157df02a90c6695978295", + "sha256:b9bb62fac84d5f2ff523304e59e5c439955fb3b7f44e3d7b2085184db74d733b", + "sha256:c61f66d93d712f6e03369b6a7769233bfda880b12f417eefdd4f16d1deb2fc4c", + "sha256:ca6e61dc52f601d1d224526360cdeab0d0712ec104a2ce6cc5ccef6ed9a233bc", + "sha256:ca7b26a5e456a843b9b6683eada193fc1f65c761b3a473941efe5a291f604c74", + "sha256:d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1", + "sha256:d17bbc946f52ca67adf72a5ee783cd7cd3477f8f8796f59b4974a9b59cacc9ee", + "sha256:dfd1e1b9f0898817babf840b77ce9fe655ecbe8b1b327983df485b30df8cc011", + "sha256:e0860a348bf7004c812c8368d1fc7f77fe8e4c095d661a579196a9533778e156", + "sha256:f2f5968608b1fe2a1d00d01ad1017ee27efd99b3437e08b83ded9b7af3f6f766", + 
"sha256:f3771b23bb3675a06f5d885c3630b1d01ea6cac9e84a01aaf5508706dba546c5", + "sha256:f68ef3660677e6624c8cace943e4765545f8191313a07288a53d3da188bd8581", + "sha256:f86f368e1c7ce897bf2457b9eb61169a44e2ef797099fb5728482b8d69f3f016", + "sha256:f90515974b39f4dea2f27c0959688621b46d96d5a626cf9c53dbc653a895c05c", + "sha256:fe558371c1bdf3b8fa03e097c523fb9645b8730399c14fe7721ee9c9e2a545d3" + ], + "markers": "python_version >= '3.8'", + "version": "==7.4.1" + }, + "cssselect": { + "hashes": [ + "sha256:666b19839cfaddb9ce9d36bfe4c969132c647b92fc9088c4e23f786b30f1b3dc", + "sha256:da1885f0c10b60c03ed5eccbb6b68d6eff248d91976fcde348f395d54c9fd35e" + ], + "markers": "python_version >= '3.7'", + "version": "==1.2.0" + }, + "debugpy": { + "hashes": [ + "sha256:125b9a637e013f9faac0a3d6a82bd17c8b5d2c875fb6b7e2772c5aba6d082332", + "sha256:12af2c55b419521e33d5fb21bd022df0b5eb267c3e178f1d374a63a2a6bdccd0", + "sha256:3c6fb41c98ec51dd010d7ed650accfd07a87fe5e93eca9d5f584d0578f28f35f", + "sha256:46ab6780159eeabb43c1495d9c84cf85d62975e48b6ec21ee10c95767c0590aa", + "sha256:57161629133113c97b387382045649a2b985a348f0c9366e22217c87b68b73c6", + "sha256:5d9de202f5d42e62f932507ee8b21e30d49aae7e46d5b1dd5c908db1d7068637", + "sha256:60009b132c91951354f54363f8ebdf7457aeb150e84abba5ae251b8e9f29a8a6", + "sha256:61eab4a4c8b6125d41a34bad4e5fe3d2cc145caecd63c3fe953be4cc53e65bf8", + "sha256:7fb95ca78f7ac43393cd0e0f2b6deda438ec7c5e47fa5d38553340897d2fbdfb", + "sha256:8cd0197141eb9e8a4566794550cfdcdb8b3db0818bdf8c49a8e8f8053e56e38b", + "sha256:9c9b0ac1ce2a42888199df1a1906e45e6f3c9555497643a85e0bf2406e3ffbc4", + "sha256:a64093656c4c64dc6a438e11d59369875d200bd5abb8f9b26c1f5f723622e153", + "sha256:a8b7a2fd27cd9f3553ac112f356ad4ca93338feadd8910277aff71ab24d8775f", + "sha256:b05a6b503ed520ad58c8dc682749113d2fd9f41ffd45daec16e558ca884008cd", + "sha256:bdc5ef99d14b9c0fcb35351b4fbfc06ac0ee576aeab6b2511702e5a648a2e595", + "sha256:e3412f9faa9ade82aa64a50b602544efcba848c91384e9f93497a458767e6926", + 
"sha256:ef54404365fae8d45cf450d0544ee40cefbcb9cb85ea7afe89a963c27028261e", + "sha256:ef9ab7df0b9a42ed9c878afd3eaaff471fce3fa73df96022e1f5c9f8f8c87ada" + ], + "markers": "python_version >= '3.8'", + "version": "==1.8.0" + }, + "decorator": { + "hashes": [ + "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", + "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" + ], + "markers": "python_version >= '3.5'", + "version": "==5.1.1" + }, + "defusedxml": { + "hashes": [ + "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", + "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.7.1" + }, + "dill": { + "hashes": [ + "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", + "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7" + ], + "version": "==0.3.8" + }, + "distlib": { + "hashes": [ + "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784", + "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64" + ], + "version": "==0.3.8" + }, + "exceptiongroup": { + "hashes": [ + "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", + "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68" + ], + "markers": "python_version < '3.11'", + "version": "==1.2.0" + }, + "executing": { + "hashes": [ + "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147", + "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc" + ], + "markers": "python_version >= '3.5'", + "version": "==2.0.1" + }, + "fake-useragent": { + "hashes": [ + "sha256:5426e4015d8ccc5bb25f64d3dfcfd3915eba30ffebd31b86b60dc7a4c5d65528", + "sha256:9acce439ee2c6cf9c3772fa6c200f62dc8d56605063327a4d8c5d0e47f414b85" + ], + "version": "==1.4.0" + }, + "fastapi": { + "hashes": [ + 
"sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d", + "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.109.2" + }, + "fastjsonschema": { + "hashes": [ + "sha256:3672b47bc94178c9f23dbb654bf47440155d4db9df5f7bc47643315f9c405cd0", + "sha256:e3126a94bdc4623d3de4485f8d468a12f02a67921315ddc87836d6e456dc789d" + ], + "version": "==2.19.1" + }, + "filelock": { + "hashes": [ + "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e", + "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c" + ], + "markers": "python_version >= '3.8'", + "version": "==3.13.1" + }, + "fqdn": { + "hashes": [ + "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f", + "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014" + ], + "version": "==1.5.1" + }, + "ga4gh.vrs": { + "extras": [ + "extras" + ], + "hashes": [ + "sha256:60239da2e1c6dcac3022cc073fe82759dd787c140d589781da9150f85b859b08", + "sha256:95e49ca1ad2e3a7574f5b832ac64e1a75989eaa0b3f7d5d0163b0db8d9293177" + ], + "markers": "python_version >= '3.8'", + "version": "==2.0.0a2" + }, + "gene-normalizer": { + "hashes": [ + "sha256:4d4acad6c96dc7cf2015de0d758ab63022585a629db2436b54db19b89d4be983", + "sha256:87215320599cff7bc84a78041d857d626061792de7a049febe5e9482b0cae501" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.3.0.dev1" + }, + "h11": { + "hashes": [ + "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" + }, + "hgvs": { + "hashes": [ + "sha256:06abb6363bb0c8ef9f3f8f9dc333d3a346ab5f9ebcb20a5bb56c69256262559f", + "sha256:598640bae0de34ff29c58440904fc9156d7a1bc750ddef5894edd415c772b957" + ], + "markers": "python_version >= '3.6'", + 
"version": "==1.5.4" + }, + "humanfriendly": { + "hashes": [ + "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", + "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==10.0" + }, + "identify": { + "hashes": [ + "sha256:161558f9fe4559e1557e1bff323e8631f6a0e4837f7497767c1782832f16b62d", + "sha256:d40ce5fcd762817627670da8a7d8d8e65f24342d14539c59488dc603bf662e34" + ], + "markers": "python_version >= '3.8'", + "version": "==2.5.33" + }, + "idna": { + "hashes": [ + "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", + "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f" + ], + "markers": "python_version >= '3.5'", + "version": "==3.6" + }, + "importlib-metadata": { + "hashes": [ + "sha256:4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e", + "sha256:f238736bb06590ae52ac1fab06a3a9ef1d8dce2b7a35b5ab329371d6c8f5d2cc" + ], + "markers": "python_version >= '3.8'", + "version": "==7.0.1" + }, + "importlib-resources": { + "hashes": [ + "sha256:3893a00122eafde6894c59914446a512f728a0c1a45f9bb9b63721b6bacf0b4a", + "sha256:e8bf90d8213b486f428c9c39714b920041cb02c184686a3dee24905aaa8105d6" + ], + "markers": "python_version < '3.10'", + "version": "==6.1.1" + }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, + "ipykernel": { + "hashes": [ + "sha256:076663ca68492576f051e4af7720d33f34383e655f2be0d544c8b1c9de915b2f", + "sha256:b5dd3013cab7b330df712891c96cd1ab868c27a7159e606f762015e9bf8ceb3f" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==6.29.0" + }, + "ipython": { + "hashes": [ + 
"sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27", + "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397" + ], + "markers": "python_version >= '3.9'", + "version": "==8.18.1" + }, + "ipywidgets": { + "hashes": [ + "sha256:2b88d728656aea3bbfd05d32c747cfd0078f9d7e159cf982433b58ad717eed7f", + "sha256:40211efb556adec6fa450ccc2a77d59ca44a060f4f9f136833df59c9f538e6e8" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.1" + }, + "isoduration": { + "hashes": [ + "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9", + "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042" + ], + "version": "==20.11.0" + }, + "jedi": { + "hashes": [ + "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd", + "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0" + ], + "markers": "python_version >= '3.6'", + "version": "==0.19.1" + }, + "jinja2": { + "hashes": [ + "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa", + "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90" + ], + "markers": "python_version >= '3.7'", + "version": "==3.1.3" + }, + "jmespath": { + "hashes": [ + "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", + "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, + "json5": { + "hashes": [ + "sha256:740c7f1b9e584a468dbb2939d8d458db3427f2c93ae2139d05f47e453eae964f", + "sha256:9ed66c3a6ca3510a976a9ef9b8c0787de24802724ab1860bc0153c7fdd589b02" + ], + "version": "==0.9.14" + }, + "jsonpointer": { + "hashes": [ + "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a", + "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88" + ], + "version": "==2.4" + }, + "jsonschema": { + "extras": [ + "format-nongpl" + ], + "hashes": [ + 
"sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f", + "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5" + ], + "markers": "python_version >= '3.8'", + "version": "==4.21.1" + }, + "jsonschema-specifications": { + "hashes": [ + "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc", + "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c" + ], + "markers": "python_version >= '3.8'", + "version": "==2023.12.1" + }, + "jupyter": { + "hashes": [ + "sha256:3e1f86076bbb7c8c207829390305a2b1fe836d471ed54be66a3b8c41e7f46cc7", + "sha256:5b290f93b98ffbc21c0c7e749f054b3267782166d72fa5e3ed1ed4eaf34a2b78", + "sha256:d9dc4b3318f310e34c82951ea5d6683f67bed7def4b259fafbfe4f1beb1d8e5f" + ], + "index": "pypi", + "version": "==1.0.0" + }, + "jupyter-client": { + "hashes": [ + "sha256:0642244bb83b4764ae60d07e010e15f0e2d275ec4e918a8f7b80fbbef3ca60c7", + "sha256:909c474dbe62582ae62b758bca86d6518c85234bdee2d908c778db6d72f39d99" + ], + "markers": "python_version >= '3.8'", + "version": "==8.6.0" + }, + "jupyter-console": { + "hashes": [ + "sha256:309d33409fcc92ffdad25f0bcdf9a4a9daa61b6f341177570fdac03de5352485", + "sha256:566a4bf31c87adbfadf22cdf846e3069b59a71ed5da71d6ba4d8aaad14a53539" + ], + "markers": "python_version >= '3.7'", + "version": "==6.6.3" + }, + "jupyter-core": { + "hashes": [ + "sha256:c65c82126453a723a2804aa52409930434598fd9d35091d63dfb919d2b765bb7", + "sha256:de61a9d7fc71240f688b2fb5ab659fbb56979458dc66a71decd098e03c79e218" + ], + "markers": "python_version >= '3.8'", + "version": "==5.7.1" + }, + "jupyter-events": { + "hashes": [ + "sha256:81ad2e4bc710881ec274d31c6c50669d71bbaa5dd9d01e600b56faa85700d399", + "sha256:d853b3c10273ff9bc8bb8b30076d65e2c9685579db736873de6c2232dde148bf" + ], + "markers": "python_version >= '3.8'", + "version": "==0.9.0" + }, + "jupyter-lsp": { + "hashes": [ + "sha256:256d24620542ae4bba04a50fc1f6ffe208093a07d8e697fea0a8d1b8ca1b7e5b", + 
"sha256:3b95229e4168355a8c91928057c1621ac3510ba98b2a925e82ebd77f078b1aa5" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.2" + }, + "jupyter-server": { + "hashes": [ + "sha256:0edb626c94baa22809be1323f9770cf1c00a952b17097592e40d03e6a3951689", + "sha256:184a0f82809a8522777cfb6b760ab6f4b1bb398664c5860a27cec696cb884923" + ], + "markers": "python_version >= '3.8'", + "version": "==2.12.5" + }, + "jupyter-server-terminals": { + "hashes": [ + "sha256:1b80c12765da979513c42c90215481bbc39bd8ae7c0350b4f85bc3eb58d0fa80", + "sha256:396b5ccc0881e550bf0ee7012c6ef1b53edbde69e67cab1d56e89711b46052e8" + ], + "markers": "python_version >= '3.8'", + "version": "==0.5.2" + }, + "jupyterlab": { + "hashes": [ + "sha256:53f132480e5f6564f4e20d1b5ed4e8b7945952a2decd5bdfa43760b1b536c99d", + "sha256:965d92efa82a538ed70ccb3968d9aabba788840da882e13d7b061780cdedc3b7" + ], + "markers": "python_version >= '3.8'", + "version": "==4.0.12" + }, + "jupyterlab-pygments": { + "hashes": [ + "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d", + "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780" + ], + "markers": "python_version >= '3.8'", + "version": "==0.3.0" + }, + "jupyterlab-server": { + "hashes": [ + "sha256:5b1798c9cc6a44f65c757de9f97fc06fc3d42535afbf47d2ace5e964ab447aaf", + "sha256:bd0ec7a99ebcedc8bcff939ef86e52c378e44c2707e053fcd81d046ce979ee63" + ], + "markers": "python_version >= '3.8'", + "version": "==2.25.2" + }, + "jupyterlab-widgets": { + "hashes": [ + "sha256:3cf5bdf5b897bf3bccf1c11873aa4afd776d7430200f765e0686bd352487b58d", + "sha256:6005a4e974c7beee84060fdfba341a3218495046de8ae3ec64888e5fe19fdb4c" + ], + "markers": "python_version >= '3.7'", + "version": "==3.0.9" + }, + "lxml": { + "hashes": [ + "sha256:13521a321a25c641b9ea127ef478b580b5ec82aa2e9fc076c86169d161798b01", + "sha256:14deca1460b4b0f6b01f1ddc9557704e8b365f55c63070463f6c18619ebf964f", + "sha256:16018f7099245157564d7148165132c70adb272fb5a17c048ba70d9cc542a1a1", + 
"sha256:16dd953fb719f0ffc5bc067428fc9e88f599e15723a85618c45847c96f11f431", + "sha256:19a1bc898ae9f06bccb7c3e1dfd73897ecbbd2c96afe9095a6026016e5ca97b8", + "sha256:1ad17c20e3666c035db502c78b86e58ff6b5991906e55bdbef94977700c72623", + "sha256:22b7ee4c35f374e2c20337a95502057964d7e35b996b1c667b5c65c567d2252a", + "sha256:24ef5a4631c0b6cceaf2dbca21687e29725b7c4e171f33a8f8ce23c12558ded1", + "sha256:25663d6e99659544ee8fe1b89b1a8c0aaa5e34b103fab124b17fa958c4a324a6", + "sha256:262bc5f512a66b527d026518507e78c2f9c2bd9eb5c8aeeb9f0eb43fcb69dc67", + "sha256:280f3edf15c2a967d923bcfb1f8f15337ad36f93525828b40a0f9d6c2ad24890", + "sha256:2ad3a8ce9e8a767131061a22cd28fdffa3cd2dc193f399ff7b81777f3520e372", + "sha256:2befa20a13f1a75c751f47e00929fb3433d67eb9923c2c0b364de449121f447c", + "sha256:2f37c6d7106a9d6f0708d4e164b707037b7380fcd0b04c5bd9cae1fb46a856fb", + "sha256:304128394c9c22b6569eba2a6d98392b56fbdfbad58f83ea702530be80d0f9df", + "sha256:342e95bddec3a698ac24378d61996b3ee5ba9acfeb253986002ac53c9a5f6f84", + "sha256:3aeca824b38ca78d9ee2ab82bd9883083d0492d9d17df065ba3b94e88e4d7ee6", + "sha256:3d184e0d5c918cff04cdde9dbdf9600e960161d773666958c9d7b565ccc60c45", + "sha256:3e3898ae2b58eeafedfe99e542a17859017d72d7f6a63de0f04f99c2cb125936", + "sha256:3eea6ed6e6c918e468e693c41ef07f3c3acc310b70ddd9cc72d9ef84bc9564ca", + "sha256:3f14a4fb1c1c402a22e6a341a24c1341b4a3def81b41cd354386dcb795f83897", + "sha256:436a943c2900bb98123b06437cdd30580a61340fbdb7b28aaf345a459c19046a", + "sha256:4946e7f59b7b6a9e27bef34422f645e9a368cb2be11bf1ef3cafc39a1f6ba68d", + "sha256:49a9b4af45e8b925e1cd6f3b15bbba2c81e7dba6dce170c677c9cda547411e14", + "sha256:4f8b0c78e7aac24979ef09b7f50da871c2de2def043d468c4b41f512d831e912", + "sha256:52427a7eadc98f9e62cb1368a5079ae826f94f05755d2d567d93ee1bc3ceb354", + "sha256:5e53d7e6a98b64fe54775d23a7c669763451340c3d44ad5e3a3b48a1efbdc96f", + "sha256:5fcfbebdb0c5d8d18b84118842f31965d59ee3e66996ac842e21f957eb76138c", + "sha256:601f4a75797d7a770daed8b42b97cd1bb1ba18bd51a9382077a6a247a12aa38d", 
+ "sha256:61c5a7edbd7c695e54fca029ceb351fc45cd8860119a0f83e48be44e1c464862", + "sha256:6a2a2c724d97c1eb8cf966b16ca2915566a4904b9aad2ed9a09c748ffe14f969", + "sha256:6d48fc57e7c1e3df57be5ae8614bab6d4e7b60f65c5457915c26892c41afc59e", + "sha256:6f11b77ec0979f7e4dc5ae081325a2946f1fe424148d3945f943ceaede98adb8", + "sha256:704f5572ff473a5f897745abebc6df40f22d4133c1e0a1f124e4f2bd3330ff7e", + "sha256:725e171e0b99a66ec8605ac77fa12239dbe061482ac854d25720e2294652eeaa", + "sha256:7cfced4a069003d8913408e10ca8ed092c49a7f6cefee9bb74b6b3e860683b45", + "sha256:7ec465e6549ed97e9f1e5ed51c657c9ede767bc1c11552f7f4d022c4df4a977a", + "sha256:82bddf0e72cb2af3cbba7cec1d2fd11fda0de6be8f4492223d4a268713ef2147", + "sha256:82cd34f1081ae4ea2ede3d52f71b7be313756e99b4b5f829f89b12da552d3aa3", + "sha256:843b9c835580d52828d8f69ea4302537337a21e6b4f1ec711a52241ba4a824f3", + "sha256:877efb968c3d7eb2dad540b6cabf2f1d3c0fbf4b2d309a3c141f79c7e0061324", + "sha256:8b9f19df998761babaa7f09e6bc169294eefafd6149aaa272081cbddc7ba4ca3", + "sha256:8cf5877f7ed384dabfdcc37922c3191bf27e55b498fecece9fd5c2c7aaa34c33", + "sha256:8d2900b7f5318bc7ad8631d3d40190b95ef2aa8cc59473b73b294e4a55e9f30f", + "sha256:8d7b4beebb178e9183138f552238f7e6613162a42164233e2bda00cb3afac58f", + "sha256:8f52fe6859b9db71ee609b0c0a70fea5f1e71c3462ecf144ca800d3f434f0764", + "sha256:98f3f020a2b736566c707c8e034945c02aa94e124c24f77ca097c446f81b01f1", + "sha256:9aa543980ab1fbf1720969af1d99095a548ea42e00361e727c58a40832439114", + "sha256:9b99f564659cfa704a2dd82d0684207b1aadf7d02d33e54845f9fc78e06b7581", + "sha256:9bcf86dfc8ff3e992fed847c077bd875d9e0ba2fa25d859c3a0f0f76f07f0c8d", + "sha256:9bd0ae7cc2b85320abd5e0abad5ccee5564ed5f0cc90245d2f9a8ef330a8deae", + "sha256:9d3c0f8567ffe7502d969c2c1b809892dc793b5d0665f602aad19895f8d508da", + "sha256:9e5ac3437746189a9b4121db2a7b86056ac8786b12e88838696899328fc44bb2", + "sha256:a36c506e5f8aeb40680491d39ed94670487ce6614b9d27cabe45d94cd5d63e1e", + 
"sha256:a5ab722ae5a873d8dcee1f5f45ddd93c34210aed44ff2dc643b5025981908cda", + "sha256:a96f02ba1bcd330807fc060ed91d1f7a20853da6dd449e5da4b09bfcc08fdcf5", + "sha256:acb6b2f96f60f70e7f34efe0c3ea34ca63f19ca63ce90019c6cbca6b676e81fa", + "sha256:ae15347a88cf8af0949a9872b57a320d2605ae069bcdf047677318bc0bba45b1", + "sha256:af8920ce4a55ff41167ddbc20077f5698c2e710ad3353d32a07d3264f3a2021e", + "sha256:afd825e30f8d1f521713a5669b63657bcfe5980a916c95855060048b88e1adb7", + "sha256:b21b4031b53d25b0858d4e124f2f9131ffc1530431c6d1321805c90da78388d1", + "sha256:b4b68c961b5cc402cbd99cca5eb2547e46ce77260eb705f4d117fd9c3f932b95", + "sha256:b66aa6357b265670bb574f050ffceefb98549c721cf28351b748be1ef9577d93", + "sha256:b9e240ae0ba96477682aa87899d94ddec1cc7926f9df29b1dd57b39e797d5ab5", + "sha256:bc64d1b1dab08f679fb89c368f4c05693f58a9faf744c4d390d7ed1d8223869b", + "sha256:bf8443781533b8d37b295016a4b53c1494fa9a03573c09ca5104550c138d5c05", + "sha256:c26aab6ea9c54d3bed716b8851c8bfc40cb249b8e9880e250d1eddde9f709bf5", + "sha256:c3cd1fc1dc7c376c54440aeaaa0dcc803d2126732ff5c6b68ccd619f2e64be4f", + "sha256:c7257171bb8d4432fe9d6fdde4d55fdbe663a63636a17f7f9aaba9bcb3153ad7", + "sha256:d42e3a3fc18acc88b838efded0e6ec3edf3e328a58c68fbd36a7263a874906c8", + "sha256:d74fcaf87132ffc0447b3c685a9f862ffb5b43e70ea6beec2fb8057d5d2a1fea", + "sha256:d8c1d679df4361408b628f42b26a5d62bd3e9ba7f0c0e7969f925021554755aa", + "sha256:e856c1c7255c739434489ec9c8aa9cdf5179785d10ff20add308b5d673bed5cd", + "sha256:eac68f96539b32fce2c9b47eb7c25bb2582bdaf1bbb360d25f564ee9e04c542b", + "sha256:ed7326563024b6e91fef6b6c7a1a2ff0a71b97793ac33dbbcf38f6005e51ff6e", + "sha256:ed8c3d2cd329bf779b7ed38db176738f3f8be637bb395ce9629fc76f78afe3d4", + "sha256:f4c9bda132ad108b387c33fabfea47866af87f4ea6ffb79418004f0521e63204", + "sha256:f643ffd2669ffd4b5a3e9b41c909b72b2a1d5e4915da90a77e119b8d48ce867a" + ], + "markers": "python_version >= '3.6'", + "version": "==5.1.0" + }, + "markupsafe": { + "hashes": [ + 
"sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", + "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", + "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", + "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", + "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", + "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", + "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", + "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df", + "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", + "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", + "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", + "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", + "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", + "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371", + "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2", + "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", + "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52", + "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", + "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", + "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", + "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", + "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", + "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", + "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", + "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", + "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", 
+ "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", + "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", + "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", + "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9", + "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", + "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", + "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", + "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", + "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", + "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", + "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a", + "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", + "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", + "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", + "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", + "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", + "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", + "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", + "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", + "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f", + "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50", + "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", + "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", + "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", + "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", + 
"sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", + "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", + "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", + "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf", + "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", + "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", + "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", + "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", + "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.5" + }, + "matplotlib-inline": { + "hashes": [ + "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311", + "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304" + ], + "markers": "python_version >= '3.5'", + "version": "==0.1.6" + }, + "mistune": { + "hashes": [ + "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205", + "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8" + ], + "markers": "python_version >= '3.7'", + "version": "==3.0.2" + }, + "mypy-extensions": { + "hashes": [ + "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", + "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782" + ], + "markers": "python_version >= '3.5'", + "version": "==1.0.0" + }, + "nbclient": { + "hashes": [ + "sha256:4b28c207877cf33ef3a9838cdc7a54c5ceff981194a82eac59d558f05487295e", + "sha256:a3a1ddfb34d4a9d17fc744d655962714a866639acd30130e9be84191cd97cd15" + ], + "markers": "python_full_version >= '3.8.0'", + "version": "==0.9.0" + }, + "nbconvert": { + "hashes": [ + "sha256:a7f8808fd4e082431673ac538400218dd45efd076fbeb07cc6e5aa5a3a4e949e", + "sha256:db28590cef90f7faf2ebbc71acd402cbecf13d29176df728c0a9025a49345ea1" + ], + 
"markers": "python_version >= '3.8'", + "version": "==7.14.2" + }, + "nbformat": { + "hashes": [ + "sha256:1c5172d786a41b82bcfd0c23f9e6b6f072e8fb49c39250219e4acfff1efe89e9", + "sha256:5f98b5ba1997dff175e77e0c17d5c10a96eaed2cbd1de3533d1fc35d5e111192" + ], + "markers": "python_version >= '3.8'", + "version": "==5.9.2" + }, + "nest-asyncio": { + "hashes": [ + "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", + "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c" + ], + "markers": "python_version >= '3.5'", + "version": "==1.6.0" + }, + "nodeenv": { + "hashes": [ + "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2", + "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==1.8.0" + }, + "notebook": { + "hashes": [ + "sha256:289b606d7e173f75a18beb1406ef411b43f97f7a9c55ba03efa3622905a62346", + "sha256:3bcff00c17b3ac142ef5f436d50637d936b274cfa0b41f6ac0175363de9b4e09" + ], + "markers": "python_version >= '3.8'", + "version": "==7.0.7" + }, + "notebook-shim": { + "hashes": [ + "sha256:a83496a43341c1674b093bfcebf0fe8e74cbe7eda5fd2bbc56f8e39e1486c0c7", + "sha256:f69388ac283ae008cd506dda10d0288b09a017d822d5e8c7129a152cbd3ce7e9" + ], + "markers": "python_version >= '3.7'", + "version": "==0.2.3" + }, + "numpy": { + "hashes": [ + "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd", + "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b", + "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e", + "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f", + "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f", + "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178", + "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3", + 
"sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4", + "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e", + "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0", + "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00", + "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419", + "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4", + "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6", + "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166", + "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b", + "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3", + "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf", + "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2", + "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2", + "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36", + "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03", + "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce", + "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6", + "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13", + "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5", + "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e", + "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485", + "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137", + "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374", + "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58", + "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b", + "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb", 
+ "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b", + "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda", + "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511" + ], + "markers": "python_version >= '3.9'", + "version": "==1.26.3" + }, + "overrides": { + "hashes": [ + "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", + "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49" + ], + "markers": "python_version >= '3.6'", + "version": "==7.7.0" + }, + "packaging": { + "hashes": [ + "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", + "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2" + }, + "pandocfilters": { + "hashes": [ + "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e", + "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.5.1" + }, + "parse": { + "hashes": [ + "sha256:09002ca350ad42e76629995f71f7b518670bcf93548bdde3684fd55d2be51975", + "sha256:76ddd5214255ae711db4c512be636151fbabaa948c6f30115aecc440422ca82c" + ], + "version": "==1.20.1" + }, + "parsley": { + "hashes": [ + "sha256:9444278d47161d5f2be76a767809a3cbe6db4db822f46a4fd7481d4057208d41", + "sha256:c3bc417b8c7e3a96c87c0f2f751bfd784ed5156ffccebe2f84330df5685f8dc3" + ], + "version": "==1.3" + }, + "parso": { + "hashes": [ + "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0", + "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75" + ], + "markers": "python_version >= '3.6'", + "version": "==0.8.3" + }, + "pathspec": { + "hashes": [ + "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", + "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" + ], + 
"markers": "python_version >= '3.8'", + "version": "==0.12.1" + }, + "pexpect": { + "hashes": [ + "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", + "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f" + ], + "markers": "sys_platform != 'win32'", + "version": "==4.9.0" + }, + "platformdirs": { + "hashes": [ + "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068", + "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768" + ], + "markers": "python_version >= '3.8'", + "version": "==4.2.0" + }, + "pluggy": { + "hashes": [ + "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981", + "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be" + ], + "markers": "python_version >= '3.8'", + "version": "==1.4.0" + }, + "polars": { + "hashes": [ + "sha256:359d556fafcb533bb0caa34ddfbd5161ee23b8a43817c7a2b80189720a1f42f6", + "sha256:3cfc71d4569818548c9bf4285c497d62a3a542363c86940593a41dd731e69d7f", + "sha256:ddff2fa419f15aa64ee23a94655fcb24b3e1b5c3eb30d124e3315deca2039a92", + "sha256:ec742fdf41e16ff699c043259ba94a11bbc2f7dcb978d768495db1ff2b3c5c20", + "sha256:f0928576a52eca47e14a8b98f4da22025b4b2fa32549f80f4d92c5187fd3f461", + "sha256:fd6100df0ca53614c3fa7136251e030fb70dee8833023edf7a3ac380f8e2dce5" + ], + "markers": "python_version >= '3.8'", + "version": "==0.20.7" + }, + "pre-commit": { + "hashes": [ + "sha256:c255039ef399049a5544b6ce13d135caba8f2c28c3b4033277a788f434308376", + "sha256:d30bad9abf165f7785c15a21a1f46da7d0677cb00ee7ff4c579fd38922efe15d" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==3.6.0" + }, + "prometheus-client": { + "hashes": [ + "sha256:4585b0d1223148c27a225b10dbec5ae9bc4c81a99a3fa80774fa6209935324e1", + "sha256:c88b1e6ecf6b41cd8fb5731c7ae919bf66df6ec6fafa555cd6c0e16ca169ae92" + ], + "markers": "python_version >= '3.8'", + "version": "==0.19.0" + }, + "prompt-toolkit": { + "hashes": [ + 
"sha256:3527b7af26106cbc65a040bcc84839a3566ec1b051bb0bfe953631e704b0ff7d", + "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.0.43" + }, + "psutil": { + "hashes": [ + "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d", + "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73", + "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8", + "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2", + "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e", + "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36", + "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7", + "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c", + "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee", + "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421", + "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf", + "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81", + "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0", + "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631", + "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4", + "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==5.9.8" + }, + "psycopg2": { + "hashes": [ + "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981", + "sha256:38a8dcc6856f569068b47de286b472b7c473ac7977243593a288ebce0dc89516", + "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3", + "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa", + 
"sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a", + "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693", + "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372", + "sha256:bac58c024c9922c23550af2a581998624d6e02350f4ae9c5f0bc642c633a2d5e", + "sha256:c92811b2d4c9b6ea0285942b2e7cac98a59e166d59c588fe5cfe1eda58e72d59", + "sha256:d1454bde93fb1e224166811694d600e746430c006fbb031ea06ecc2ea41bf156", + "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024", + "sha256:de80739447af31525feddeb8effd640782cf5998e1a4e9192ebdf829717e3913", + "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c" + ], + "markers": "python_version >= '3.7'", + "version": "==2.9.9" + }, + "psycopg2-binary": { + "hashes": [ + "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9", + "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77", + "sha256:0c009475ee389757e6e34611d75f6e4f05f0cf5ebb76c6037508318e1a1e0d7e", + "sha256:0ef4854e82c09e84cc63084a9e4ccd6d9b154f1dbdd283efb92ecd0b5e2b8c84", + "sha256:1236ed0952fbd919c100bc839eaa4a39ebc397ed1c08a97fc45fee2a595aa1b3", + "sha256:143072318f793f53819048fdfe30c321890af0c3ec7cb1dfc9cc87aa88241de2", + "sha256:15208be1c50b99203fe88d15695f22a5bed95ab3f84354c494bcb1d08557df67", + "sha256:1873aade94b74715be2246321c8650cabf5a0d098a95bab81145ffffa4c13876", + "sha256:18d0ef97766055fec15b5de2c06dd8e7654705ce3e5e5eed3b6651a1d2a9a152", + "sha256:1ea665f8ce695bcc37a90ee52de7a7980be5161375d42a0b6c6abedbf0d81f0f", + "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a", + "sha256:246b123cc54bb5361588acc54218c8c9fb73068bf227a4a531d8ed56fa3ca7d6", + "sha256:275ff571376626195ab95a746e6a04c7df8ea34638b99fc11160de91f2fef503", + "sha256:281309265596e388ef483250db3640e5f414168c5a67e9c665cafce9492eda2f", + "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493", + 
"sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996", + "sha256:30dcc86377618a4c8f3b72418df92e77be4254d8f89f14b8e8f57d6d43603c0f", + "sha256:31a34c508c003a4347d389a9e6fcc2307cc2150eb516462a7a17512130de109e", + "sha256:323ba25b92454adb36fa425dc5cf6f8f19f78948cbad2e7bc6cdf7b0d7982e59", + "sha256:34eccd14566f8fe14b2b95bb13b11572f7c7d5c36da61caf414d23b91fcc5d94", + "sha256:3a58c98a7e9c021f357348867f537017057c2ed7f77337fd914d0bedb35dace7", + "sha256:3f78fd71c4f43a13d342be74ebbc0666fe1f555b8837eb113cb7416856c79682", + "sha256:4154ad09dac630a0f13f37b583eae260c6aa885d67dfbccb5b02c33f31a6d420", + "sha256:420f9bbf47a02616e8554e825208cb947969451978dceb77f95ad09c37791dae", + "sha256:4686818798f9194d03c9129a4d9a702d9e113a89cb03bffe08c6cf799e053291", + "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe", + "sha256:60989127da422b74a04345096c10d416c2b41bd7bf2a380eb541059e4e999980", + "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93", + "sha256:68fc1f1ba168724771e38bee37d940d2865cb0f562380a1fb1ffb428b75cb692", + "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119", + "sha256:729177eaf0aefca0994ce4cffe96ad3c75e377c7b6f4efa59ebf003b6d398716", + "sha256:72dffbd8b4194858d0941062a9766f8297e8868e1dd07a7b36212aaa90f49472", + "sha256:75723c3c0fbbf34350b46a3199eb50638ab22a0228f93fb472ef4d9becc2382b", + "sha256:77853062a2c45be16fd6b8d6de2a99278ee1d985a7bd8b103e97e41c034006d2", + "sha256:78151aa3ec21dccd5cdef6c74c3e73386dcdfaf19bced944169697d7ac7482fc", + "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c", + "sha256:804d99b24ad523a1fe18cc707bf741670332f7c7412e9d49cb5eab67e886b9b5", + "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab", + "sha256:8359bf4791968c5a78c56103702000105501adb557f3cf772b2c207284273984", + "sha256:83791a65b51ad6ee6cf0845634859d69a038ea9b03d7b26e703f94c7e93dbcf9", + "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf", 
+ "sha256:876801744b0dee379e4e3c38b76fc89f88834bb15bf92ee07d94acd06ec890a0", + "sha256:8dbf6d1bc73f1d04ec1734bae3b4fb0ee3cb2a493d35ede9badbeb901fb40f6f", + "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212", + "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb", + "sha256:977646e05232579d2e7b9c59e21dbe5261f403a88417f6a6512e70d3f8a046be", + "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90", + "sha256:a148c5d507bb9b4f2030a2025c545fccb0e1ef317393eaba42e7eabd28eb6041", + "sha256:a6cdcc3ede532f4a4b96000b6362099591ab4a3e913d70bcbac2b56c872446f7", + "sha256:ac05fb791acf5e1a3e39402641827780fe44d27e72567a000412c648a85ba860", + "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d", + "sha256:b58b4710c7f4161b5e9dcbe73bb7c62d65670a87df7bcce9e1faaad43e715245", + "sha256:b6356793b84728d9d50ead16ab43c187673831e9d4019013f1402c41b1db9b27", + "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417", + "sha256:bc7bb56d04601d443f24094e9e31ae6deec9ccb23581f75343feebaf30423359", + "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202", + "sha256:c332c8d69fb64979ebf76613c66b985414927a40f8defa16cf1bc028b7b0a7b0", + "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7", + "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba", + "sha256:ca08decd2697fdea0aea364b370b1249d47336aec935f87b8bbfd7da5b2ee9c1", + "sha256:ca49a8119c6cbd77375ae303b0cfd8c11f011abbbd64601167ecca18a87e7cdd", + "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07", + "sha256:d2997c458c690ec2bc6b0b7ecbafd02b029b7b4283078d3b32a852a7ce3ddd98", + "sha256:d3f82c171b4ccd83bbaf35aa05e44e690113bd4f3b7b6cc54d2219b132f3ae55", + "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d", + "sha256:ead20f7913a9c1e894aebe47cccf9dc834e1618b7aa96155d2091a626e59c972", + 
"sha256:ebdc36bea43063116f0486869652cb2ed7032dbc59fbcb4445c4862b5c1ecf7f", + "sha256:ed1184ab8f113e8d660ce49a56390ca181f2981066acc27cf637d5c1e10ce46e", + "sha256:ee825e70b1a209475622f7f7b776785bd68f34af6e7a46e2e42f27b659b5bc26", + "sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957", + "sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53", + "sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52" + ], + "version": "==2.9.9" + }, + "ptyprocess": { + "hashes": [ + "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", + "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" + ], + "version": "==0.7.0" + }, + "pure-eval": { + "hashes": [ + "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", + "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + ], + "version": "==0.2.2" + }, + "pycparser": { + "hashes": [ + "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9", + "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206" + ], + "version": "==2.21" + }, + "pydantic": { + "hashes": [ + "sha256:1440966574e1b5b99cf75a13bec7b20e3512e8a61b894ae252f56275e2c465ae", + "sha256:ae887bd94eb404b09d86e4d12f93893bdca79d766e738528c6fa1c849f3c6bcf" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==2.6.0" + }, + "pydantic-core": { + "hashes": [ + "sha256:06f0d5a1d9e1b7932477c172cc720b3b23c18762ed7a8efa8398298a59d177c7", + "sha256:07982b82d121ed3fc1c51faf6e8f57ff09b1325d2efccaa257dd8c0dd937acca", + "sha256:0f478ec204772a5c8218e30eb813ca43e34005dff2eafa03931b3d8caef87d51", + "sha256:102569d371fadc40d8f8598a59379c37ec60164315884467052830b28cc4e9da", + "sha256:10dca874e35bb60ce4f9f6665bfbfad050dd7573596608aeb9e098621ac331dc", + "sha256:150ba5c86f502c040b822777e2e519b5625b47813bd05f9273a8ed169c97d9ae", + "sha256:1661c668c1bb67b7cec96914329d9ab66755911d093bb9063c4c8914188af6d4", + 
"sha256:1a2fe7b00a49b51047334d84aafd7e39f80b7675cad0083678c58983662da89b", + "sha256:1ae8048cba95f382dba56766525abca438328455e35c283bb202964f41a780b0", + "sha256:20f724a023042588d0f4396bbbcf4cffd0ddd0ad3ed4f0d8e6d4ac4264bae81e", + "sha256:2133b0e412a47868a358713287ff9f9a328879da547dc88be67481cdac529118", + "sha256:21e3298486c4ea4e4d5cc6fb69e06fb02a4e22089304308817035ac006a7f506", + "sha256:21ebaa4bf6386a3b22eec518da7d679c8363fb7fb70cf6972161e5542f470798", + "sha256:23632132f1fd608034f1a56cc3e484be00854db845b3a4a508834be5a6435a6f", + "sha256:2d5bea8012df5bb6dda1e67d0563ac50b7f64a5d5858348b5c8cb5043811c19d", + "sha256:300616102fb71241ff477a2cbbc847321dbec49428434a2f17f37528721c4948", + "sha256:30a8259569fbeec49cfac7fda3ec8123486ef1b729225222f0d41d5f840b476f", + "sha256:399166f24c33a0c5759ecc4801f040dbc87d412c1a6d6292b2349b4c505effc9", + "sha256:3fac641bbfa43d5a1bed99d28aa1fded1984d31c670a95aac1bf1d36ac6ce137", + "sha256:42c29d54ed4501a30cd71015bf982fa95e4a60117b44e1a200290ce687d3e640", + "sha256:462d599299c5971f03c676e2b63aa80fec5ebc572d89ce766cd11ca8bcb56f3f", + "sha256:4eebbd049008eb800f519578e944b8dc8e0f7d59a5abb5924cc2d4ed3a1834ff", + "sha256:502c062a18d84452858f8aea1e520e12a4d5228fc3621ea5061409d666ea1706", + "sha256:5317c04349472e683803da262c781c42c5628a9be73f4750ac7d13040efb5d2d", + "sha256:5511f962dd1b9b553e9534c3b9c6a4b0c9ded3d8c2be96e61d56f933feef9e1f", + "sha256:561be4e3e952c2f9056fba5267b99be4ec2afadc27261505d4992c50b33c513c", + "sha256:601d3e42452cd4f2891c13fa8c70366d71851c1593ed42f57bf37f40f7dca3c8", + "sha256:644904600c15816a1f9a1bafa6aab0d21db2788abcdf4e2a77951280473f33e1", + "sha256:653a5dfd00f601a0ed6654a8b877b18d65ac32c9d9997456e0ab240807be6cf7", + "sha256:694a5e9f1f2c124a17ff2d0be613fd53ba0c26de588eb4bdab8bca855e550d95", + "sha256:71b4a48a7427f14679f0015b13c712863d28bb1ab700bd11776a5368135c7d60", + "sha256:72bf9308a82b75039b8c8edd2be2924c352eda5da14a920551a8b65d5ee89253", + "sha256:735dceec50fa907a3c314b84ed609dec54b76a814aa14eb90da31d1d36873a5e", 
+ "sha256:73802194f10c394c2bedce7a135ba1d8ba6cff23adf4217612bfc5cf060de34c", + "sha256:780daad9e35b18d10d7219d24bfb30148ca2afc309928e1d4d53de86822593dc", + "sha256:8655f55fe68c4685673265a650ef71beb2d31871c049c8b80262026f23605ee3", + "sha256:877045a7969ace04d59516d5d6a7dee13106822f99a5d8df5e6822941f7bedc8", + "sha256:87bce04f09f0552b66fca0c4e10da78d17cb0e71c205864bab4e9595122cb9d9", + "sha256:8d4dfc66abea3ec6d9f83e837a8f8a7d9d3a76d25c9911735c76d6745950e62c", + "sha256:8ec364e280db4235389b5e1e6ee924723c693cbc98e9d28dc1767041ff9bc388", + "sha256:8fa00fa24ffd8c31fac081bf7be7eb495be6d248db127f8776575a746fa55c95", + "sha256:920c4897e55e2881db6a6da151198e5001552c3777cd42b8a4c2f72eedc2ee91", + "sha256:920f4633bee43d7a2818e1a1a788906df5a17b7ab6fe411220ed92b42940f818", + "sha256:9795f56aa6b2296f05ac79d8a424e94056730c0b860a62b0fdcfe6340b658cc8", + "sha256:98f0edee7ee9cc7f9221af2e1b95bd02810e1c7a6d115cfd82698803d385b28f", + "sha256:99c095457eea8550c9fa9a7a992e842aeae1429dab6b6b378710f62bfb70b394", + "sha256:99d3a433ef5dc3021c9534a58a3686c88363c591974c16c54a01af7efd741f13", + "sha256:99f9a50b56713a598d33bc23a9912224fc5d7f9f292444e6664236ae471ddf17", + "sha256:9c46e556ee266ed3fb7b7a882b53df3c76b45e872fdab8d9cf49ae5e91147fd7", + "sha256:9f5d37ff01edcbace53a402e80793640c25798fb7208f105d87a25e6fcc9ea06", + "sha256:a0b4cfe408cd84c53bab7d83e4209458de676a6ec5e9c623ae914ce1cb79b96f", + "sha256:a497be217818c318d93f07e14502ef93d44e6a20c72b04c530611e45e54c2196", + "sha256:ac89ccc39cd1d556cc72d6752f252dc869dde41c7c936e86beac5eb555041b66", + "sha256:adf28099d061a25fbcc6531febb7a091e027605385de9fe14dd6a97319d614cf", + "sha256:afa01d25769af33a8dac0d905d5c7bb2d73c7c3d5161b2dd6f8b5b5eea6a3c4c", + "sha256:b1fc07896fc1851558f532dffc8987e526b682ec73140886c831d773cef44b76", + "sha256:b49c604ace7a7aa8af31196abbf8f2193be605db6739ed905ecaf62af31ccae0", + "sha256:b9f3e0bffad6e238f7acc20c393c1ed8fab4371e3b3bc311020dfa6020d99212", + 
"sha256:ba07646f35e4e49376c9831130039d1b478fbfa1215ae62ad62d2ee63cf9c18f", + "sha256:bd88f40f2294440d3f3c6308e50d96a0d3d0973d6f1a5732875d10f569acef49", + "sha256:c0be58529d43d38ae849a91932391eb93275a06b93b79a8ab828b012e916a206", + "sha256:c45f62e4107ebd05166717ac58f6feb44471ed450d07fecd90e5f69d9bf03c48", + "sha256:c56da23034fe66221f2208c813d8aa509eea34d97328ce2add56e219c3a9f41c", + "sha256:c94b5537bf6ce66e4d7830c6993152940a188600f6ae044435287753044a8fe2", + "sha256:cebf8d56fee3b08ad40d332a807ecccd4153d3f1ba8231e111d9759f02edfd05", + "sha256:d0bf6f93a55d3fa7a079d811b29100b019784e2ee6bc06b0bb839538272a5610", + "sha256:d195add190abccefc70ad0f9a0141ad7da53e16183048380e688b466702195dd", + "sha256:d25ef0c33f22649b7a088035fd65ac1ce6464fa2876578df1adad9472f918a76", + "sha256:d6cbdf12ef967a6aa401cf5cdf47850559e59eedad10e781471c960583f25aa1", + "sha256:d8c032ccee90b37b44e05948b449a2d6baed7e614df3d3f47fe432c952c21b60", + "sha256:daff04257b49ab7f4b3f73f98283d3dbb1a65bf3500d55c7beac3c66c310fe34", + "sha256:e83ebbf020be727d6e0991c1b192a5c2e7113eb66e3def0cd0c62f9f266247e4", + "sha256:ed3025a8a7e5a59817b7494686d449ebfbe301f3e757b852c8d0d1961d6be864", + "sha256:f1936ef138bed2165dd8573aa65e3095ef7c2b6247faccd0e15186aabdda7f66", + "sha256:f5247a3d74355f8b1d780d0f3b32a23dd9f6d3ff43ef2037c6dcd249f35ecf4c", + "sha256:fa496cd45cda0165d597e9d6f01e36c33c9508f75cf03c0a650018c5048f578e", + "sha256:fb4363e6c9fc87365c2bc777a1f585a22f2f56642501885ffc7942138499bf54", + "sha256:fb4370b15111905bf8b5ba2129b926af9470f014cb0493a67d23e9d7a48348e8", + "sha256:fbec2af0ebafa57eb82c18c304b37c86a8abddf7022955d1742b3d5471a6339e" + ], + "markers": "python_version >= '3.8'", + "version": "==2.16.1" + }, + "pyee": { + "hashes": [ + "sha256:5c7e60f8df95710dbe17550e16ce0153f83990c00ef744841b43f371ed53ebea", + "sha256:c09f56e36eb10bf23aa2aacf145f690ded75b990a3d9523fd478b005940303d2" + ], + "version": "==8.2.2" + }, + "pygments": { + "hashes": [ + 
"sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c", + "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367" + ], + "markers": "python_version >= '3.7'", + "version": "==2.17.2" + }, + "pyliftover": { + "hashes": [ + "sha256:72bcfb7de907569b0eb75e86c817840365297d63ba43a961da394187e399da41" + ], + "version": "==0.4" + }, + "pyppeteer": { + "hashes": [ + "sha256:11a734d8f02c6b128035aba8faf32748f2016310a6a1cbc6aa5b1e2580742e8f", + "sha256:ddb0d15cb644720160d49abb1ad0d97e87a55581febf1b7531be9e983aad7742" + ], + "markers": "python_version >= '3.7' and python_version < '4.0'", + "version": "==1.0.2" + }, + "pyquery": { + "hashes": [ + "sha256:8dfc9b4b7c5f877d619bbae74b1898d5743f6ca248cfd5d72b504dd614da312f", + "sha256:963e8d4e90262ff6d8dec072ea97285dc374a2f69cad7776f4082abcf6a1d8ae" + ], + "version": "==2.0.0" + }, + "pysam": { + "hashes": [ + "sha256:021fbf6874ad998aba19be33828ad9d23d52273643793488ac4b12917d714c68", + "sha256:116278a7caa122b2b8acc56d13b3599be9b1236f27a12488bffc306858ff0d57", + "sha256:1b84f99aa04e30bd1cc35c01bd41c2b7680131f56c71a740805aff8086f24b56", + "sha256:26199e403855b9da45341d25682e0df27013687d9cb1b4fd328136fbd506292b", + "sha256:32042e0bf3c5dd8554769442c2e1f7b6ada902c33ee44c616d0403e7acd12ee3", + "sha256:34f5653a82138d28a8e86205785a0398eb6c89f776b4145ff42783168757323c", + "sha256:4779a99d1ece17a98724d87a5c10c455cf212b3baa3a8399d3d072e4d0ae5ba0", + "sha256:481e4efbfbc07b6b92194a005cb9a98006c8378024f41c7b66c58b14f6e77f9c", + "sha256:4f6657a09c81333adb5545cf9a20d4c2ca1686acf8609ad58f13b3ec1b52a9cf", + "sha256:6d6aa2346b11ad35e88c65eb0067321318c25c7f35f75c98061173eabefcf8b0", + "sha256:6ffe5c98725fea54b1b2aa8f14a60ee9ceaed32c04460d1b861a62603dcd7153", + "sha256:83776ba587eb9575a209efed1cedb49d69c5fa6cc520dd722a0a09d0bb4e9b87", + "sha256:87dbf72f3e61fd6d3f92b1b683d9a9e797b6cc213ffcd971899f24a16f9f6e8f", + "sha256:93eb12be3822fb387e5438811f62a0f5e56c1edd5c830aaa316fb50d3d0bc181", + 
"sha256:942dd4a2263996bc2daa21200886e9fde027f32ce8820e7832b20bbdb97eb393", + "sha256:9af1cd3d07fd4c84e9b3d8a46c65b25f95278185bc6d44c4a48951679d5189ac", + "sha256:9b8e18520e7a79bad91b44cf9199c7fa42cec5c3020024d7ef9a7161d0099bf8", + "sha256:9ba53f9b0b2c5cb57908855cdb35a31b34c5211d215aa01bdb3e9b3d05c659cc", + "sha256:9bfebf89b1dc2ff6f88d64b5f05d8630deb89562b22764f8ee7f6fa9e677bb91", + "sha256:9d3ebb1515c2fd9b11823469e5b211ca3cc89e976c00c284a2190804c9f11726", + "sha256:a98d1ddca64943f3ead507721e52466aea2f7303e549d4960a2eb1d9fff8e3d7", + "sha256:ab7a46973cf0ab8c6ac327f4c3fb67698d7ccbeef8631a716898c6ba01ef3e45", + "sha256:bb61bf30c15f6767403b423b04c293e96fd7635457b506c849aafcf48fc13242", + "sha256:cfd2b858c7405cf38c730cba779ddf9f8cff28b4842c6440e64781650dcb9a52", + "sha256:da2f1af461e44d5c2c7210d458ee216f8ab98486adf1eea6c88eea5c1058a62f", + "sha256:f23b2f47528b94e8abe3b700103fb1214c623ae1c1b8125ecf22d4d33d76720f", + "sha256:f73d7923c89618fb7024875ed8eddc5fb0c911f430e3495de482fcee48143e45" + ], + "markers": "python_version >= '3.6'", + "version": "==0.22.0" + }, + "pytest": { + "hashes": [ + "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280", + "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==7.4.4" + }, + "pytest-asyncio": { + "hashes": [ + "sha256:2143d9d9375bf372a73260e4114541485e84fca350b0b6b92674ca56ff5f7ea2", + "sha256:b0079dfac14b60cd1ce4691fbfb1748fe939db7d0234b5aba97197d10fbe0fef" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.23.4" + }, + "pytest-cov": { + "hashes": [ + "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6", + "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==4.1.0" + }, + "python-dateutil": { + "hashes": [ + 
"sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" + }, + "python-json-logger": { + "hashes": [ + "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c", + "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd" + ], + "markers": "python_version >= '3.6'", + "version": "==2.0.7" + }, + "pyyaml": { + "hashes": [ + "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", + "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", + "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", + "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", + "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", + "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", + "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", + "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", + "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", + "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", + "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", + "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", + "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", + "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", + "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", + "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", + "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", + "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", + 
"sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", + "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", + "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", + "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", + "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", + "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", + "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", + "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", + "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", + "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", + "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", + "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", + "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", + "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", + "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", + "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", + "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", + "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", + "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", + "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", + "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", + "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", + "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", + "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", + "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", + "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", 
+ "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", + "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", + "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", + "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", + "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", + "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", + "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" + ], + "markers": "python_version >= '3.6'", + "version": "==6.0.1" + }, + "pyzmq": { + "hashes": [ + "sha256:004ff469d21e86f0ef0369717351073e0e577428e514c47c8480770d5e24a565", + "sha256:00a06faa7165634f0cac1abb27e54d7a0b3b44eb9994530b8ec73cf52e15353b", + "sha256:00c48ae2fd81e2a50c3485de1b9d5c7c57cd85dc8ec55683eac16846e57ac979", + "sha256:01171fc48542348cd1a360a4b6c3e7d8f46cdcf53a8d40f84db6707a6768acc1", + "sha256:019744b99da30330798bb37df33549d59d380c78e516e3bab9c9b84f87a9592f", + "sha256:02bbc1a87b76e04fd780b45e7f695471ae6de747769e540da909173d50ff8e2d", + "sha256:02c9087b109070c5ab0b383079fa1b5f797f8d43e9a66c07a4b8b8bdecfd88ee", + "sha256:07cd61a20a535524906595e09344505a9bd46f1da7a07e504b315d41cd42eb07", + "sha256:0806175f2ae5ad4b835ecd87f5f85583316b69f17e97786f7443baaf54b9bb98", + "sha256:09dfe949e83087da88c4a76767df04b22304a682d6154de2c572625c62ad6886", + "sha256:0dabfb10ef897f3b7e101cacba1437bd3a5032ee667b7ead32bbcdd1a8422fe7", + "sha256:0ddd6d71d4ef17ba5a87becf7ddf01b371eaba553c603477679ae817a8d84d75", + "sha256:0f513130c4c361201da9bc69df25a086487250e16b5571ead521b31ff6b02220", + "sha256:0f97bc2f1f13cb16905a5f3e1fbdf100e712d841482b2237484360f8bc4cb3d7", + "sha256:11e70516688190e9c2db14fcf93c04192b02d457b582a1f6190b154691b4c93a", + "sha256:146b9b1f29ead41255387fb07be56dc29639262c0f7344f570eecdcd8d683314", + "sha256:16b726c1f6c2e7625706549f9dbe9b06004dfbec30dbed4bf50cbdfc73e5b32a", + 
"sha256:1b3cbba2f47062b85fe0ef9de5b987612140a9ba3a9c6d2543c6dec9f7c2ab27", + "sha256:1b9b1f2ad6498445a941d9a4fee096d387fee436e45cc660e72e768d3d8ee611", + "sha256:1ec23bd7b3a893ae676d0e54ad47d18064e6c5ae1fadc2f195143fb27373f7f6", + "sha256:246747b88917e4867e2367b005fc8eefbb4a54b7db363d6c92f89d69abfff4b6", + "sha256:25c2dbb97d38b5ac9fd15586e048ec5eb1e38f3d47fe7d92167b0c77bb3584e9", + "sha256:2c6441e0398c2baacfe5ba30c937d274cfc2dc5b55e82e3749e333aabffde561", + "sha256:2c9a79f1d2495b167119d02be7448bfba57fad2a4207c4f68abc0bab4b92925b", + "sha256:2e2713ef44be5d52dd8b8e2023d706bf66cb22072e97fc71b168e01d25192755", + "sha256:313c3794d650d1fccaaab2df942af9f2c01d6217c846177cfcbc693c7410839e", + "sha256:3516e0b6224cf6e43e341d56da15fd33bdc37fa0c06af4f029f7d7dfceceabbc", + "sha256:359f7f74b5d3c65dae137f33eb2bcfa7ad9ebefd1cab85c935f063f1dbb245cc", + "sha256:39b1067f13aba39d794a24761e385e2eddc26295826530a8c7b6c6c341584289", + "sha256:3c00c9b7d1ca8165c610437ca0c92e7b5607b2f9076f4eb4b095c85d6e680a1d", + "sha256:3c53687dde4d9d473c587ae80cc328e5b102b517447456184b485587ebd18b62", + "sha256:3e124e6b1dd3dfbeb695435dff0e383256655bb18082e094a8dd1f6293114642", + "sha256:4345c9a27f4310afbb9c01750e9461ff33d6fb74cd2456b107525bbeebcb5be3", + "sha256:45999e7f7ed5c390f2e87ece7f6c56bf979fb213550229e711e45ecc7d42ccb8", + "sha256:49151b0efece79f6a79d41a461d78535356136ee70084a1c22532fc6383f4ad0", + "sha256:4cb8fc1f8d69b411b8ec0b5f1ffbcaf14c1db95b6bccea21d83610987435f1a4", + "sha256:4e5837af3e5aaa99a091302df5ee001149baff06ad22b722d34e30df5f0d9097", + "sha256:4e6f689880d5ad87918430957297c975203a082d9a036cc426648fcbedae769b", + "sha256:5074adeacede5f810b7ef39607ee59d94e948b4fd954495bdb072f8c54558181", + "sha256:518efd91c3d8ac9f9b4f7dd0e2b7b8bf1a4fe82a308009016b07eaa48681af82", + "sha256:55875492f820d0eb3417b51d96fea549cde77893ae3790fd25491c5754ea2f68", + "sha256:5a68d491fc20762b630e5db2191dd07ff89834086740f70e978bb2ef2668be08", + "sha256:5dde6751e857910c1339890f3524de74007958557593b9e7e8c5f01cd919f8a7", 
+ "sha256:5e319ed7d6b8f5fad9b76daa0a68497bc6f129858ad956331a5835785761e003", + "sha256:5edac3f57c7ddaacdb4d40f6ef2f9e299471fc38d112f4bc6d60ab9365445fb0", + "sha256:6cc0020b74b2e410287e5942e1e10886ff81ac77789eb20bec13f7ae681f0fdd", + "sha256:6dd0d50bbf9dca1d0bdea219ae6b40f713a3fb477c06ca3714f208fd69e16fd8", + "sha256:7598d2ba821caa37a0f9d54c25164a4fa351ce019d64d0b44b45540950458840", + "sha256:759cfd391a0996345ba94b6a5110fca9c557ad4166d86a6e81ea526c376a01e8", + "sha256:7ae8f354b895cbd85212da245f1a5ad8159e7840e37d78b476bb4f4c3f32a9fe", + "sha256:7b6d09a8962a91151f0976008eb7b29b433a560fde056ec7a3db9ec8f1075438", + "sha256:7c61e346ac34b74028ede1c6b4bcecf649d69b707b3ff9dc0fab453821b04d1e", + "sha256:7f51a7b4ead28d3fca8dda53216314a553b0f7a91ee8fc46a72b402a78c3e43d", + "sha256:82544e0e2d0c1811482d37eef297020a040c32e0687c1f6fc23a75b75db8062c", + "sha256:8807c87fa893527ae8a524c15fc505d9950d5e856f03dae5921b5e9aa3b8783b", + "sha256:889370d5174a741a62566c003ee8ddba4b04c3f09a97b8000092b7ca83ec9c49", + "sha256:8b14c75979ce932c53b79976a395cb2a8cd3aaf14aef75e8c2cb55a330b9b49d", + "sha256:8c5f80e578427d4695adac6fdf4370c14a2feafdc8cb35549c219b90652536ae", + "sha256:8e9f3fabc445d0ce320ea2c59a75fe3ea591fdbdeebec5db6de530dd4b09412e", + "sha256:93f1aa311e8bb912e34f004cf186407a4e90eec4f0ecc0efd26056bf7eda0226", + "sha256:94504ff66f278ab4b7e03e4cba7e7e400cb73bfa9d3d71f58d8972a8dc67e7a6", + "sha256:967668420f36878a3c9ecb5ab33c9d0ff8d054f9c0233d995a6d25b0e95e1b6b", + "sha256:9880078f683466b7f567b8624bfc16cad65077be046b6e8abb53bed4eeb82dd3", + "sha256:99a6b36f95c98839ad98f8c553d8507644c880cf1e0a57fe5e3a3f3969040882", + "sha256:9a18fff090441a40ffda8a7f4f18f03dc56ae73f148f1832e109f9bffa85df15", + "sha256:9add2e5b33d2cd765ad96d5eb734a5e795a0755f7fc49aa04f76d7ddda73fd70", + "sha256:a793ac733e3d895d96f865f1806f160696422554e46d30105807fdc9841b9f7d", + "sha256:a86c2dd76ef71a773e70551a07318b8e52379f58dafa7ae1e0a4be78efd1ff16", + 
"sha256:a8c1d566344aee826b74e472e16edae0a02e2a044f14f7c24e123002dcff1c05", + "sha256:ac170e9e048b40c605358667aca3d94e98f604a18c44bdb4c102e67070f3ac9b", + "sha256:b264bf2cc96b5bc43ce0e852be995e400376bd87ceb363822e2cb1964fcdc737", + "sha256:b8c8a419dfb02e91b453615c69568442e897aaf77561ee0064d789705ff37a92", + "sha256:bc69c96735ab501419c432110016329bf0dea8898ce16fab97c6d9106dc0b348", + "sha256:bef02cfcbded83473bdd86dd8d3729cd82b2e569b75844fb4ea08fee3c26ae41", + "sha256:c0b5ca88a8928147b7b1e2dfa09f3b6c256bc1135a1338536cbc9ea13d3b7add", + "sha256:cc69949484171cc961e6ecd4a8911b9ce7a0d1f738fcae717177c231bf77437b", + "sha256:ced111c2e81506abd1dc142e6cd7b68dd53747b3b7ae5edbea4578c5eeff96b7", + "sha256:d1299d7e964c13607efd148ca1f07dcbf27c3ab9e125d1d0ae1d580a1682399d", + "sha256:d1b604734bec94f05f81b360a272fc824334267426ae9905ff32dc2be433ab96", + "sha256:d9a5f194cf730f2b24d6af1f833c14c10f41023da46a7f736f48b6d35061e76e", + "sha256:db36c27baed588a5a8346b971477b718fdc66cf5b80cbfbd914b4d6d355e44e2", + "sha256:df0c7a16ebb94452d2909b9a7b3337940e9a87a824c4fc1c7c36bb4404cb0cde", + "sha256:e10a4b5a4b1192d74853cc71a5e9fd022594573926c2a3a4802020360aa719d8", + "sha256:e624c789359f1a16f83f35e2c705d07663ff2b4d4479bad35621178d8f0f6ea4", + "sha256:e690145a8c0c273c28d3b89d6fb32c45e0d9605b2293c10e650265bf5c11cfec", + "sha256:ea1608dd169da230a0ad602d5b1ebd39807ac96cae1845c3ceed39af08a5c6df", + "sha256:ea253b368eb41116011add00f8d5726762320b1bda892f744c91997b65754d73", + "sha256:eb7e49a17fb8c77d3119d41a4523e432eb0c6932187c37deb6fbb00cc3028088", + "sha256:ef12e259e7bc317c7597d4f6ef59b97b913e162d83b421dd0db3d6410f17a244", + "sha256:f8429b17cbb746c3e043cb986328da023657e79d5ed258b711c06a70c2ea7537", + "sha256:fa99973d2ed20417744fca0073390ad65ce225b546febb0580358e36aa90dba6", + "sha256:faf79a302f834d9e8304fafdc11d0d042266667ac45209afa57e5efc998e3872", + "sha256:fc31baa0c32a2ca660784d5af3b9487e13b61b3032cb01a115fce6588e1bed30" + ], + "markers": "python_version >= '3.6'", + "version": "==25.1.2" + }, + 
"qtconsole": { + "hashes": [ + "sha256:8c75fa3e9b4ed884880ff7cea90a1b67451219279ec33deaee1d59e3df1a5d2b", + "sha256:a0e806c6951db9490628e4df80caec9669b65149c7ba40f9bf033c025a5b56bc" + ], + "markers": "python_version >= '3.8'", + "version": "==5.5.1" + }, + "qtpy": { + "hashes": [ + "sha256:1c1d8c4fa2c884ae742b069151b0abe15b3f70491f3972698c683b8e38de839b", + "sha256:a5a15ffd519550a1361bdc56ffc07fda56a6af7292f17c7b395d4083af632987" + ], + "markers": "python_version >= '3.7'", + "version": "==2.4.1" + }, + "referencing": { + "hashes": [ + "sha256:39240f2ecc770258f28b642dd47fd74bc8b02484de54e1882b74b35ebd779bd5", + "sha256:c775fedf74bc0f9189c2a3be1c12fd03e8c23f4d371dce795df44e06c5b412f7" + ], + "markers": "python_version >= '3.8'", + "version": "==0.33.0" + }, + "requests": { + "hashes": [ + "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", + "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" + ], + "markers": "python_version >= '3.7'", + "version": "==2.31.0" + }, + "requests-html": { + "hashes": [ + "sha256:7e929ecfed95fb1d0994bb368295d6d7c4d06b03fcb900c33d7d0b17e6003947", + "sha256:cb8a78cf829c4eca9d6233f28524f65dd2bfaafb4bdbbc407f0a0b8f487df6e2" + ], + "markers": "python_full_version >= '3.6.0'", + "version": "==0.10.0" + }, + "rfc3339-validator": { + "hashes": [ + "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b", + "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.1.4" + }, + "rfc3986-validator": { + "hashes": [ + "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9", + "sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==0.1.1" + }, + "rpds-py": { + "hashes": [ + 
"sha256:01f58a7306b64e0a4fe042047dd2b7d411ee82e54240284bab63e325762c1147", + "sha256:0210b2668f24c078307260bf88bdac9d6f1093635df5123789bfee4d8d7fc8e7", + "sha256:02866e060219514940342a1f84303a1ef7a1dad0ac311792fbbe19b521b489d2", + "sha256:0387ce69ba06e43df54e43968090f3626e231e4bc9150e4c3246947567695f68", + "sha256:060f412230d5f19fc8c8b75f315931b408d8ebf56aec33ef4168d1b9e54200b1", + "sha256:071bc28c589b86bc6351a339114fb7a029f5cddbaca34103aa573eba7b482382", + "sha256:0bfb09bf41fe7c51413f563373e5f537eaa653d7adc4830399d4e9bdc199959d", + "sha256:10162fe3f5f47c37ebf6d8ff5a2368508fe22007e3077bf25b9c7d803454d921", + "sha256:149c5cd24f729e3567b56e1795f74577aa3126c14c11e457bec1b1c90d212e38", + "sha256:1701fc54460ae2e5efc1dd6350eafd7a760f516df8dbe51d4a1c79d69472fbd4", + "sha256:1957a2ab607f9added64478a6982742eb29f109d89d065fa44e01691a20fc20a", + "sha256:1a746a6d49665058a5896000e8d9d2f1a6acba8a03b389c1e4c06e11e0b7f40d", + "sha256:1bfcad3109c1e5ba3cbe2f421614e70439f72897515a96c462ea657261b96518", + "sha256:1d36b2b59e8cc6e576f8f7b671e32f2ff43153f0ad6d0201250a7c07f25d570e", + "sha256:1db228102ab9d1ff4c64148c96320d0be7044fa28bd865a9ce628ce98da5973d", + "sha256:1dc29db3900cb1bb40353772417800f29c3d078dbc8024fd64655a04ee3c4bdf", + "sha256:1e626b365293a2142a62b9a614e1f8e331b28f3ca57b9f05ebbf4cf2a0f0bdc5", + "sha256:1f3c3461ebb4c4f1bbc70b15d20b565759f97a5aaf13af811fcefc892e9197ba", + "sha256:20de7b7179e2031a04042e85dc463a93a82bc177eeba5ddd13ff746325558aa6", + "sha256:24e4900a6643f87058a27320f81336d527ccfe503984528edde4bb660c8c8d59", + "sha256:2528ff96d09f12e638695f3a2e0c609c7b84c6df7c5ae9bfeb9252b6fa686253", + "sha256:25f071737dae674ca8937a73d0f43f5a52e92c2d178330b4c0bb6ab05586ffa6", + "sha256:270987bc22e7e5a962b1094953ae901395e8c1e1e83ad016c5cfcfff75a15a3f", + "sha256:292f7344a3301802e7c25c53792fae7d1593cb0e50964e7bcdcc5cf533d634e3", + "sha256:2953937f83820376b5979318840f3ee47477d94c17b940fe31d9458d79ae7eea", + "sha256:2a792b2e1d3038daa83fa474d559acfd6dc1e3650ee93b2662ddc17dbff20ad1", 
+ "sha256:2a7b2f2f56a16a6d62e55354dd329d929560442bd92e87397b7a9586a32e3e76", + "sha256:2f4eb548daf4836e3b2c662033bfbfc551db58d30fd8fe660314f86bf8510b93", + "sha256:3664d126d3388a887db44c2e293f87d500c4184ec43d5d14d2d2babdb4c64cad", + "sha256:3677fcca7fb728c86a78660c7fb1b07b69b281964673f486ae72860e13f512ad", + "sha256:380e0df2e9d5d5d339803cfc6d183a5442ad7ab3c63c2a0982e8c824566c5ccc", + "sha256:3ac732390d529d8469b831949c78085b034bff67f584559340008d0f6041a049", + "sha256:4128980a14ed805e1b91a7ed551250282a8ddf8201a4e9f8f5b7e6225f54170d", + "sha256:4341bd7579611cf50e7b20bb8c2e23512a3dc79de987a1f411cb458ab670eb90", + "sha256:436474f17733c7dca0fbf096d36ae65277e8645039df12a0fa52445ca494729d", + "sha256:4dc889a9d8a34758d0fcc9ac86adb97bab3fb7f0c4d29794357eb147536483fd", + "sha256:4e21b76075c01d65d0f0f34302b5a7457d95721d5e0667aea65e5bb3ab415c25", + "sha256:516fb8c77805159e97a689e2f1c80655c7658f5af601c34ffdb916605598cda2", + "sha256:5576ee2f3a309d2bb403ec292d5958ce03953b0e57a11d224c1f134feaf8c40f", + "sha256:5a024fa96d541fd7edaa0e9d904601c6445e95a729a2900c5aec6555fe921ed6", + "sha256:5d0e8a6434a3fbf77d11448c9c25b2f25244226cfbec1a5159947cac5b8c5fa4", + "sha256:5e7d63ec01fe7c76c2dbb7e972fece45acbb8836e72682bde138e7e039906e2c", + "sha256:60e820ee1004327609b28db8307acc27f5f2e9a0b185b2064c5f23e815f248f8", + "sha256:637b802f3f069a64436d432117a7e58fab414b4e27a7e81049817ae94de45d8d", + "sha256:65dcf105c1943cba45d19207ef51b8bc46d232a381e94dd38719d52d3980015b", + "sha256:698ea95a60c8b16b58be9d854c9f993c639f5c214cf9ba782eca53a8789d6b19", + "sha256:70fcc6c2906cfa5c6a552ba7ae2ce64b6c32f437d8f3f8eea49925b278a61453", + "sha256:720215373a280f78a1814becb1312d4e4d1077b1202a56d2b0815e95ccb99ce9", + "sha256:7450dbd659fed6dd41d1a7d47ed767e893ba402af8ae664c157c255ec6067fde", + "sha256:7b7d9ca34542099b4e185b3c2a2b2eda2e318a7dbde0b0d83357a6d4421b5296", + "sha256:7fbd70cb8b54fe745301921b0816c08b6d917593429dfc437fd024b5ba713c58", + 
"sha256:81038ff87a4e04c22e1d81f947c6ac46f122e0c80460b9006e6517c4d842a6ec", + "sha256:810685321f4a304b2b55577c915bece4c4a06dfe38f6e62d9cc1d6ca8ee86b99", + "sha256:82ada4a8ed9e82e443fcef87e22a3eed3654dd3adf6e3b3a0deb70f03e86142a", + "sha256:841320e1841bb53fada91c9725e766bb25009cfd4144e92298db296fb6c894fb", + "sha256:8587fd64c2a91c33cdc39d0cebdaf30e79491cc029a37fcd458ba863f8815383", + "sha256:8ffe53e1d8ef2520ebcf0c9fec15bb721da59e8ef283b6ff3079613b1e30513d", + "sha256:9051e3d2af8f55b42061603e29e744724cb5f65b128a491446cc029b3e2ea896", + "sha256:91e5a8200e65aaac342a791272c564dffcf1281abd635d304d6c4e6b495f29dc", + "sha256:93432e747fb07fa567ad9cc7aaadd6e29710e515aabf939dfbed8046041346c6", + "sha256:938eab7323a736533f015e6069a7d53ef2dcc841e4e533b782c2bfb9fb12d84b", + "sha256:9584f8f52010295a4a417221861df9bea4c72d9632562b6e59b3c7b87a1522b7", + "sha256:9737bdaa0ad33d34c0efc718741abaafce62fadae72c8b251df9b0c823c63b22", + "sha256:99da0a4686ada4ed0f778120a0ea8d066de1a0a92ab0d13ae68492a437db78bf", + "sha256:99f567dae93e10be2daaa896e07513dd4bf9c2ecf0576e0533ac36ba3b1d5394", + "sha256:9bdf1303df671179eaf2cb41e8515a07fc78d9d00f111eadbe3e14262f59c3d0", + "sha256:9f0e4dc0f17dcea4ab9d13ac5c666b6b5337042b4d8f27e01b70fae41dd65c57", + "sha256:a000133a90eea274a6f28adc3084643263b1e7c1a5a66eb0a0a7a36aa757ed74", + "sha256:a3264e3e858de4fc601741498215835ff324ff2482fd4e4af61b46512dd7fc83", + "sha256:a71169d505af63bb4d20d23a8fbd4c6ce272e7bce6cc31f617152aa784436f29", + "sha256:a967dd6afda7715d911c25a6ba1517975acd8d1092b2f326718725461a3d33f9", + "sha256:aa5bfb13f1e89151ade0eb812f7b0d7a4d643406caaad65ce1cbabe0a66d695f", + "sha256:ae35e8e6801c5ab071b992cb2da958eee76340e6926ec693b5ff7d6381441745", + "sha256:b686f25377f9c006acbac63f61614416a6317133ab7fafe5de5f7dc8a06d42eb", + "sha256:b760a56e080a826c2e5af09002c1a037382ed21d03134eb6294812dda268c811", + "sha256:b86b21b348f7e5485fae740d845c65a880f5d1eda1e063bc59bef92d1f7d0c55", + "sha256:b9412abdf0ba70faa6e2ee6c0cc62a8defb772e78860cef419865917d86c7342", 
+ "sha256:bd345a13ce06e94c753dab52f8e71e5252aec1e4f8022d24d56decd31e1b9b23", + "sha256:be22ae34d68544df293152b7e50895ba70d2a833ad9566932d750d3625918b82", + "sha256:bf046179d011e6114daf12a534d874958b039342b347348a78b7cdf0dd9d6041", + "sha256:c3d2010656999b63e628a3c694f23020322b4178c450dc478558a2b6ef3cb9bb", + "sha256:c64602e8be701c6cfe42064b71c84ce62ce66ddc6422c15463fd8127db3d8066", + "sha256:d65e6b4f1443048eb7e833c2accb4fa7ee67cc7d54f31b4f0555b474758bee55", + "sha256:d8bbd8e56f3ba25a7d0cf980fc42b34028848a53a0e36c9918550e0280b9d0b6", + "sha256:da1ead63368c04a9bded7904757dfcae01eba0e0f9bc41d3d7f57ebf1c04015a", + "sha256:dbbb95e6fc91ea3102505d111b327004d1c4ce98d56a4a02e82cd451f9f57140", + "sha256:dbc56680ecf585a384fbd93cd42bc82668b77cb525343170a2d86dafaed2a84b", + "sha256:df3b6f45ba4515632c5064e35ca7f31d51d13d1479673185ba8f9fefbbed58b9", + "sha256:dfe07308b311a8293a0d5ef4e61411c5c20f682db6b5e73de6c7c8824272c256", + "sha256:e796051f2070f47230c745d0a77a91088fbee2cc0502e9b796b9c6471983718c", + "sha256:efa767c220d94aa4ac3a6dd3aeb986e9f229eaf5bce92d8b1b3018d06bed3772", + "sha256:f0b8bf5b8db49d8fd40f54772a1dcf262e8be0ad2ab0206b5a2ec109c176c0a4", + "sha256:f175e95a197f6a4059b50757a3dca33b32b61691bdbd22c29e8a8d21d3914cae", + "sha256:f2f3b28b40fddcb6c1f1f6c88c6f3769cd933fa493ceb79da45968a21dccc920", + "sha256:f6c43b6f97209e370124baf2bf40bb1e8edc25311a158867eb1c3a5d449ebc7a", + "sha256:f7f4cb1f173385e8a39c29510dd11a78bf44e360fb75610594973f5ea141028b", + "sha256:fad059a4bd14c45776600d223ec194e77db6c20255578bb5bcdd7c18fd169361", + "sha256:ff1dcb8e8bc2261a088821b2595ef031c91d499a0c1b031c152d43fe0a6ecec8", + "sha256:ffee088ea9b593cc6160518ba9bd319b5475e5f3e578e4552d63818773c6f56a" + ], + "markers": "python_version >= '3.8'", + "version": "==0.17.1" + }, + "ruff": { + "hashes": [ + "sha256:30ad74687e1f4a9ff8e513b20b82ccadb6bd796fe5697f1e417189c5cde6be3e", + "sha256:3826fb34c144ef1e171b323ed6ae9146ab76d109960addca730756dc19dc7b22", + 
"sha256:3d3c641f95f435fc6754b05591774a17df41648f0daf3de0d75ad3d9f099ab92", + "sha256:3fbaff1ba9564a2c5943f8f38bc221f04bac687cc7485e45237579fee7ccda79", + "sha256:3ff35433fcf4dff6d610738712152df6b7d92351a1bde8e00bd405b08b3d5759", + "sha256:63856b91837606c673537d2889989733d7dffde553828d3b0f0bacfa6def54be", + "sha256:638ea3294f800d18bae84a492cb5a245c8d29c90d19a91d8e338937a4c27fca0", + "sha256:6d232f99d3ab00094ebaf88e0fb7a8ccacaa54cc7fa3b8993d9627a11e6aed7a", + "sha256:8153a3e4128ed770871c47545f1ae7b055023e0c222ff72a759f5a341ee06483", + "sha256:87057dd2fdde297130ff99553be8549ca38a2965871462a97394c22ed2dfc19d", + "sha256:a7e3818698f8460bd0f8d4322bbe99db8327e9bc2c93c789d3159f5b335f47da", + "sha256:ba918e01cdd21e81b07555564f40d307b0caafa9a7a65742e98ff244f5035c59", + "sha256:bf9faafbdcf4f53917019f2c230766da437d4fd5caecd12ddb68bb6a17d74399", + "sha256:e155147199c2714ff52385b760fe242bb99ea64b240a9ffbd6a5918eb1268843", + "sha256:e8a75a98ae989a27090e9c51f763990ad5bbc92d20626d54e9701c7fe597f399", + "sha256:eceab7d85d09321b4de18b62d38710cf296cb49e98979960a59c6b9307c18cfe", + "sha256:edf23041242c48b0d8295214783ef543847ef29e8226d9f69bf96592dba82a83" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==0.2.0" + }, + "s3transfer": { + "hashes": [ + "sha256:3cdb40f5cfa6966e812209d0994f2a4709b561c88e90cf00c2696d2df4e56b2e", + "sha256:d0c8bbf672d5eebbe4e57945e23b972d963f07d82f661cabf678a5c88831595b" + ], + "markers": "python_version >= '3.8'", + "version": "==0.10.0" + }, + "send2trash": { + "hashes": [ + "sha256:a384719d99c07ce1eefd6905d2decb6f8b7ed054025bb0e618919f945de4f679", + "sha256:c132d59fa44b9ca2b1699af5c86f57ce9f4c5eb56629d5d55fbb7a35f84e2312" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.8.2" + }, + "setuptools": { + "hashes": [ + "sha256:385eb4edd9c9d5c17540511303e39a147ce2fc04bc55289c322b9e5904fe2c05", + "sha256:be1af57fc409f93647f2e8e4573a142ed38724b8cdd389706a867bb4efcf1e78" + 
], + "markers": "python_version >= '3.8'", + "version": "==69.0.3" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "sniffio": { + "hashes": [ + "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", + "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.0" + }, + "soupsieve": { + "hashes": [ + "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690", + "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7" + ], + "markers": "python_version >= '3.8'", + "version": "==2.5" + }, + "sqlparse": { + "hashes": [ + "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3", + "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c" + ], + "markers": "python_version >= '3.5'", + "version": "==0.4.4" + }, + "stack-data": { + "hashes": [ + "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", + "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695" + ], + "version": "==0.6.3" + }, + "starlette": { + "hashes": [ + "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044", + "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080" + ], + "markers": "python_version >= '3.8'", + "version": "==0.36.3" + }, + "tabulate": { + "hashes": [ + "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", + "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f" + ], + "markers": "python_version >= '3.7'", + "version": "==0.9.0" + }, + "terminado": { + "hashes": [ + "sha256:1ea08a89b835dd1b8c0c900d92848147cef2537243361b2e3f4dc15df9b6fded", + 
"sha256:87b0d96642d0fe5f5abd7783857b9cab167f221a39ff98e3b9619a788a3c0f2e" + ], + "markers": "python_version >= '3.8'", + "version": "==0.18.0" + }, + "tinycss2": { + "hashes": [ + "sha256:2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847", + "sha256:8cff3a8f066c2ec677c06dbc7b45619804a6938478d9d73c284b29d14ecb0627" + ], + "markers": "python_version >= '3.7'", + "version": "==1.2.1" + }, + "tomli": { + "hashes": [ + "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", + "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" + ], + "markers": "python_version < '3.11'", + "version": "==2.0.1" + }, + "tornado": { + "hashes": [ + "sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0", + "sha256:10aeaa8006333433da48dec9fe417877f8bcc21f48dda8d661ae79da357b2a63", + "sha256:27787de946a9cffd63ce5814c33f734c627a87072ec7eed71f7fc4417bb16263", + "sha256:6f8a6c77900f5ae93d8b4ae1196472d0ccc2775cc1dfdc9e7727889145c45052", + "sha256:71ddfc23a0e03ef2df1c1397d859868d158c8276a0603b96cf86892bff58149f", + "sha256:72291fa6e6bc84e626589f1c29d90a5a6d593ef5ae68052ee2ef000dfd273dee", + "sha256:88b84956273fbd73420e6d4b8d5ccbe913c65d31351b4c004ae362eba06e1f78", + "sha256:e43bc2e5370a6a8e413e1e1cd0c91bedc5bd62a74a532371042a18ef19e10579", + "sha256:f0251554cdd50b4b44362f73ad5ba7126fc5b2c2895cc62b14a1c2d7ea32f212", + "sha256:f7894c581ecdcf91666a0912f18ce5e757213999e183ebfc2c3fdbf4d5bd764e", + "sha256:fd03192e287fbd0899dd8f81c6fb9cbbc69194d2074b38f384cb6fa72b80e9c2" + ], + "markers": "python_version >= '3.8'", + "version": "==6.4" + }, + "tqdm": { + "hashes": [ + "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386", + "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7" + ], + "markers": "python_version >= '3.7'", + "version": "==4.66.1" + }, + "traitlets": { + "hashes": [ + "sha256:2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74", + 
"sha256:8585105b371a04b8316a43d5ce29c098575c2e477850b62b848b964f1444527e" + ], + "markers": "python_version >= '3.8'", + "version": "==5.14.1" + }, + "types-python-dateutil": { + "hashes": [ + "sha256:1f8db221c3b98e6ca02ea83a58371b22c374f42ae5bbdf186db9c9a76581459f", + "sha256:efbbdc54590d0f16152fa103c9879c7d4a00e82078f6e2cf01769042165acaa2" + ], + "markers": "python_version >= '3.8'", + "version": "==2.8.19.20240106" + }, + "typing-extensions": { + "hashes": [ + "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783", + "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd" + ], + "markers": "python_version >= '3.8'", + "version": "==4.9.0" + }, + "uri-template": { + "hashes": [ + "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7", + "sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363" + ], + "version": "==1.3.0" + }, + "urllib3": { + "hashes": [ + "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07", + "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==1.26.18" + }, + "uvicorn": { + "hashes": [ + "sha256:4b85ba02b8a20429b9b205d015cbeb788a12da527f731811b643fd739ef90d5f", + "sha256:54898fcd80c13ff1cd28bf77b04ec9dbd8ff60c5259b499b4b12bb0917f22907" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.27.0.post1" + }, + "variation-normalizer": { + "editable": true, + "path": "." 
+ }, + "virtualenv": { + "hashes": [ + "sha256:4238949c5ffe6876362d9c0180fc6c3a824a7b12b80604eeb8085f2ed7460de3", + "sha256:bf51c0d9c7dd63ea8e44086fa1e4fb1093a31e963b86959257378aef020e1f1b" + ], + "markers": "python_version >= '3.7'", + "version": "==20.25.0" + }, + "w3lib": { + "hashes": [ + "sha256:c4432926e739caa8e3f49f5de783f336df563d9490416aebd5d39fb896d264e7", + "sha256:ed5b74e997eea2abe3c1321f916e344144ee8e9072a6f33463ee8e57f858a4b1" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.2" + }, + "wcwidth": { + "hashes": [ + "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", + "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5" + ], + "version": "==0.2.13" + }, + "webcolors": { + "hashes": [ + "sha256:29bc7e8752c0a1bd4a1f03c14d6e6a72e93d82193738fa860cbff59d0fcc11bf", + "sha256:c225b674c83fa923be93d235330ce0300373d02885cef23238813b0d5668304a" + ], + "version": "==1.13" + }, + "webencodings": { + "hashes": [ + "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", + "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" + ], + "version": "==0.5.1" + }, + "websocket-client": { + "hashes": [ + "sha256:10e511ea3a8c744631d3bd77e61eb17ed09304c413ad42cf6ddfa4c7787e8fe6", + "sha256:f4c3d22fec12a2461427a29957ff07d35098ee2d976d3ba244e688b8b4057588" + ], + "markers": "python_version >= '3.8'", + "version": "==1.7.0" + }, + "websockets": { + "hashes": [ + "sha256:00213676a2e46b6ebf6045bc11d0f529d9120baa6f58d122b4021ad92adabd41", + "sha256:00c870522cdb69cd625b93f002961ffb0c095394f06ba8c48f17eef7c1541f96", + "sha256:0154f7691e4fe6c2b2bc275b5701e8b158dae92a1ab229e2b940efe11905dff4", + "sha256:05a7233089f8bd355e8cbe127c2e8ca0b4ea55467861906b80d2ebc7db4d6b72", + "sha256:09a1814bb15eff7069e51fed0826df0bc0702652b5cb8f87697d469d79c23576", + "sha256:0cff816f51fb33c26d6e2b16b5c7d48eaa31dae5488ace6aae468b361f422b63", + 
"sha256:185929b4808b36a79c65b7865783b87b6841e852ef5407a2fb0c03381092fa3b", + "sha256:2fc8709c00704194213d45e455adc106ff9e87658297f72d544220e32029cd3d", + "sha256:33d69ca7612f0ddff3316b0c7b33ca180d464ecac2d115805c044bf0a3b0d032", + "sha256:389f8dbb5c489e305fb113ca1b6bdcdaa130923f77485db5b189de343a179393", + "sha256:38ea7b82bfcae927eeffc55d2ffa31665dc7fec7b8dc654506b8e5a518eb4d50", + "sha256:3d3cac3e32b2c8414f4f87c1b2ab686fa6284a980ba283617404377cd448f631", + "sha256:40e826de3085721dabc7cf9bfd41682dadc02286d8cf149b3ad05bff89311e4f", + "sha256:4239b6027e3d66a89446908ff3027d2737afc1a375f8fd3eea630a4842ec9a0c", + "sha256:45ec8e75b7dbc9539cbfafa570742fe4f676eb8b0d3694b67dabe2f2ceed8aa6", + "sha256:47a2964021f2110116cc1125b3e6d87ab5ad16dea161949e7244ec583b905bb4", + "sha256:48c08473563323f9c9debac781ecf66f94ad5a3680a38fe84dee5388cf5acaf6", + "sha256:4c6d2264f485f0b53adf22697ac11e261ce84805c232ed5dbe6b1bcb84b00ff0", + "sha256:4f72e5cd0f18f262f5da20efa9e241699e0cf3a766317a17392550c9ad7b37d8", + "sha256:56029457f219ade1f2fc12a6504ea61e14ee227a815531f9738e41203a429112", + "sha256:5c1289596042fad2cdceb05e1ebf7aadf9995c928e0da2b7a4e99494953b1b94", + "sha256:62e627f6b6d4aed919a2052efc408da7a545c606268d5ab5bfab4432734b82b4", + "sha256:74de2b894b47f1d21cbd0b37a5e2b2392ad95d17ae983e64727e18eb281fe7cb", + "sha256:7c584f366f46ba667cfa66020344886cf47088e79c9b9d39c84ce9ea98aaa331", + "sha256:7d27a7e34c313b3a7f91adcd05134315002aaf8540d7b4f90336beafaea6217c", + "sha256:7d3f0b61c45c3fa9a349cf484962c559a8a1d80dae6977276df8fd1fa5e3cb8c", + "sha256:82ff5e1cae4e855147fd57a2863376ed7454134c2bf49ec604dfe71e446e2193", + "sha256:84bc2a7d075f32f6ed98652db3a680a17a4edb21ca7f80fe42e38753a58ee02b", + "sha256:884be66c76a444c59f801ac13f40c76f176f1bfa815ef5b8ed44321e74f1600b", + "sha256:8a5cc00546e0a701da4639aa0bbcb0ae2bb678c87f46da01ac2d789e1f2d2038", + "sha256:8dc96f64ae43dde92530775e9cb169979f414dcf5cff670455d81a6823b42089", + "sha256:8f38706e0b15d3c20ef6259fd4bc1700cd133b06c3c1bb108ffe3f8947be15fa", 
+ "sha256:90fcf8929836d4a0e964d799a58823547df5a5e9afa83081761630553be731f9", + "sha256:931c039af54fc195fe6ad536fde4b0de04da9d5916e78e55405436348cfb0e56", + "sha256:932af322458da7e4e35df32f050389e13d3d96b09d274b22a7aa1808f292fee4", + "sha256:942de28af58f352a6f588bc72490ae0f4ccd6dfc2bd3de5945b882a078e4e179", + "sha256:9bc42e8402dc5e9905fb8b9649f57efcb2056693b7e88faa8fb029256ba9c68c", + "sha256:a7a240d7a74bf8d5cb3bfe6be7f21697a28ec4b1a437607bae08ac7acf5b4882", + "sha256:a9f9a735deaf9a0cadc2d8c50d1a5bcdbae8b6e539c6e08237bc4082d7c13f28", + "sha256:ae5e95cfb53ab1da62185e23b3130e11d64431179debac6dc3c6acf08760e9b1", + "sha256:b029fb2032ae4724d8ae8d4f6b363f2cc39e4c7b12454df8df7f0f563ed3e61a", + "sha256:b0d15c968ea7a65211e084f523151dbf8ae44634de03c801b8bd070b74e85033", + "sha256:b343f521b047493dc4022dd338fc6db9d9282658862756b4f6fd0e996c1380e1", + "sha256:b627c266f295de9dea86bd1112ed3d5fafb69a348af30a2422e16590a8ecba13", + "sha256:b9968694c5f467bf67ef97ae7ad4d56d14be2751000c1207d31bf3bb8860bae8", + "sha256:ba089c499e1f4155d2a3c2a05d2878a3428cf321c848f2b5a45ce55f0d7d310c", + "sha256:bbccd847aa0c3a69b5f691a84d2341a4f8a629c6922558f2a70611305f902d74", + "sha256:bc0b82d728fe21a0d03e65f81980abbbcb13b5387f733a1a870672c5be26edab", + "sha256:c57e4c1349fbe0e446c9fa7b19ed2f8a4417233b6984277cce392819123142d3", + "sha256:c94ae4faf2d09f7c81847c63843f84fe47bf6253c9d60b20f25edfd30fb12588", + "sha256:c9b27d6c1c6cd53dc93614967e9ce00ae7f864a2d9f99fe5ed86706e1ecbf485", + "sha256:d210abe51b5da0ffdbf7b43eed0cfdff8a55a1ab17abbec4301c9ff077dd0342", + "sha256:d58804e996d7d2307173d56c297cf7bc132c52df27a3efaac5e8d43e36c21c48", + "sha256:d6a4162139374a49eb18ef5b2f4da1dd95c994588f5033d64e0bbfda4b6b6fcf", + "sha256:da39dd03d130162deb63da51f6e66ed73032ae62e74aaccc4236e30edccddbb0", + "sha256:db3c336f9eda2532ec0fd8ea49fef7a8df8f6c804cdf4f39e5c5c0d4a4ad9a7a", + "sha256:dd500e0a5e11969cdd3320935ca2ff1e936f2358f9c2e61f100a1660933320ea", + 
"sha256:dd9becd5fe29773d140d68d607d66a38f60e31b86df75332703757ee645b6faf", + "sha256:e0cb5cc6ece6ffa75baccfd5c02cffe776f3f5c8bf486811f9d3ea3453676ce8", + "sha256:e23173580d740bf8822fd0379e4bf30aa1d5a92a4f252d34e893070c081050df", + "sha256:e3a686ecb4aa0d64ae60c9c9f1a7d5d46cab9bfb5d91a2d303d00e2cd4c4c5cc", + "sha256:e789376b52c295c4946403bd0efecf27ab98f05319df4583d3c48e43c7342c2f", + "sha256:edc344de4dac1d89300a053ac973299e82d3db56330f3494905643bb68801269", + "sha256:eef610b23933c54d5d921c92578ae5f89813438fded840c2e9809d378dc765d3", + "sha256:f2c38d588887a609191d30e902df2a32711f708abfd85d318ca9b367258cfd0c", + "sha256:f55b5905705725af31ccef50e55391621532cd64fbf0bc6f4bac935f0fccec46", + "sha256:f5fc088b7a32f244c519a048c170f14cf2251b849ef0e20cbbb0fdf0fdaf556f", + "sha256:fe10ddc59b304cb19a1bdf5bd0a7719cbbc9fbdd57ac80ed436b709fcf889106", + "sha256:ff64a1d38d156d429404aaa84b27305e957fd10c30e5880d1765c9480bea490f" + ], + "markers": "python_version >= '3.7'", + "version": "==10.4" + }, + "widgetsnbextension": { + "hashes": [ + "sha256:3c1f5e46dc1166dfd40a42d685e6a51396fd34ff878742a3e47c6f0cc4a2a385", + "sha256:91452ca8445beb805792f206e560c1769284267a30ceb1cec9f5bcc887d15175" + ], + "markers": "python_version >= '3.7'", + "version": "==4.0.9" + }, + "yoyo-migrations": { + "hashes": [ + "sha256:27dabe7432859288b0bd771093f593e3dd2ff6dd4e3b8438992a07c9a7154660", + "sha256:820606a03e262cf1cd4f59e256c28fa446425224d5b82a3d1275fd78178523e4" + ], + "version": "==8.2.0" + }, + "zipp": { + "hashes": [ + "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31", + "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0" + ], + "markers": "python_version >= '3.8'", + "version": "==3.17.0" + } + } +} diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..3c626d3 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: gunicorn -k uvicorn.workers.UvicornWorker variation.main:app --timeout 1000 --log-level debug diff --git a/README.md b/README.md 
index 8369301..b8f3109 100644 --- a/README.md +++ b/README.md @@ -1,203 +1,61 @@ -# Compose services for metakb +This section describes setting up the Variation Normalizer via Docker. For detailed instructions on the Variation Normalizer and its developer setup, please refer to the Variation Normalizer home page: https://github.com/cancervariants/variation-normalization/tree/main + +The Variation Normalizer depends upon several modules, so it is recommended to set up Docker containers for these modules before starting the Variation Normalizer container. Please ensure the target machine (where the Variation Normalizer is to be deployed) has Docker installed; otherwise Docker commands won't work. +To create a Docker network, please type the following command. +command : docker network create <name of the network> — e.g. we have used "tulip-net" +Please follow the steps below for the Docker setup of the Variation Normalizer and its dependent containers. + +1. SeqRepo + The Variation Normalizer depends on the SeqRepo database. We need to create a Docker container for SeqRepo. It is recommended to start with this image first, as the volume attached to SeqRepo takes time to download and its size, depending upon the version, is about 10 GB +. + a.) Pull the image from the Docker Hub repository by typing the following command in a terminal. + Command : docker pull biocommons/seqrepo + b.) This will display output something like this: + Using default tag: latest + latest: Pulling from biocommons/seqrepo + 125a6e411906: Pull complete + 4da135235d92: Pull complete + abfb8a2bf499: Pull complete + c987b6c75b9d: Pull complete + 6cafe4b33812: Pull complete + 03f7d4217df5: Pull complete + Digest: sha256:0390108e54c500f72afe5b187ecfb1eb9ef14f21fdc0af18e819660e7c9430c4 + Status: Downloaded newer image for biocommons/seqrepo:latest + docker.io/biocommons/seqrepo:latest + c.)
Once the image is downloaded, start the container with the command : + docker run --net <name of the network> --name seqrepo biocommons/seqrepo + The name of the network is the network name which was created above. + Running the above command will start downloading the sequence files required by the Variation Normalizer. By default the volume of this container is sharable. Other containers which are on the same network can access it by appending this to the docker command: + --volumes-from seqrepo where seqrepo is the name of the container. For efficiency, the container can be run in daemon mode or in a separate terminal so that other tasks can be performed in parallel. + + 2. UTA + The Postgres UTA instance is another dependency required for the Variation Normalizer. To set up a container for the UTA Postgres DB instance, + follow these steps: + a.) Pull the image from the Docker Hub repository by typing the following command in a terminal. + Set the uta_v env variable by typing the command uta_v=<name of the version>. For e.g. uta_v=uta_20210129b. + Command : docker pull biocommons/uta:$uta_v + b.) Once the image is downloaded, start the container with the command : + docker run \ + -d \ + -e POSTGRES_PASSWORD=some-password-that-you-make-up \ + -v /tmp:/tmp \ + -v uta_vol:/var/lib/postgresql/data \ + --name $uta_v \ + --net=<name of the network> \ + biocommons/uta:$uta_v + + 3. DynamoDB + AWS provides a Docker image for the local DynamoDB instance. Even as a local instance, DynamoDB requires an AWS username and AWS + password; we can provide dummy values for these environment variables. These variables have been initialized in the Dockerfile. + a.) Pull the image from the Docker Hub repository and start the container with the following command in a terminal. + Command : docker run --net tulip-net -d --name dynamodb -p 8001:8001 amazon/dynamodb-local:1.18.0 -jar DynamoDBLocal.jar -port 8001 + + + + 4. Variation Normalizer + There is no image hosted on Docker Hub for the Variation Normalizer.
Hence we need to build the image for the Variation Normalizer from the Dockerfile. The Dockerfile is already present in the repo. + a.) To build the image from the Dockerfile, run the following command from the directory containing the Dockerfile. + command : docker build -t variation-normalization . + b.) Once the image is created, start the container with the command : + command : docker run --net <name of the network> --name variationnormalizer -p 8000:80 --volumes-from seqrepo <image name>:<tag name> + -> Runs metakb services in containers using docker compose. - -## Overview - -![image](docker-compose.png) - - -## Installation - -You will need [docker-compose](https://docs.docker.com/compose/install/) - - -## Quickstart - -### After cloning this repo, you will need to clone metakb services. - -At this time, we only have one: `therapy-normalizer` - -``` -cd compose - -git clone https://github.com/cancervariants/therapy-normalization -cd therapy-normalization -# Dockerfile currently on this branch -git checkout issue-123 - -cd .. -``` - -### Configure - -All environmental variables necessary are maintained in a `.env` file you need to create in the project root folder. This file is not maintained in git. See `dot-env-example.txt` - -You will need to clone services repositories.
- -``` -git clone https://github.com/cancervariants/disease-normalization -git clone https://github.com/cancervariants/gene-normalization -git clone https://github.com/cancervariants/therapy-normalization -git clone https://github.com/cancervariants/variant-normalization -``` - - -### Launch - -``` -# build services -docker-compose build - -# launch all services in the background -docker-compose up -d -``` - -### Data dependencies - -* Therapy -``` -dc exec therapy sh -c "pipenv run python3 -m therapy.cli --normalizer=\"rxnorm chemidplus ncit wikidata chembl\" --update_merged " -``` - -* Gene - -We run seqrepo as a separate step outside of the gene normalizer container: - - * See https://github.com/biocommons/biocommons.seqrepo/blob/main/docs/docker.rst#tips - - * https://github.com/biocommons/biocommons.seqrepo/blob/main/docs/mirror.rst#fetching-using-rsync-manually - - * https://github.com/cancervariants/gene-normalization#installation - - -``` -docker run --user $(id -u):$(id -g) -v $(pwd)/data/gene/seqrepo:/usr/local/share/seqrepo biocommons/seqrepo seqrepo pull -i 2020-11-27 - -# at least on a mac, this step is necessary to rename rsync's temp dir -# something seems to be removing write permissions from the temp dir? -# dr-xr-xr-x -sudo mv 2020-11-27.jqflq35k 2020-11-27 - -``` - -After running seqrepo, start the gene normalizer via `dc up -d gene` - -Then harvest genes. 
- -``` -dc exec gene sh -c "pipenv run python3 -m gene.cli --update_all" -``` - - -* Variant - -``` -# -# Variant normalizer will read from variant/data/seqrepo/latest -# so, in the host os, navigate to data/gene/seqrepo and `ln -s` -# -ln -s 2020-11-27 latest -# -# then, in docker compose, we map the seqrepo we setup for gene to the variant container -# `- ./data/gene/seqrepo:/app/variant/data/seqrepo` -# -``` - -* Disease - -``` - -dc exec disease sh -c "pipenv run python3 -m disease.cli --normalizer \"ncit mondo do oncotree\" --update_merged" -``` - - -### Test - -* Services should be up and running -``` -$docker-compose ps -disease /bin/sh -c pipenv run uvic ... Up (healthy) 0.0.0.0:8004->80/tcp -dynamodb /docker-entrypoint.py --sm ... Up 10000/tcp, 22/tcp, 7000/tcp, 7001/tcp, 0.0.0.0:8000->8000/tcp, 9042/tcp, 9160/tcp, 9180/tcp -gene /bin/sh -c pipenv run uvic ... Up (healthy) 0.0.0.0:8002->80/tcp -test /bin/sh -c tail -f /dev/null Up -therapy /bin/sh -c pipenv run uvic ... Up (healthy) 0.0.0.0:8001->80/tcp -variant /bin/sh -c pipenv run uvic ... Up (healthy) 0.0.0.0:8003->80/tcp -``` - - -* You should see dynamo datastore - -If using local dynamodb: - -``` -ls -l data/dynamodb/shared-local-instance.db - --rw-r--r-- 1 xxxx yyyy 24576 Mar 24 09:21 data/dynamodb/shared-local-instance.db -``` - -If using scylladb: - -``` -ls -l data/scylla/ -total 0 -drwxr-xr-x 34 xxxx yyyy 1088 Apr 7 23:37 commitlog -drwxr-xr-x 12 xxxx yyyy 384 Apr 7 18:27 data -drwxr-xr-x 3 xxxx yyyy 96 Mar 26 07:46 hints -drwxr-xr-x 3 xxxx yyyy 96 Mar 26 07:46 view_hints -``` - -* Container /app//data is mapped to ./data in the host. 
After running etl you can see the data dependencies -``` -du -sh ./data/* -756M ./data/disease -681M ./data/dynamodb - 14G ./data/gene -1.5G ./data/scylla - 20G ./data/therapy - 38M ./data/variant -``` - - -* You can run high level integration "smoke-tests" - -``` -# simple smoke tests; [test_server_alive, test_swagger_ui, test_query] -docker-compose exec test sh -c "pipenv run pytest tests/integration" - -tests/integration/test_disease.py ... -tests/integration/test_gene.py ... -tests/integration/test_therapy.py ... -tests/integration/test_variant.py ... - -``` - -### Backup scylladb - -``` -# see https://docs.scylladb.com/operating-scylla/procedures/backup-restore/backup/ -# save schema -cqlsh -e "DESC SCHEMA;" > /var/lib/scylla/data/backup/db_schema.cql - -# backup keystores -cqlsh --execute="DESCRIBE keyspaces;" | python3 -c "import sys;[print(f'nodetool snapshot {keystore}') for keystore in sys.stdin.read().split() if 'system' not in keystore]; " | sh -``` - -### Generate documentation image - -Following command will create docker-compose.png - -``` -docker run --rm -it --name dcv -v $(pwd):/input pmsipilot/docker-compose-viz render -m image docker-compose.yml -``` - - -### Shutdown - -``` -docker-compose down - -# if you wish to remove any volumes -# docker-compose down -v - -``` diff --git a/biomart.png b/biomart.png new file mode 100644 index 0000000..e2f3e1f Binary files /dev/null and b/biomart.png differ diff --git a/codebuild/deploy_eb_env.py b/codebuild/deploy_eb_env.py new file mode 100644 index 0000000..ac75682 --- /dev/null +++ b/codebuild/deploy_eb_env.py @@ -0,0 +1,49 @@ +"""Module to deploy to staging EB environment.""" +import time + +import boto3 + +elasticbeanstalk = boto3.client("elasticbeanstalk") +servicecatalog = boto3.client("servicecatalog") +terminate_time = 12 +eb_app_name = "VariationNormalization" +eb_env_name = "VariationNormalization-staging-env" +sc_product_id = "prod-m4b65t5jgmcm4" +print( + f"Launching new Service Catalog Product 
for staging environment: " f"{eb_app_name}" +) +sc_product_artifacts = servicecatalog.list_provisioning_artifacts( + ProductId=sc_product_id +) +for artifact in sc_product_artifacts["ProvisioningArtifactDetails"]: + if artifact["Active"]: + provisioning_artifact_id = artifact["Id"] +try: + eb_provisioned_product = servicecatalog.provision_product( + ProductId=sc_product_id, + ProvisioningArtifactId=provisioning_artifact_id, + ProvisionedProductName=eb_env_name, + ProvisioningParameters=[ + {"Key": "Env", "Value": eb_app_name}, + {"Key": "EnvType", "Value": "staging"}, + {"Key": "TerminateTime", "Value": str(terminate_time)}, + ], + ) + eb_provisioned_product_id = eb_provisioned_product["RecordDetail"][ + "ProvisionedProductId" + ] + product_status = servicecatalog.describe_provisioned_product( + Id=eb_provisioned_product_id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] + while eb_provisioned_product_status == "UNDER_CHANGE": + time.sleep(10) + product_status = servicecatalog.describe_provisioned_product( + Id=eb_provisioned_product_id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"][ + "Status" + ] + print(eb_provisioned_product_status) +except: # noqa: E722 + print("The EB environment is already running...") diff --git a/codebuild/deploy_eb_env_dev.py b/codebuild/deploy_eb_env_dev.py new file mode 100644 index 0000000..08ae38b --- /dev/null +++ b/codebuild/deploy_eb_env_dev.py @@ -0,0 +1,48 @@ +"""Module to deploy to staging EB environment.""" +import time + +import boto3 + +elasticbeanstalk = boto3.client("elasticbeanstalk") +servicecatalog = boto3.client("servicecatalog") +terminate_time = 12 +eb_app_name = "VariationNormalization" +eb_env_name = "VariationNormalization-dev-env" +sc_product_id = "prod-m4b65t5jgmcm4" +print(f"Launching new Service Catalog Product for dev environment: " f"{eb_app_name}") +sc_product_artifacts = servicecatalog.list_provisioning_artifacts( + 
ProductId=sc_product_id +) +for artifact in sc_product_artifacts["ProvisioningArtifactDetails"]: + if artifact["Active"]: + provisioning_artifact_id = artifact["Id"] +try: + eb_provisioned_product = servicecatalog.provision_product( + ProductId=sc_product_id, + ProvisioningArtifactId=provisioning_artifact_id, + ProvisionedProductName=eb_env_name, + ProvisioningParameters=[ + {"Key": "Env", "Value": eb_app_name}, + {"Key": "EnvType", "Value": "dev"}, + {"Key": "TerminateTime", "Value": str(terminate_time)}, + ], + ) + eb_provisioned_product_id = eb_provisioned_product["RecordDetail"][ + "ProvisionedProductId" + ] + product_status = servicecatalog.describe_provisioned_product( + Id=eb_provisioned_product_id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] + while eb_provisioned_product_status == "UNDER_CHANGE": + time.sleep(10) + product_status = servicecatalog.describe_provisioned_product( + Id=eb_provisioned_product_id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"][ + "Status" + ] + print(eb_provisioned_product_status) +except Exception as e: # noqa: E722 + print(e) + print("The EB environment is already running....") diff --git a/codebuild/terminate_eb_env.py b/codebuild/terminate_eb_env.py new file mode 100644 index 0000000..4f6d39d --- /dev/null +++ b/codebuild/terminate_eb_env.py @@ -0,0 +1,32 @@ +"""Module for terminating EB staging environment.""" +import json +import time + +import boto3 + +client = boto3.client("lambda") +servicecatalog = boto3.client("servicecatalog") +eb_env_name = "VariationNormalization-staging-env" +data = {"sc_provisioned_name": eb_env_name} +client.invoke( + FunctionName="igm-inf-terminate-provisioned-product", Payload=json.dumps(data) +) +time.sleep(10) +provisioned_product = servicecatalog.describe_provisioned_product(Name=eb_env_name) +eb_provisioned_product_id = provisioned_product["ProvisionedProductDetail"]["Id"] +product_status = 
servicecatalog.describe_provisioned_product( + Id=eb_provisioned_product_id +) +eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] +while eb_provisioned_product_status == "UNDER_CHANGE": + time.sleep(10) + try: + product_status = servicecatalog.describe_provisioned_product( + Id=eb_provisioned_product_id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"][ + "Status" + ] + except: # noqa: E722 + eb_provisioned_product_status = "PRODUCT NOT FOUND" + print(eb_provisioned_product_status) diff --git a/codebuild/terminate_eb_env_dev.py b/codebuild/terminate_eb_env_dev.py new file mode 100644 index 0000000..56d7178 --- /dev/null +++ b/codebuild/terminate_eb_env_dev.py @@ -0,0 +1,32 @@ +"""Module for terminating EB staging environment.""" +import json +import time + +import boto3 + +client = boto3.client("lambda") +servicecatalog = boto3.client("servicecatalog") +eb_env_name = "VariationNormalization-dev-env" +data = {"sc_provisioned_name": eb_env_name} +client.invoke( + FunctionName="igm-inf-terminate-provisioned-product", Payload=json.dumps(data) +) +time.sleep(10) +provisioned_product = servicecatalog.describe_provisioned_product(Name=eb_env_name) +eb_provisioned_product_id = provisioned_product["ProvisionedProductDetail"]["Id"] +product_status = servicecatalog.describe_provisioned_product( + Id=eb_provisioned_product_id +) +eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] +while eb_provisioned_product_status == "UNDER_CHANGE": + time.sleep(10) + try: + product_status = servicecatalog.describe_provisioned_product( + Id=eb_provisioned_product_id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"][ + "Status" + ] + except: # noqa: E722 + eb_provisioned_product_status = "PRODUCT NOT FOUND" + print(eb_provisioned_product_status) diff --git a/cron.yaml b/cron.yaml new file mode 100644 index 0000000..675190d --- /dev/null +++ b/cron.yaml @@ -0,0 
+1,5 @@ +version: 1 +cron: + - name: "task1" + url: "/scheduled" + schedule: "* * * * *" diff --git a/docker-compose.png b/docker-compose.png deleted file mode 100644 index f26ebc9..0000000 Binary files a/docker-compose.png and /dev/null differ diff --git a/docker-compose.yml b/docker-compose.yml index e990f09..03b47a7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,173 +1,24 @@ -version: '3' - +version: '3.8' services: - # A local instance of dynamodb, based on sqlite which persists data between instances - # dynamodb: - # image: amazon/dynamodb-local - # hostname: dynamodb - # container_name: dynamodb - # ports: - # # map port 8000 in the container to port 8000 in the host - # - "8000:8000" - # # tell dynamo to persist data - # command: -jar DynamoDBLocal.jar -sharedDb -dbPath /home/dynamodblocal/data/ - # # map a directory in the host OS to this container - # volumes: - # - ./data/dynamodb:/home/dynamodblocal/data - # Run scylladb in dynamodb mode to provide a performant local store. 
Save data to ./data/scylla - dynamodb: - image: scylladb/scylla - hostname: dynamodb - container_name: dynamodb - ports: - # map port 8000 in the container to port 8000 in the host - - "8000:8000" - # tell scylla to run in dynamodb mode, see http://scylla.docs.scylladb.com/master/alternator/alternator.html#write-isolation-policies - command: --smp 1 --memory=750M --overprovisioned 1 --alternator-port=8000 --alternator-write-isolation=only_rmw_uses_lwt - # map a directory in the host OS to this container - volumes: - - ./data/scylla:/var/lib/scylla - # The therapy service - therapy: - build: therapy-normalization - hostname: therapy - container_name: therapy - environment: - - THERAPY_NORM_DB_URL=http://dynamodb:8000 - # read from .env file - - RXNORM_API_KEY=${RXNORM_API_KEY} - - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-dev} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-foo} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-bar} - - DRUGBANK_USER=${DRUGBANK_USER} - - DRUGBANK_PWD=${DRUGBANK_PWD} - ports: - # map port 80 in the container to free port in the host - - "8001:80" - volumes: - - ./data/therapy:/app/therapy/data - depends_on: - - "dynamodb" - # The gene service - gene: - build: gene-normalization - hostname: gene - container_name: gene - environment: - - GENE_NORM_DB_URL=http://dynamodb:8000 - # read from .env file - - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-dev} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-foo} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-bar} - ports: - # map port 80 in the container to free port in the host - - "8002:80" - depends_on: - - "dynamodb" - volumes: - - ./data/gene:/app/gene/data - # The variant service - variant: - build: variant-normalization - hostname: variant - container_name: variant - environment: - # variant shares gene's db - - GENE_NORM_DB_URL=http://dynamodb:8000 - # read from .env file - - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-dev} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-foo} - - 
AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-bar} - ports: - # map port 80 in the container to free port in the host - - "8003:80" - depends_on: - - "dynamodb" - volumes: - # The variant service has packaged data at variant/data, - # so we do not map a host directory to variant/data. - # We do share seqrepo already harvested for gene - - ./data/gene/seqrepo:/app/variant/data/seqrepo - # The disease service - disease: - build: disease-normalization - hostname: disease - container_name: disease - environment: - - DISEASE_NORM_DB_URL=http://dynamodb:8000 - # read from .env file - - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-dev} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-foo} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-bar} - ports: - # map port 80 in the container to free port in the host - - "8004:80" - depends_on: - - "dynamodb" - volumes: - - ./data/disease:/app/disease/data - # The neo4j service see https://github.com/grand-stack/grand-stack-starter - neo4j: - build: ./neo4j - # image: neo4j:4.2.5 - volumes: - - ./data/neo4j:/data - # wget https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/4.2.5/apoc-4.2.5-all.jar - # - ./data/neo4j-plugins:/plugins - ports: - - 7474:7474 - - 7687:7687 - environment: - - NEO4J_dbms_security_procedures_unrestricted=apoc.* - - NEO4J_apoc_import_file_enabled=true - - NEO4J_apoc_export_file_enabled=true - # - NEO4J_dbms_shell_enabled=true - - NEO4J_apoc_import_file_use__neo4j__config=true - - NEO4JLABS_PLUGINS=["apoc"] - # The metakb service - metakb: - build: metakb - hostname: metakb - container_name: metakb - environment: - - GENE_NORM_DB_URL=http://dynamodb:8000 - - THERAPY_NORM_DB_URL=http://dynamodb:8000 - - DISEASE_NORM_DB_URL=http://dynamodb:8000 - # read from .env file - - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-dev} - - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-foo} - - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-bar} - - METAKB_DB_URL=bolt://neo4j:7687 - - 
METAKB_DB_USERNAME=${METAKB_DB_USERNAME} - - METAKB_DB_PASSWORD=${METAKB_DB_PASSWORD} - ports: - # map port 80 in the container to free port in the host - - "8005:80" - depends_on: - - "neo4j" - - "disease" - - "variant" - - "gene" - - "therapy" - volumes: - - ./metakb/metakb:/app/metakb - - ./data/metakb:/app/metakb/data - - ./data/therapy:/app/therapy/data - - ./data/gene:/app/gene/data - - ./data/disease:/app/disease/data - - # A generic python instance to run tests - test: - build: test - hostname: test - container_name: test - volumes: - # map tests source so we can add/modify tests w/out needing to re-build - - ./test/tests:/app/tests - depends_on: - - "disease" - - "variant" - - "gene" - - "therapy" - - \ No newline at end of file + dynamodb-local: + command: "-jar DynamoDBLocal.jar -sharedDb -dbPath ./data" + image: "amazon/dynamodb-local:latest" + container_name: dynamodb-local + ports: + - "8000:8000" + volumes: + - "./docker/dynamodb:/home/dynamodblocal/data" + working_dir: /home/dynamodblocal + app-node: + image: variationnormalization:7 + container_name: app-node + ports: + - "8080:8080" + depends_on: + - "dynamodb-local" + links: + - "dynamodb-local" + environment: + AWS_ACCESS_KEY_ID: 'DUMMYIDEXAMPLE' + AWS_SECRET_ACCESS_KEY: 'DUMMYEXAMPLEKEY' + REGION: 'eu-west-1' \ No newline at end of file diff --git a/docs/hgvs_dup_del_mode.md b/docs/hgvs_dup_del_mode.md new file mode 100644 index 0000000..f17d72f --- /dev/null +++ b/docs/hgvs_dup_del_mode.md @@ -0,0 +1,29 @@ +# HGVS Dup Del Mode + +This mode helps us interpret deletions and duplications that are represented as HGVS expressions.\ +The mode can be set to `default`, `copy_number_count`, `copy_number_change`, or `allele` + + +## Default Characteristics + +- if baseline_copies is not set and endpoints are ambiguous: + - copy_number_change + - if copy_change not provided: + - copy_change = `efo:0030067` (loss) if del, `efo:0030070` (gain) if dup +- elif baseline_copies is provided: + - 
copy_number_count + - copies are baseline_copies + 1 for dup, baseline_copies - 1 for del + else: + - allele + +# Notes + +- Ambiguous ranges are of the form: + - `(#_#)_(#_#)` + - `(?_#)_(#_?)` + - `(?_#)_#` + - `#_(#_?)` +- We do not normalize any ambiguous ranges +- We do not change the molecular context for ambiguous ranges. +- The `/to_vrs` endpoint uses the default mode for HGVS deletions and duplications. +- The `/normalize` endpoint uses the default mode for HGVS deletions and duplications if a mode is not set. diff --git a/dot-env-example.txt b/dot-env-example.txt deleted file mode 100644 index aee6828..0000000 --- a/dot-env-example.txt +++ /dev/null @@ -1,8 +0,0 @@ -RXNORM_API_KEY=my-key -AWS_DEFAULT_REGION=dev -AWS_ACCESS_KEY_ID=my-access-key -AWS_SECRET_ACCESS_KEY=my-secret-key -DRUGBANK_USER=user@example.com -DRUGBANK_PWD=XXXX -METAKB_DB_USERNAME=XXX -METAKB_DB_PASSWORD=XXX \ No newline at end of file diff --git a/gene.log b/gene.log new file mode 100644 index 0000000..00d46b3 --- /dev/null +++ b/gene.log @@ -0,0 +1 @@ +[2024-02-06 14:46:46,399] - botocore.credentials - INFO : Found credentials in shared credentials file: ~/.aws/credentials diff --git a/neo4j/Dockerfile b/neo4j/Dockerfile deleted file mode 100644 index 8140a07..0000000 --- a/neo4j/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ - -FROM neo4j:4.2.4 -# see https://raw.githubusercontent.com/grand-stack/grand-stack-starter/master/neo4j/Dockerfile -ENV NEO4J_AUTH=neo4j/letmein \ - APOC_VERSION=4.2.0.2 \ - GRAPHQL_VERSION=3.5.21.5 - -# RUN apt update ; apt install -y curl -# ENV APOC_URI https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/${APOC_VERSION}/apoc-${APOC_VERSION}-all.jar -# RUN sh -c 'cd /var/lib/neo4j/plugins && curl -L -O "${APOC_URI}"' - -# ENV GRAPHQL_URI https://github.com/neo4j-graphql/neo4j-graphql/releases/download/${GRAPHQL_VERSION}/neo4j-graphql-${GRAPHQL_VERSION}.jar -# RUN sh -c 'cd /var/lib/neo4j/plugins && curl -L -O "${GRAPHQL_URI}"' - -EXPOSE 7474 
7473 7687 - -CMD ["neo4j"] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b472aaa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,41 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta:__legacy__" + +[tool.black] +line-length = 88 + +[tool.ruff] +# pycodestyle (E, W) +# Pyflakes (F) +# flake8-annotations (ANN) +# flake8-quotes (Q) +# pydocstyle (D) +# pep8-naming (N) +# isort (I) +select = ["E", "W", "F", "ANN", "Q", "D", "N", "I"] + +fixable = ["I", "F401"] + +# D205 - blank-line-after-summary +# D400 - ends-in-period +# D415 - ends-in-punctuation +# ANN101 - missing-type-self +# ANN003 - missing-type-kwargs +# E501 - line-too-long +ignore = ["D205", "D400", "D415", "ANN101", "ANN003", "E501"] + +[tool.ruff.flake8-quotes] +docstring-quotes = "double" + +[tool.ruff.per-file-ignores] +# ANN001 - missing-type-function-argument +# ANN2 - missing-return-type +# ANN102 - missing-type-cls +# N805 - invalid-first-argument-name-for-method +# F821 - undefined-name +# F401 - unused-import +"tests/*" = ["ANN001", "ANN2", "ANN102"] +"setup.py" = ["F821"] +"*__init__.py" = ["F401"] +"variation/schemas/*" = ["ANN201", "N805", "ANN001"] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..edcda24 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,61 @@ +[metadata] +name = variation-normalizer +author = VICC +author_email = help@cancervariants.org +description = VICC normalization routine for variations +long_description = file:README.md +long_description_content_type = text/markdown +home-page = https://github.com/cancervariants/variation-normalization +license_files = LICENSE +license = MIT +project_urls = + Changelog = https://github.com/cancervariants/variation-normalization/releases + Source = https://github.com/cancervariants/variation-normalization + Tracker = https://github.com/cancervariants/variation-normalization/issues + +classifiers = + Development Status :: 3 - Alpha + Intended 
Audience :: Science/Research + Intended Audience :: Developers + Topic :: Scientific/Engineering :: Bio-Informatics + License :: OSI Approved :: MIT License + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.7 + +[options] +packages = find: +python_requires = >=3.7 +zip_safe = False +include_package_data = True + +install_requires = + biocommons.seqrepo + fastapi + uvicorn + pydantic ==2.* + ga4gh.vrs[extras] ~= 2.0.0a2 + gene-normalizer ~=0.3.0.dev1 + boto3 + cool-seq-tool ~=0.4.0.dev1 + bioutils + +tests_require = + pytest + pytest-cov + pyyaml + pytest-asyncio + +[options.extras_require] +dev = + pytest + pytest-asyncio + pytest-cov + ruff + pre-commit + jupyter + ipykernel + psycopg2-binary + black + +[tool:pytest] +addopts = --ignore setup.py --ignore=codebuild/ --doctest-modules --cov-report term-missing --cov . diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..7255c51 --- /dev/null +++ b/setup.py @@ -0,0 +1,5 @@ +"""Module for package and distribution.""" +import setuptools + +exec(open("variation/version.py").read()) +setuptools.setup(version=__version__) diff --git a/test/Dockerfile b/test/Dockerfile deleted file mode 100644 index 664d38c..0000000 --- a/test/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM python:3.7 -RUN pip install pipenv -COPY . /app -WORKDIR /app -RUN if [ ! 
-f "Pipfile.lock" ] ; then pipenv lock ; else echo Pipfile.lock exists ; fi -RUN pipenv sync -RUN pipenv install --dev -CMD tail -f /dev/null \ No newline at end of file diff --git a/test/Pipfile b/test/Pipfile deleted file mode 100644 index ec0c16d..0000000 --- a/test/Pipfile +++ /dev/null @@ -1,34 +0,0 @@ -[[source]] -name = "pypi" -url = "https://pypi.org/simple" -verify_ssl = true - -[dev-packages] -pytest = "*" -pre-commit = "*" -flake8 = "*" -flake8-docstrings = "*" -coverage = "*" -pytest-cov = "*" -coveralls = "*" -civicpy = "*" - -[packages] -pydantic = "*" -twine = "*" -fastapi = "*" -uvicorn = "*" -click = "*" -lxml = "*" -boto3 = "*" -cython = "*" -owlready2 = "*" -rdflib = "*" -uvloop = "*" -websockets = "*" -httptools = "*" -typing-extensions = "*" -wikibaseintegrator = "*" -ipykernel = "*" -bs4 = "*" -pyyaml = "*" diff --git a/test/tests/integration/test_disease.py b/test/tests/integration/test_disease.py deleted file mode 100644 index e0b44bc..0000000 --- a/test/tests/integration/test_disease.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Simple tests to confirm setup.""" - -import requests - - -def test_server_alive(): - """Disease server should return 200.""" - response = requests.get("http://disease/disease") - assert response.status_code == 200, test_server_alive.__doc__ - - -def test_swagger_ui(): - """Disease server should return swagger UI 'FastAPI - Swagger UI'.""" - response = requests.get("http://disease/disease") - assert response.status_code == 200, test_swagger_ui.__doc__ - assert 'FastAPI - Swagger UI' in response.text, test_swagger_ui.__doc__ - - -def test_query(): - """Disease server should find `common cold`.""" - url = "http://disease/disease/normalize?q=common%20cold" - headers = {'accept': 'application/json'} - response = requests.get(url, headers=headers) - assert response.status_code == 200, test_query.__doc__ diff --git a/test/tests/integration/test_gene.py b/test/tests/integration/test_gene.py deleted file mode 100644 index 
79915b1..0000000 --- a/test/tests/integration/test_gene.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Simple tests to confirm setup.""" - -import requests - - -def test_server_alive(): - """Gene server should return 200.""" - response = requests.get("http://gene/gene") - assert response.status_code == 200, test_server_alive.__doc__ - - -def test_swagger_ui(): - """Gene server should return swagger UI 'FastAPI - Swagger UI'.""" - response = requests.get("http://gene/gene") - assert response.status_code == 200, test_swagger_ui.__doc__ - assert 'FastAPI - Swagger UI' in response.text, test_swagger_ui.__doc__ - - -def test_query(): - """Gene server should find `BRCA2`.""" - url = "http://gene/gene/search?q=BRCA2&keyed=true" - headers = {'accept': 'application/json'} - response = requests.get(url, headers=headers) - assert response.status_code == 200, test_query.__doc__ diff --git a/test/tests/integration/test_therapy.py b/test/tests/integration/test_therapy.py deleted file mode 100644 index a883524..0000000 --- a/test/tests/integration/test_therapy.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Simple tests to confirm setup.""" - -import requests - - -def test_server_alive(): - """Therapy server should return 200.""" - response = requests.get("http://therapy/therapy") - assert response.status_code == 200, test_server_alive.__doc__ - - -def test_swagger_ui(): - """Therapy server should return swagger UI 'FastAPI - Swagger UI'.""" - response = requests.get("http://therapy/therapy") - assert response.status_code == 200, test_swagger_ui.__doc__ - assert 'FastAPI - Swagger UI' in response.text, test_swagger_ui.__doc__ - - -def test_query(): - """Therapy server should find `cisplatin`.""" - url = "http://therapy/therapy/search?q=cisplatin&keyed=true" - headers = {'accept': 'application/json'} - response = requests.get(url, headers=headers) - assert response.status_code == 200, test_query.__doc__ \ No newline at end of file diff --git a/test/tests/integration/test_variant.py 
b/test/tests/integration/test_variant.py deleted file mode 100644 index 794a551..0000000 --- a/test/tests/integration/test_variant.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Simple tests to confirm setup.""" - -import requests - - -def test_server_alive(): - """Variant server should return 200.""" - response = requests.get("http://variant/variant") - assert response.status_code == 200, test_server_alive.__doc__ - - -def test_swagger_ui(): - """Variant server should return swagger UI 'FastAPI - Swagger UI'.""" - response = requests.get("http://variant/variant") - assert response.status_code == 200, test_swagger_ui.__doc__ - assert 'FastAPI - Swagger UI' in response.text, test_swagger_ui.__doc__ - - -def test_query(): - """Variant server should find `BRAF V600E`.""" - url = "http://variant/variant/normalize?q=BRAF%20V600E" - headers = {'accept': 'application/json'} - response = requests.get(url, headers=headers) - assert response.status_code == 200, test_query.__doc__ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..dfe3a3f --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,4 @@ +"""Test package.""" +from pathlib import Path + +PROJECT_ROOT = Path(__file__).resolve().parents[1] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..50a2da8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,568 @@ +"""Create methods used throughout tests.""" +import asyncio + +import pytest +from cool_seq_tool.app import CoolSeqTool +from ga4gh.vrs import models +from gene.database.dynamodb import DynamoDbDatabase +from gene.query import QueryHandler as GeneQueryHandler + +from variation.classify import Classify +from variation.query import QueryHandler +from variation.tokenize import Tokenize +from variation.tokenizers import GeneSymbol + + +@pytest.fixture(scope="session") +def event_loop(): + """Create an instance of the default event loop for each test case.""" + loop = asyncio.get_event_loop_policy().new_event_loop() + 
yield loop + loop.close() + + +@pytest.fixture(scope="session") +def test_tokenizer(): + """Create test fixture for tokenizer""" + return Tokenize(GeneSymbol(GeneQueryHandler(DynamoDbDatabase()))) + + +@pytest.fixture(scope="session") +def test_classifier(): + """Create test fixture for classifier""" + return Classify() + + +@pytest.fixture(scope="session") +def test_gene_normalizer(): + """Create test fixture for gene normalizer""" + return GeneQueryHandler(DynamoDbDatabase()) + + +@pytest.fixture(scope="session") +def test_cool_seq_tool(): + """Create test fixture for cool seq tool""" + return CoolSeqTool() + + +@pytest.fixture(scope="session") +def val_params(test_cool_seq_tool, test_gene_normalizer): + """Create test fixture for validator params""" + return [ + test_cool_seq_tool.seqrepo_access, + test_cool_seq_tool.transcript_mappings, + test_cool_seq_tool.uta_db, + test_gene_normalizer, + ] + + +@pytest.fixture(scope="session") +def test_query_handler(): + """Build normalize test fixture.""" + return QueryHandler() + + +@pytest.fixture(scope="session") +def test_cnv_handler(test_query_handler): + """Create test fixture for copy number variation handler""" + return test_query_handler.to_copy_number_handler + + +@pytest.fixture(scope="session") +def braf_ncbi_seq_loc(): + """Create test fixture for BRAF ncbi priority sequence location""" + return { + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "start": 140713327, + "end": 140924929, + "id": "ga4gh:SL.uNBZoxhjhohl24VlIut-JxPJAGfJ7EQE", + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def prpf8_ncbi_seq_loc(): + """Create test fixture for PRPF8 ncbi priority sequence location""" + return { + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7", + }, + "start": 1650628, + "end": 1684867, + "id": "ga4gh:SL.1i49iv3wcBq7SaOA14cs1Kz7SR6DkCw1", + "type": 
"SequenceLocation", + } + + +@pytest.fixture(scope="session") +def braf_600loc(): + """Create test fixture for BRAF 600 location""" + return { + "id": "ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "end": 600, + "start": 599, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + }, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def braf_v600e(braf_600loc): + """Create BRAF V600E protein test fixture.""" + params = { + "id": "ga4gh:VA.4XBXAxSAk-WyAu5H0S1-plrk_SCTW1PO", + "location": braf_600loc, + "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="session") +def vhl_reference_agree(): + """Create NP_000542.1:p.Pro61 fixture.""" + params = { + "id": "ga4gh:VA.RMmwTvhrPVwfMZ6knsf5zMWQn_F1ukYh", + "location": { + "id": "ga4gh:SL.8TZYB8Oqqn93q07zrsNhvRW1JjNpaQXc", + "end": 61, + "start": 60, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "P", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="session") +def protein_insertion(): + """Create test fixture for NP protein insertion.""" + params = { + "id": "ga4gh:VA.AOCCh_BU5wKkdgoDNqkORF_x4GQwWh1T", + "location": { + "id": "ga4gh:SL.ciWb1ylkqUxiviU1djijiuYVZcgsnQnV", + "end": 770, + "start": 770, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.vyo55F6mA6n2LgN4cagcdRzOuh38V4mE", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "GL", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="session") +def protein_deletion_np_range(): + """Create test fixture for protein deletion using NP accession and + range for deletion. 
+ """ + params = { + "id": "ga4gh:VA.3Rk_RElDfX820edkQOHsTTYRogr0EMEY", + "location": { + "id": "ga4gh:SL.kOTzy0aLlw0yqnmf29Zk8wh65zHQwere", + "end": 759, + "start": 754, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.AF1UFydIo02-bMplonKSfxlWY2q6ze3m", + }, + "type": "SequenceLocation", + }, + "state": { + "length": 0, + "repeatSubunitLength": 5, + "sequence": "", + "type": "ReferenceLengthExpression", + }, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="session") +def braf_v600e_genomic_sub(): + """Create test fixture for NC_000007.14:g.140753336A>T""" + params = { + "id": "ga4gh:VA.LX3ooHBAiZdKY4RfTXcliUmkj48mnD_M", + "location": { + "id": "ga4gh:SL.XutGzMvqbzN-vnxmPt2MJf7ehxmB0opi", + "end": 140753336, + "start": 140753335, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="session") +def genomic_dup1_seq_loc_normalized(): + """Create test fixture containing genomic dup1 sequence location normalized""" + return { + "id": "ga4gh:SL.f0nAiaxOC3rPToQEYRRhbVBNO6HKutyc", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "start": 49531260, + "end": 49531262, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_dup1_seq_loc_not_normalized(): + """Create test fixture containing genomic dup1 sequence location that was + normalized + """ + return { + "id": "ga4gh:SL.y4-cVA2VxMCDxb9gV2oFrzC386yrEVqh", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "start": 49531261, + "end": 49531262, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def 
genomic_dup1_38_cn(genomic_dup1_seq_loc_not_normalized): + """Create test fixture for copy number count dup1 on GRCh38""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.07iM14yvZ80N_AiaM7G_V4f1pCkmFYz4", + "location": genomic_dup1_seq_loc_not_normalized, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="session") +def genomic_dup2_seq_loc_normalized(): + """Create genomic dup2 sequence location""" + return { + "id": "ga4gh:SL.rVXa8TXm6WTEw-_Lom6A347Q45SB7CON", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": 33211289, + "end": 33211293, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_dup2_38_cn(genomic_dup2_seq_loc_normalized): + """Create test fixture for copy number count dup2 on GRCh38""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.C8WuNCba5AN1RoXK1enXgALlM1Qz6X6i", + "location": genomic_dup2_seq_loc_normalized, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="session") +def genomic_del3_dup3_loc_not_normalized(): + """Create genomic del3 dup3 sequence location""" + return { + "id": "ga4gh:SL.-zCp7JBaKQ0niPDueJkuCgQhRIQ50hKw", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [31060226, 31100350], + "end": [33274278, 33417151], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_dup4_loc(): + """Create genomic dup4 sequence location""" + return { + "id": "ga4gh:SL.o8sCaAaW2a2f_HsNBTsHOCnWRvIyru0y", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.-A1QmD_MatoqxvgVxBLZTONHz9-c7nQo", + }, + "start": [None, 30417575], + "end": [31394018, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_dup5_loc(): + """Create genomic dup5 sequence location""" + return { + 
"id": "ga4gh:SL.O__pyYq_u7R__2NUbI3koxxkeCBL7WXq", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [None, 154021811], + "end": 154092209, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_dup6_loc(): + """Create genomic dup6 sequence location""" + return { + "id": "ga4gh:SL.Ls2wfxI-2V2OdMY5HHttwlSwgbpNf_j2", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": 154021811, + "end": [154092209, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_del1_seq_loc(): + """Create genomic del1 sequence location""" + return { + "id": "ga4gh:SL.zMba5wGtQWQmdFd70yEqMYszGoRaYX25", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "start": 10149810, + "end": 10149811, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_del1_lse(genomic_del1_seq_loc): + """Create a test fixture for genomic del LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.gztc0BFS6p5V1_QVnEYIJ6DwzZQeDCd2", + "location": genomic_del1_seq_loc, + "state": { + "length": 0, + "repeatSubunitLength": 1, + "type": "ReferenceLengthExpression", + "sequence": "", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="session") +def genomic_del1_38_cn(genomic_del1_seq_loc): + """Create test fixture for copy number count del1 on GRCh38""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.wRj3ZKNriLtPDVj0VlPaTCQfklj2ocGU", + "location": genomic_del1_seq_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="session") +def genomic_del2_seq_loc(): + """Create genomic del2 sequence location""" + return { + "id": "ga4gh:SL.usVkXRvjfX0cEXLvP87Oi8eJJGyizjQF", + "sequenceReference": { + "type": "SequenceReference", + 
"refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "start": 10146594, + "end": 10146613, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_del2_lse(genomic_del2_seq_loc): + """Create a test fixture for genomic del LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.9NmH0sRYerurt-CE6WlF9UaxZiujByIE", + "location": genomic_del2_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "sequence": "", + "length": 0, + "repeatSubunitLength": 19, + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="session") +def genomic_del2_38_cn(genomic_del2_seq_loc): + """Create test fixture for copy number count del1 on GRCh38""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.i7HRf9gge1HJKzazgvtinosa0bE3gHJu", + "location": genomic_del2_seq_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="session") +def genomic_del4_seq_loc(): + """Create genomic del4 sequence location""" + return { + "id": "ga4gh:SL.bWbNmdT__ptImBwTAIYdyNfazhwvEtXD", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [None, 31120495], + "end": [33339477, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_del5_seq_loc(): + """Create genomic del5 sequence location""" + return { + "id": "ga4gh:SL.WDxMzftZLrwp2eQJrlasKuY4ns99wG0v", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [None, 18575353], + "end": 18653629, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def genomic_del6_seq_loc(): + """Create genomic del6 sequence location""" + return { + "id": "ga4gh:SL.TKIwU5OzGgOWIpnzAHfkCLB7vrKupKhD", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.0iKlIQk2oZLoeOG9P1riRU6hvL5Ux8TV", + }, + "start": 133462763, + "end": [133464858, 
None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def grch38_genomic_insertion_seq_loc(): + """Create test fixture for GRCh38 genomic insertion seq location""" + return { + "id": "ga4gh:SL.oVzSkGhh3QJ0FAgihm-kNr9CJbF_7Ln2", + "end": 39724743, + "start": 39724731, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7", + }, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="session") +def grch38_genomic_insertion_variation(grch38_genomic_insertion_seq_loc): + """Create a test fixture for NC_000017.10:g.37880993_37880994insGCTTACGTGATG""" + params = { + "id": "ga4gh:VA.eorsJMgis9uDdRRPVd3srYofdPaM_xn2", + "location": grch38_genomic_insertion_seq_loc, + "state": { + "length": 24, + "repeatSubunitLength": 12, + "sequence": "TACGTGATGGCTTACGTGATGGCT", + "type": "ReferenceLengthExpression", + }, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="session") +def braf_amplification(braf_ncbi_seq_loc): + """Create test fixture for BRAF Amplification""" + params = { + "id": "ga4gh:CX.89PECTeQjhhXnNW9yg24DheWOQMgmKk2", + "location": braf_ncbi_seq_loc, + "copyChange": "efo:0030072", + "type": "CopyNumberChange", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="session") +def prpf8_amplification(prpf8_ncbi_seq_loc): + """Create test fixture for PRPF8 Amplification""" + params = { + "id": "ga4gh:CX.KH_rYvTqg5Hq0ysqbrh8JR20oeLYa7bk", + "location": prpf8_ncbi_seq_loc, + "copyChange": "efo:0030072", + "type": "CopyNumberChange", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del3_dup3_cn_38(genomic_del3_dup3_loc_not_normalized): + """Create test fixture copy number variation for del/dup 3 on GRCh38""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.rPsK0krAHgmXhDZEw4fqymR0iDQa3UCJ", + "location": genomic_del3_dup3_loc_not_normalized, + "copies": 2, + } + return 
models.CopyNumberCount(**params) + + +def assertion_checks(normalize_response, test_variation): + """Check that normalize_response and test_variation are equal.""" + actual = normalize_response.variation.model_dump(exclude_none=True) + expected = test_variation.model_dump(exclude_none=True) + assert actual == expected, "variation" + + +def cnv_assertion_checks(resp, test_fixture): + """Check that actual response for to copy number matches expected""" + try: + getattr(resp, "copy_number_count") + except AttributeError: + actual = resp.copy_number_change.model_dump(exclude_none=True) + else: + actual = resp.copy_number_count.model_dump(exclude_none=True) + expected = test_fixture.model_dump(exclude_none=True) + assert actual == expected + assert resp.warnings == [] diff --git a/tests/fixtures/classifiers.yml b/tests/fixtures/classifiers.yml new file mode 100644 index 0000000..55ab028 --- /dev/null +++ b/tests/fixtures/classifiers.yml @@ -0,0 +1,230 @@ +protein_substitution: + should_match: + - query: BRAF V600E + - query: braf V600E + - query: BRAF (V600E) + - query: NRAS G13V + - query: NP_004324.2:p.Val600Glu + - query: NP_065681.1:p.Met918Thr + should_not_match: + - query: BRAFV600E + - query: v600z + - query: braf v600e + - query: V600E BRAF + - query: BRAF V600E foo + - query: BRAF + - query: V600E + - query: (V600E) + - query: NP_065681.1:c.Met918Thr + +protein_stop_gain: + should_match: + - query: ENSP00000343204.4:p.Trp690Ter + should_not_match: + - query: ENS00000343204.4:c.Trp690Ter + +protein_reference_agree: + should_match: + - query: NP_000213.1:p.Leu862= + should_not_match: + - query: Leu862== + - query: NP_000213.1:p.Leu862==" + +cdna_substitution: + should_match: + - query: NM_000551.3:c.292T>C + - query: BRAF V600E c.23T>A + should_not_match: + - query: V170 (c.509F>A) + - query: RX_:g.292TC + - query: V170D (c.509T>A) + - query: NM_000551.3:c.292TC + - query: foo Y98H (c.292T>C) + - query: LRG_199t1:c.54G>H + +genomic_substitution: + should_match: + 
- query: NC_000017.10:g.292T>C + - query: BRAF V600E g.23T>A + - query: 7-292-A-C + - query: chrX-292-A-T + - query: chromosome10-292-G-A + - query: 5-112175770-GGAA-AGAA + should_not_match: + - query: V170 (g.509F>A) + - query: RX_:c.292TC + - query: V170D (g.509T>A) + - query: NC_000017.10:g.292TC + - query: foo Y98H (g.292T>C) + +cdna_reference_agree: + should_match: + - query: NM_004006.2:c.123= + should_not_match: + - query: CODING_DNA_:c.123= + - query: g.123= + - query: foo VHL c.123= + +genomic_reference_agree: + should_match: + - query: NC_000017.10:g.123= + - query: chr11-252-t-t + - query: 5-1295250-GAGG-GAGG + should_not_match: + - query: GENOMIC_:g.123= + - query: c.123= + - query: foo VHL g.123= + +protein_delins: + should_match: + - query: NP_001333827.1:p.Leu747_Thr751delinsPro + - query: NP_001333827.1:p.Leu747delinsProArg + - query: NP_005219.2:p.Glu746_Thr751delinsValAla + - query: NP_005219.2:p.G776delinsVC + should_not_match: + - query: N:p.Leu747_Thr751delinsPro + - query: EGFR E709_G719delins11 + +cdna_delins: + should_match: + - query: NM_005157.6:c.1423_1424delinsGT + - query: ENST00000277541.6:c.7330delinsACA + - query: NM_000551.3:c.615delinsAA + - query: ENST00000257290.5:c.2524_2525delinsAT + should_not_match: + - query: N_005157.6:g.1423_1424delinsGT + - query: c.1423delinsX + - query: LRG_199t1:c.79_80delinsTT + - query: LRG_199:c.79_80delinsTT + - query: NM_000797.3:c.812_829delins908_925 + - query: foo c.131_234delinsA + - query: foo NM_005157.6:c.1423_1424delinsGT + +genomic_delins: + should_match: + - query: NC_000017.10:g.1423_1424delinsGT + - query: NC_000017.10:g.7330delinsACA + - query: NC_000003.12:g.10149938delinsAA + - query: 3-37050340-AAAAGCTTTA-GAGGCTTT + - query: 16-68846036-AG-TGAGTTT + - query: X-70350063-AG-AGGCAGCGCATAAAGCGCATTCTCCG + - query: Y-1313-ATTGAC-a + - query: chr1-2-ca-C + - query: 1-55509715-AC-A + - query: chr17-131543-G-GA + - query: 20-14223252-T-TATGCATG + should_not_match: + - query: 
N_000017.10:c.1423_1424delinsGT + - query: g.1423delinsX + - query: NC_000017.10:g.812_829delins908_925 + - query: foo g.131_234delinsA + - query: foo NC_000017.10:g.1423_1424delinsGT + +protein_deletion: + should_match: + - query: NP_004439.2:p.Leu755_Thr759del + - query: NP_000213.1:p.Val560del + - query: NP_000213.1:p.Lys550_Lys558del + - query: KIT D419del + - query: KIT E554_V559del + - query: CTNNB1 Y30_I35del + - query: ENSP00000256474.2:p.Phe76del + - query: EGFR L747_T751delLREAT + should_not_match: + - query: fakegene g.Leu755_Thr759delLeu + - query: GENE c.L755del + - query: LRG_199p1:p.Val7del + - query: LRG_199p1:p.(Val7del) + - query: NP_004439.2:c.Leu755_Thr759del + - query: NP_003997.1:p.(Lys23_Val25del + +cdna_deletion: + should_match: + - query: ENST00000269571.5:c.2263_2277del + - query: NM_004448.3:c.2263_2277delTTGAGGGAAAACACA + - query: NM_000535.6:c.2117delA + - query: ENST00000256474.2:c.163delG + - query: MLH1 c.1852_1854delAAG + should_not_match: + - query: GENE c.1799_1800delTGinsAT + - query: GENE c.2524_2525delinsAT + - query: NM_004333.4:c.1799_1800delTGinsAT + +genomic_deletion: + should_match: + - query: NC_000017.10:g.37880219_37880233del + - query: NC_000004.11:g.55593610_55593615delTTGTTG + - query: NC_000003.11:g.10183645del + - query: NC_000003.11:g.10188302delG + should_not_match: + - query: GENE g.152419920_152419921delinsAG + - query: GENE g.152419920_152419921delAinsG + +protein_insertion: + should_match: + - query: NP_005219.2:p.Cys770_Gly771insGlyLeu + - query: NP_001333827.1:p.Ala763_Tyr764insPheGlnGluAla + - query: BRAF T599_V600insV + - query: EGFR A763_Y764insFQEA + should_not_match: + - query: GENE p.Lys23insAsp + - query: GENE Lys23insAsp + - query: GENE p.His4_Gln5insAlaG + - query: ACCESSION_23042.2:p.His4_Gln5insAla + +cdna_insertion: + should_match: + - query: NM_000551.3:c.230_231insTCT + - query: NM_000551.3:c.358_359insAC + should_not_match: + - query: GENE 358_359insAC + - query: accession:c.358_359insAC + - 
query: NM_004006.2:c.849_850ins858_895 + - query: NM_000551.3:c.358_359insAC foo + +genomic_insertion: + should_match: + - query: NC_000023.10:g.32867861_32867862insT + - query: NC_000023.10:g.32862923_32862924insCCT + - query: NC_000009.11:g.5070053_5070054insG + should_not_match: + - query: GENE 32867861_32867862insT + - query: accession:g.32867861_32867862insT + - query: NC_000023.10:g.32867861_32867862insT foo + +genomic_deletion_ambiguous: + should_match: + - query: NC_000023.11:g.(?_31120496)_(33339477_?)del + - query: NC_000023.11:g.(?_155980375)_(156013167_?)del + - query: NC_000023.11:g.(31060227_31100351)_(33274278_33417151)del + - query: BRAF g.(31060227_31100351)_(33274278_33417151)del + should_not_match: + - query: GENE (?_155980375)_(156013167_?)del + - query: accession:g.(?_155980375)_(156013167_?)del + - query: NC_000023.11:g.(?_155980375)_(156013167_?)del foo + - query: GENE (?_31120496)_(33339477_?)del + +genomic_duplication: + should_match: + - query: NC_000003.12:g.49531262dup + - query: NC_000016.10:g.2087938_2087948dup + - query: BRAF g.2087938_2087948dup + should_not_match: + - query: foo (?_30417576)_(31394018_?)del + - query: Accession:g.49531262dup + +genomic_duplication_ambiguous: + should_match: + - query: NC_000020.11:g.(?_30417576)_(31394018_?)dup + - query: NC_000023.11:g.(31060227_31100351)_(33274278_33417151)dup + - query: NC_000023.11:g.(?_154021812)_154092209dup + should_not_match: + - query: GENE (?_31120496)_(33339477_?)dup + - query: NC_000023.11:g.(31060227_33274278)_(31100351_33417151) + +amplification: + should_match: + - query: BRAF amplification + - query: braf AMPLIFICATION + should_not_match: + - query: gene amplification diff --git a/tests/fixtures/tokenizers.yml b/tests/fixtures/tokenizers.yml new file mode 100644 index 0000000..f66df43 --- /dev/null +++ b/tests/fixtures/tokenizers.yml @@ -0,0 +1,363 @@ +gene: + should_match: + - token: flt3 + - token: abl + - token: abl1 + - token: HGNC:1097 + - token: ncbigene:673 + 
- token: ensembl:ENSG00000157764 + should_not_match: + - token: not-a-gene + +protein_substitution: + should_match: + - token: V600E + - token: Arg54Ser + - token: p.Met918Thr + - token: p.Asp842Val + - token: p.(Arg54Ser) + - token: p.Arg54Ser + - token: p.Val600Glu + - token: p.Trp24Cys + - token: p.(Trp24Cys) + - token: p.Ala146Val + - token: V0E + - token: (p.TrpVal24CysArg) + should_not_match: + - token: foo + - token: p.(Arg54Ser + - token: p.(Z600B) + - token: p.(Gly56Ala^Ser^Cys) + - token: NP_004324.2:c.Val600Glu + - token: NM_004985.4:c.437C>T + - token: BRAF V600E foo + - token: NP_004324.2:p.Val600Glu + - token: NP_006197.1:p.Asp842Val + - token: NP_ + - token: NP_:p. + - token: c.Met918Thr + +protein_stop_gain: + should_match: + - token: Tyr365Ter + - token: (p.Tyr365*) + - token: (Tyr365Ter) + - token: p.Tyr365Ter + - token: p.(Tyr365Ter) + - token: Tyr0Ter + should_not_match: + - token: BRAF V600E + - token: p.Tyr365Terr + - token: NP_060842.3:p.Tyr365Terr + - token: NP_060842.3:p.Tyr365Ter (p.Tyr365Tyr) + - token: (p.Tyr365Tyr) + +protein_reference_agree: + should_match: + - token: p.Cys188= + - token: Cys188= + - token: p.Glu55= + - token: p.(G55=) + - token: Glu55= + - token: Glu0= + should_not_match: + - token: c.Cys188= + - token: p.Leu862== + - token: p.Xyz223= + +cdna_reference_agree: + should_match: + - token: c.123= + - token: (c.123=) + should_not_match: + - token: c.292T>C + - token: g.292T>C + - token: g.123 + - token: (c.123= + - token: c.123=) + - token: c.123 + - token: c.123== + +genomic_reference_agree: + should_match: + - token: g.123= + - token: (g.123=) + should_not_match: + - token: c.292T>C + - token: c.292T>C + - token: c.123 + - token: (g.123= + - token: g.123=) + - token: g.123 + - token: g.123== + +hgvs: + should_match: + - token: NC_000007.13:g.36561662C>T + - token: NM_01234.5:c.22+1A>T + - token: NP_000918.2:p.Ile1145= + should_not_match: + - token: NP004324.2 + - token: LRGp1 + - token: ERBB2:c.2326_2327delinsCT + - 
token: LRG_199t1:c.54G>H + +cdna_substitution: + should_match: + - token: (c.292T>C) + - token: c.292T>C + - token: (c.233A>G) + - token: c.509T>A + - token: c.54G>N + should_not_match: + - token: (c.292T>C + - token: g.292T>C + - token: c.292TC + - token: c.j324T + - token: c.509>A + - token: c.T>A + - token: c.54G>H + +genomic_substitution: + should_match: + - token: (g.292T>C) + - token: g.292T>C + - token: (g.233A>G) + - token: g.509T>A + - token: g.54G>N + should_not_match: + - token: (g.292T>C + - token: c.292T>C + - token: g.292TC + - token: g.j324T + - token: g.509>A + - token: g.T>A + - token: g.54G>H + +protein_delins: + should_match: + - token: p.Cys28delinsTrpVal + - token: Cys28delinsTrpVal + - token: p.Cys28_Lys29delinsTrp + - token: p.(Pro578_Lys579delinsLeuTer) + - token: p.Leu747_Thr751delinsPro + - token: p.Glu746_Thr751delinsValAla + - token: G776delinsVC + should_not_match: + - token: p.Cys28delinsTrpVals + - token: p.Cys28delinsTrpValSup + - token: p.Cys28Lys29delinsTrp + - token: p._Lys29delinsTrp + - token: p.Cys28_delinsTrp + - token: p.Cys28_Lys29delinsTrpPrt + - token: p.12314delinsArg + - token: 32386323delinsGA + - token: Cys28delins + - token: delinsTrpVals + - token: Glu776delinsVC + +cdna_delins: + should_match: + - token: c.32386323delinsGA + - token: c.6775_6777delinsC + - token: c.145_147delinsTGG + - token: c.9002_9009delinsTTT + - token: c.850_901delinsTTCCTCGATGCCTG + # - token: c.42522624_42522669delins42536337_42536382 + # - token: c.812_829delins908_925 + - token: (c.301_302delinsGG) + - token: c.615delinsAA + should_not_match: + - token: 32386323delinsGA + - token: c.145_147delinsTGGS + - token: c.145_147delTGG + - token: g.32386323delinsGA + - token: NM_000797.3:c.812_829delins908_ + - token: c.42522624_42522669delins_42536382 + - token: c.delinsGA + - token: c.32386323delins + - token: (c.301_302delinsGG + - token: c.delins + - token: delins + - token: c._147delinsTGG + - token: c.145_delinsTGG + - token: c.delinsTGG + - 
token: c.d_delinsTG + +genomic_delins: + should_match: + - token: g.32386323delinsGA + - token: g.6775_6777delinsC + - token: g.145_147delinsTGG + - token: g.9002_9009delinsTTT + - token: g.850_901delinsTTCCTCGATGCCTG + - token: (g.301_302delinsGG) + - token: g.10149938delinsAA + - token: g.10149938delinsAAN + should_not_match: + - token: 32386323delinsGA + - token: g.145_147delinsTGGS + - token: g.145_147delTGG + - token: c.32386323delinsGA + - token: NM_000797.3:g.812_829delins908_ + - token: g.42522624_42522669delins_42536382 + - token: g.delinsGA + - token: g.32386323delins + - token: (g.301_302delinsGG + - token: g.delins + - token: delins + - token: g._147delinsTGG + - token: g.145_delinsTGG + - token: g.delinsTGG + - token: g.d_delinsTG + +protein_deletion: + should_match: + - token: p.Val7del + - token: Val7del + - token: p.(Val7del) + - token: (Val7del) + - token: p.V7del + - token: V7del + - token: p.(V7del) + - token: (V7del) + - token: Lys23_Val25del + - token: p.Lys23_Val25del + - token: p.(Lys23_Val25del) + - token: (Lys23_Val25del) + - token: p.Phe76del + - token: L747_T751delLREAT + - token: p.L747_T751delLREAT + should_not_match: + - token: Val7deletion + - token: (Val7deletion) + - token: p.Val7deletion + - token: c.Val7del + - token: ValSup7del + - token: delVal7 + - token: 7Valdel + - token: Val7_Glydel + - token: Val_Gly7del + - token: p.(V7del + - token: p.V7del) + +cdna_deletion: + should_match: + - token: c.2263_2277del + - token: c.1608del + - token: c.2263_2277delTTGAGGGAAAACACA + - token: c.163delG + - token: c.435_436delGC + should_not_match: + - token: g.2263_2277del + - token: g.1608del + - token: c.del + - token: 2263_2277del + - token: c._2277del + - token: c.2263_del + - token: c.2263 + +genomic_deletion: + should_match: + - token: g.37880219_37880233del + - token: g.139390650del + - token: g.55593610_55593615delTTGTTG + - token: g.10191495delT + should_not_match: + - token: c.37880219_37880233del + - token: c.139390650del + - 
token: g.del + - token: 37880219_37880233del + - token: g._37880233del + - token: g._37880233del + - token: g.37880219 + +protein_insertion: + should_match: + - token: p.(Lys23_Leu24insArgSerGln) + - token: p.His4_Gln5insAla + - token: p.Lys2_Gly3insGlnSerLys + - token: p.(Met3_His4insGlyTer) + - token: T599_V600insV + - token: A763_Y764insFQEA + should_not_match: + - token: p.Lys23insAsp + - token: Lys23insAsp + - token: p.His4_Gln5insAlaG + +cdna_insertion: + should_match: + - token: c.169_170insA + - token: c.240_241insAGG + should_not_match: + - token: c.849_850ins858_895 + - token: p.His4_Gln5insAla + - token: c.His4_Gln5insAla + - token: 849_850ins858_895 + - token: p.849_850ins858_ + - token: p.849_850ins_895 + - token: p.169_insA + - token: p._170insA + - token: p.849_850ins858_895 + +genomic_insertion: + should_match: + - token: g.123_124insAGC + - token: g.32867861_32867862insT + - token: g.37881011_37881012insGGCTCCCCA + - token: g.7572948_7572949insN + should_not_match: + - token: g.123_124insAGCX + - token: c.169_170insAla + - token: g.32867861_32867862delinsT + - token: 123_124insAGCX + +genomic_duplication: + should_match: + - token: g.2087938_2087948dup + - token: g.49531262dup + should_not_match: + - token: 49531262dup + - token: dupdup + - token: g.(?_30417576)_(31394018_?)dup + - token: g.(31060227_31100351)_(33274278_33417151)dup + - token: g.(?_154021812)_154092209dup + +genomic_duplication_ambiguous: + should_match: + - token: g.(?_30417576)_(31394018_?)dup + - token: g.(31060227_31100351)_(33274278_33417151)dup + - token: g.(?_154021812)_154092209dup + should_not_match: + - token: g.2087938_2087948dup + - token: g.49531262dup + - token: g.(?_31394018)_(_30417576_?)dup + +genomic_deletion_ambiguous: + should_match: + - token: g.(?_31120496)_(33339477_?)del + - token: g.(?_155980375)_(156013167_?)del + - token: g.(?_18575354)_18653629del + - token: g.133462764_(133464858_?)del + - token: g.(31060227_31100351)_(33274278_33417151)del + - token: 
g.(31120496_?)_(?_33339477)del + should_not_match: + - token: c.(?_169)_(170_?)del + - token: g.(?_31120496)_(33339477_?)delins + - token: (?_31120496)_(33339477_?)del + - token: g.(x_31100351)_(33274278_33417151)del + +amplification: + should_match: + - token: Amplification + - token: amplification + - token: AMPLIFICATION + should_not_match: + - token: amplificatoin + - token: amp diff --git a/tests/fixtures/translators.yml b/tests/fixtures/translators.yml new file mode 100644 index 0000000..802a244 --- /dev/null +++ b/tests/fixtures/translators.yml @@ -0,0 +1,1485 @@ +protein_substitution: + tests: + - query: BRAF V600E + variations: + [ + { + "id": "ga4gh:VA.PJu8CCaVzEyqXMAEcMNegyDWyvT_jzNn", + "location": + { + "id": "ga4gh:SL.EpHaD2ygDuPMvyURI9L4yetEwF3W0G7G", + "end": 600, + "start": 599, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.ZJwurRo2HLY018wghYjDKSfIlEH0Y8At", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "E", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.4XBXAxSAk-WyAu5H0S1-plrk_SCTW1PO", + "location": + { + "id": "ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "end": 600, + "start": 599, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "E", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.c-oRhbu7nDrBrSW2fPbFlDM15V6jiaho", + "location": + { + "id": "ga4gh:SL.gkevJbLNOScKXhxhzOZXiG3hW8zeyo-q", + "start": 599, + "end": 600, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.lKdPZpuT-VNvRuKDjsUItNgutfWYgWQd", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "E", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.3ex0cvKXjHbq8NLuitOAfVwSPzqZUFrR", + "location": + { + "id": 
"ga4gh:SL.Q4MXez2kHFPQqGJKLP8quVHAskuCrOAA", + "start": 599, + "end": 600, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.0Q-SgJX1V3seUUIu3qVUtEa55CQsGmEU", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "E", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NP_004324.2:p.Val600Glu + variations: + [ + { + "id": "ga4gh:VA.4XBXAxSAk-WyAu5H0S1-plrk_SCTW1PO", + "location": + { + "id": "ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "end": 600, + "start": 599, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "E", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +protein_stop_gain: + tests: + - query: NP_000542.1:p.Tyr185Ter + variations: + [ + { + "id": "ga4gh:VA.KsGULBqRCUFNA89_9LErBWStMsBIXvlt", + "location": + { + "id": "ga4gh:SL.1qfXpIQd0Z4bAIpanqdhGpXmFd8_-Hb9", + "start": 184, + "end": 185, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "*", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NP_000542.1:p.Y185* + variations: + [ + { + "id": "ga4gh:VA.KsGULBqRCUFNA89_9LErBWStMsBIXvlt", + "location": + { + "id": "ga4gh:SL.1qfXpIQd0Z4bAIpanqdhGpXmFd8_-Hb9", + "start": 184, + "end": 185, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "*", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NP_000542.1:p.(Tyr185Ter) + variations: + [ + { + "id": "ga4gh:VA.KsGULBqRCUFNA89_9LErBWStMsBIXvlt", + "location": + { + "id": "ga4gh:SL.1qfXpIQd0Z4bAIpanqdhGpXmFd8_-Hb9", + "start": 184, + "end": 185, + "sequenceReference": + { 
+ "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "*", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NP_000539.2:p.Gln1178Ter + variations: + [ + { + "id": "ga4gh:VA.sGxJmxbzTpw6qqz2Kij3aXSZAPOge_G8", + "location": + { + "id": "ga4gh:SL.oGuWLVOr5Rdgradq5GJpMXEfcU7JQMDd", + "start": 1177, + "end": 1178, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.OBQRUmiVewAVPYXV5ACyxczHF1Q4YGOm", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "*", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +protein_reference_agree: + tests: + - query: NP_000542.1:p.Pro154= + variations: + [ + { + "id": "ga4gh:VA.bBwytVniGA1kDz3eIh99tpaPR_RcJnEC", + "location": + { + "id": "ga4gh:SL.4jSgOyI22QGrwXaC3MaGeqF7A4JAg2-r", + "start": 153, + "end": 154, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "P", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NP_000542.1:p.Pro61= + variations: + [ + { + "id": "ga4gh:VA.RMmwTvhrPVwfMZ6knsf5zMWQn_F1ukYh", + "location": + { + "id": "ga4gh:SL.8TZYB8Oqqn93q07zrsNhvRW1JjNpaQXc", + "start": 60, + "end": 61, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "P", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NP_000542.1:p.Glu55= + variations: + [ + { + "id": "ga4gh:VA.149Sy4H4lHbL_l3Gy1G2z5hghz_1JNI2", + "location": + { + "id": "ga4gh:SL.-kQnuBxB3QrbbZBSTAoAbz0azJT821Sk", + "start": 54, + "end": 55, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + 
"type": "SequenceLocation", + }, + "state": { "sequence": "E", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +cdna_substitution: + tests: + - query: NM_004333.4:c.1799T>A + variations: + [ + { + "id": "ga4gh:VA.toS6q-Htpv6CpLPtbSo9E_zi8E7jFLqL", + "location": + { + "id": "ga4gh:SL.brDsydg4aD2TZqwqc3wZWKxWRx144A89", + "start": 1859, + "end": 1860, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.jkiXxxRjK7uTMiW2KQFjpgvF3VQi-HhX", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "A", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: ENST00000288602.10:c.1799T>A + variations: + [ + { + "id": "ga4gh:VA.u4IphDRcnedzHjNGRx_yFJGD3JtOc_db", + "location": + { + "id": "ga4gh:SL.jRkp8fqzOE6Z1az_hmeDvmhzKVbCL6j-", + "start": 1859, + "end": 1860, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.VTW7KhA6-0s3_nxgkGq05eUGiDFnItW0", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "A", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NM_005502.4:c.900A>G + variations: [] + +genomic_substitution: + tests: + - query: NC_000007.13:g.140453136A>T + variations: + [ + { + "id": "ga4gh:VA.ztz4yxckrW1j7YFSprOz_T9gwLdMc6LB", + "location": + { + "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "start": 140453135, + "end": 140453136, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "T", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: BRAF g.140453136A>T + variations: + [ + { + "id": "ga4gh:VA.ztz4yxckrW1j7YFSprOz_T9gwLdMc6LB", + "location": + { + "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "start": 140453135, + "end": 140453136, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": 
"SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "T", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: BRAF V600E g.140453136A>T + variations: + [ + { + "id": "ga4gh:VA.ztz4yxckrW1j7YFSprOz_T9gwLdMc6LB", + "location": + { + "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "start": 140453135, + "end": 140453136, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "T", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +cdna_reference_agree: + tests: + - query: NM_004333.4:c.1799= + variations: + [ + { + "id": "ga4gh:VA.7UUAyVS_YyU4DLlP6RiVB268ZAy8zyyY", + "location": + { + "id": "ga4gh:SL.brDsydg4aD2TZqwqc3wZWKxWRx144A89", + "start": 1859, + "end": 1860, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.jkiXxxRjK7uTMiW2KQFjpgvF3VQi-HhX", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "T", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +genomic_reference_agree: + tests: + - query: NC_000007.13:g.140453136= + variations: + [ + { + "id": "ga4gh:VA.kEu66A38U6zxpNXCXqc6Dmuezir1S6Tu", + "location": + { + "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "start": 140453135, + "end": 140453136, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "A", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: BRAF g.140453136= + variations: + [ + { + "id": "ga4gh:VA.JcLZ2KGaPcO1LEFYcjkmdAdo-CgsYhig", + "location": + { + "id": "ga4gh:SL.GXZ4UlQBqqfn29XEHoCllU7T7OIlkMVS", + "start": 140453135, + "end": 140453136, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": 
"SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "T", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.kEu66A38U6zxpNXCXqc6Dmuezir1S6Tu", + "location": + { + "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "start": 140453135, + "end": 140453136, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "A", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +protein_delins: + tests: + - query: NP_001333827.1:p.Leu747_Thr751delinsPro + variations: + [ + { + "id": "ga4gh:VA.92P0i1m5gfT85HTuCXfintN2xFESjnB-", + "location": + { + "id": "ga4gh:SL.3Ap3XAZC0N5mHCgqbtdiwWOLjGFqbIep", + "end": 751, + "start": 746, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.OWicZUhcw_9nRH2SQjawk7BAEkrUIe__", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "P", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: ERBB2 G776delinsVC + variations: + [ + { + "id": "ga4gh:VA.sm7fIUxzDjOAM2acs5f_7lz4vaTfRiZQ", + "location": + { + "id": "ga4gh:SL.ajzD3sda-U5t75BQFVSTbSjnpB_JIhGW", + "end": 776, + "start": 775, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.fuC5BAgMxoOgTKKQTpwtT807ZVF2-zdF", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "VC", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.r6Mt7QqHNHoCAP0BYkIdowDyo_Qd39HO", + "location": + { + "id": "ga4gh:SL.tQGMp_RLZJAzG7vHXSlzAe3PdWin8z6Y", + "end": 776, + "start": 775, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.AF1UFydIo02-bMplonKSfxlWY2q6ze3m", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "VC", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": 
"ga4gh:VA.fYIoRKYHJB8h6G7U1T-hh6Cbhm7MNred", + "location": + { + "id": "ga4gh:SL.-TJqX5psxFSUnviBkufV7UndtWgiMWmf", + "end": 776, + "start": 775, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.attSSnFSyeB6FFNsZL1uulpdISBVBuXQ", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "VC", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.-B8y2zJ9sD1DF_Mz0HQWb0IJpBKNPjjF", + "location": + { + "id": "ga4gh:SL.eE9ee9uUCHtyeXFsTz2vSqJqy2f1IY-T", + "end": 776, + "start": 775, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.gWQeaH0zeaCffRawhGhXkDeP3ZcPIlyF", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "VC", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.iHwbQmIlhwkMjfPHESDwsnC8ZStgM7vq", + "location": + { + "id": "ga4gh:SL.8XQgBX5Olmnf96S7LhxaLyokCJ48PNcm", + "end": 776, + "start": 775, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.9DqU06SJwLX1WjvlmcZFBZAbRFojVMpp", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "VC", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.l0EUqPsxH4Xg43ifUnsDTcB_cc9uTTOT", + "location": + { + "id": "ga4gh:SL.26fpCjMjI8UpsE-Vt0ADOfXbr-nfa4o4", + "end": 776, + "start": 775, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.oDpVzgNlz91eIwDnT1Boi7vgtCMdhKG8", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "VC", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +cdna_delins: + tests: + - query: NM_001289937.1:c.2326_2327delinsCT + variations: + [ + { + "id": "ga4gh:VA.GjDWdDFru9m7P-xYI9rTRxq3upJ4bI20", + "location": + { + "id": "ga4gh:SL.GIfFNwrm6SZMeaGqqnSobxtr0kZGgGAl", + "start": 2586, + "end": 2588, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": 
"SQ.Djq77klvL3m3p0xzraFfQPyuFhFgN_9w", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "CT", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: ENST00000256474.2:c.364_365delinsAT + variations: + [ + { + "id": "ga4gh:VA.iQjEr3dKDMX1n2FWweAvIca6QWoDEOv9", + "location": + { + "id": "ga4gh:SL.hQQhl1lEB8c9jZYvMAlp7597fbIZhRzE", + "start": 1203, + "end": 1205, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.guuEF-tM_kFZodRliLEI35w_k-DqTaDs", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "AT", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NM_000551.3:c.615delinsAA + variations: + [ + { + "id": "ga4gh:VA.C8ciS2eoX3gN2pPixmhBi2o-u0r0RTN0", + "location": + { + "id": "ga4gh:SL.vPHmwGWL5xXR9fFJA4Ksv4s2ziyOf1nF", + "start": 827, + "end": 828, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "AA", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +genomic_delins: + tests: + - query: NC_000007.13:g.140453135_140453136delinsAT + variations: + [ + { + "id": "ga4gh:VA.LkWsKQZid3ELe70zT_msZ0ML_OxHrjXO", + "location": + { + "id": "ga4gh:SL.XmwKDUQWWIGs3XDkRMvsTAH8gL_jey6E", + "start": 140453134, + "end": 140453136, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "AT", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: BRAF g.140453135_140453136delinsAT + variations: + [ + { + "id": "ga4gh:VA.LkWsKQZid3ELe70zT_msZ0ML_OxHrjXO", + "location": + { + "id": "ga4gh:SL.XmwKDUQWWIGs3XDkRMvsTAH8gL_jey6E", + "start": 140453134, + "end": 140453136, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": 
"SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "AT", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.nWJAMU9SIxAjVXvf6hjy7CdOoRYpFKo-", + "location": + { + "id": "ga4gh:SL.ECL9ksWf3zttFix7aFjLk3oMGIT2r1T_", + "start": 140453134, + "end": 140453135, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "A", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: NC_000003.12:g.10149938delinsAA + variations: + [ + { + "id": "ga4gh:VA.m3N5EEDcudS3thzXlwRDxiffKNgGukzv", + "location": + { + "id": "ga4gh:SL.acAqImEWvHwbUHaJi7L8yOyrSsc1DlW-", + "start": 10149937, + "end": 10149938, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "AA", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: VHL g.10149938delinsAA + variations: + [ + { + "id": "ga4gh:VA.m3N5EEDcudS3thzXlwRDxiffKNgGukzv", + "location": + { + "id": "ga4gh:SL.acAqImEWvHwbUHaJi7L8yOyrSsc1DlW-", + "start": 10149937, + "end": 10149938, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "AA", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.4B9dWHwIS_Nek8yIC7rcVpsTyklMSmTK", + "location": + { + "id": "ga4gh:SL.mLNGrp5jlDKQRw3d4-XklOHh8DBGggMI", + "start": 10149937, + "end": 10149938, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.VNBualIltAyi2AI_uXcKU7M9XUOuA7MS", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "AA", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: 
17-7578455-CGCGG-CGCG + variations: + [ + { + "id": "ga4gh:VA.3Q0qa7xolRvjtdm7HsJdhwnARl40PZQC", + "type": "Allele", + "location": + { + "id": "ga4gh:SL.ayQ3u0mtegVPCcfwjOtn1I1w8-kNm3jM", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.AjWXsI7AkTK35XW9pgd3UbjpC3MAevlz", + }, + "start": 7578457, + "end": 7578459, + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 1, + "sequence": "G", + }, + }, + ] + +protein_deletion: + tests: + - query: NP_000542.1:p.Phe76del + variations: + [ + { + "id": "ga4gh:VA.Cd6Z8tjYtsg60dO7xxaWDoTyXoS6SAc7", + "location": + { + "id": "ga4gh:SL.-l_WRD5XcRkY4frBSeQdljLrUTzYUAxE", + "end": 76, + "start": 75, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 0, + "sequence": "", + }, + "type": "Allele", + }, + ] + - query: NP_000542.1:p.Arg82_Val84del + variations: + [ + { + "id": "ga4gh:VA.PWbmz24TupE8h8g2Vw-gLWAjLiBw85N0", + "location": + { + "id": "ga4gh:SL.NzcGLbsfRAy4bLNS88kyjzUdqDGRVGd4", + "end": 84, + "start": 81, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 3, + "length": 0, + "sequence": "", + }, + "type": "Allele", + }, + ] + - query: EGFR L747_T751delLREAT + variations: + [ + { + "id": "ga4gh:VA.N7F0WqJs4dqp9ndh2YW7u0t6xRiHccpQ", + "location": + { + "id": "ga4gh:SL.RKZW47OI5rv2-9gU3rJKV9dMfq7BYT_u", + "start": 746, + "end": 751, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.vyo55F6mA6n2LgN4cagcdRzOuh38V4mE", + }, + "type": "SequenceLocation", + }, + "state": { + "type": "ReferenceLengthExpression", 
+ "repeatSubunitLength": 5, + "length": 0, + "sequence": "", + }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.zyl9zFPjMMOtIdsfCDIUHvpyoQrDlRc7", + "location": + { + "id": "ga4gh:SL.3Ap3XAZC0N5mHCgqbtdiwWOLjGFqbIep", + "start": 746, + "end": 751, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.OWicZUhcw_9nRH2SQjawk7BAEkrUIe__", + }, + "type": "SequenceLocation", + }, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 5, + "length": 0, + "sequence": "", + }, + "type": "Allele", + }, + ] + +cdna_deletion: + tests: + - query: NM_004448.3:c.2263_2277delTTGAGGGAAAACACA + variations: + [ + { + "id": "ga4gh:VA.ts4dehcMOYyN7u7CKO8fF4C80rZxALBY", + "location": + { + "id": "ga4gh:SL.8Buo3uzjQAY2kwIsaeiJgQLYqJc9wAVX", + "end": 2539, + "start": 2523, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.3Ob8qgT17gn62p5Yhdy5yoCacMqHcGBG", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 15, + "length": 1, + "sequence": "T", + }, + "type": "Allele", + }, + ] + +genomic_deletion: + tests: + - query: NC_000003.11:g.10191486_10191487delAG + variations: + [ + { + "id": "ga4gh:VA.oPjzfyjE7AmkHxvgVKFm3fr8Etu0Mp9b", + "location": + { + "id": "ga4gh:SL.cxEFtsg09n8eNXIWuRetrmWTUV1nm8_f", + "end": 10191487, + "start": 10191483, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.VNBualIltAyi2AI_uXcKU7M9XUOuA7MS", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 2, + "length": 2, + "sequence": "AG", + }, + "type": "Allele", + }, + ] + - query: VHL g.10191486_10191487delAG + variations: + [ + { + "id": "ga4gh:VA.oPjzfyjE7AmkHxvgVKFm3fr8Etu0Mp9b", + "location": + { + "id": "ga4gh:SL.cxEFtsg09n8eNXIWuRetrmWTUV1nm8_f", + "end": 10191487, + "start": 10191483, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": 
"SQ.VNBualIltAyi2AI_uXcKU7M9XUOuA7MS", + }, + "type": "SequenceLocation", + }, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 2, + "length": 2, + "sequence": "AG", + }, + "type": "Allele", + }, + ] + +protein_insertion: + tests: + - query: NP_005219.2:p.Cys770_Gly771insGlyLeu + variations: [] + - query: NP_005219.2:p.Asp770_Asn771insGlyLeu + variations: + [ + { + "id": "ga4gh:VA.AOCCh_BU5wKkdgoDNqkORF_x4GQwWh1T", + "location": + { + "id": "ga4gh:SL.ciWb1ylkqUxiviU1djijiuYVZcgsnQnV", + "end": 770, + "start": 770, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.vyo55F6mA6n2LgN4cagcdRzOuh38V4mE", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "GL", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + - query: BRAF T599_V600insV + variations: + [ + { + "id": "ga4gh:VA.5WCguhOtzU0gZGEW9mAlqtzROgJaEr5w", + "location": + { + "id": "ga4gh:SL.EpHaD2ygDuPMvyURI9L4yetEwF3W0G7G", + "end": 600, + "start": 599, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.ZJwurRo2HLY018wghYjDKSfIlEH0Y8At", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 2, + "sequence": "VV", + }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.Di6aHQPATSe_jG54fI2ZtLDtvrqWn1U0", + "location": + { + "id": "ga4gh:SL.Q4MXez2kHFPQqGJKLP8quVHAskuCrOAA", + "end": 600, + "start": 599, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.0Q-SgJX1V3seUUIu3qVUtEa55CQsGmEU", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 2, + "sequence": "VV", + }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.7ApUCILWJUCNIZcKq4oUurWEqyL3gyi9", + "location": + { + "id": "ga4gh:SL.gkevJbLNOScKXhxhzOZXiG3hW8zeyo-q", + "end": 600, + "start": 599, + "sequenceReference": + { + "type": "SequenceReference", + 
"refgetAccession": "SQ.lKdPZpuT-VNvRuKDjsUItNgutfWYgWQd", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 2, + "sequence": "VV", + }, + "type": "Allele", + }, + { + "id": "ga4gh:VA.adBs3KK7T1yz8i9kfh1NFsGQQoaSLHJK", + "location": + { + "id": "ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "end": 600, + "start": 599, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 2, + "sequence": "VV", + }, + "type": "Allele", + }, + ] + +cdna_insertion: + tests: + - query: NM_007294.3:c.2902_2903insTC + variations: + [ + { + "id": "ga4gh:VA.rUAwTHwBYQRPXbzBWTi6QbqoF1bAUjtn", + "location": + { + "id": "ga4gh:SL.-qRr3HfBajBTmzAVJPwyMb6wo9qfHqgm", + "end": 3134, + "start": 3131, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.jj1RAXMGdOU-D39IRYnDX_fXcM7LQvb6", + }, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 2, + "length": 5, + "sequence": "CTCTC", + }, + "type": "Allele", + }, + ] + - query: NM_007294.3:c.2902_2903insTCN + variations: + [ + { + "id": "ga4gh:VA.I4EqcfOYhpPRgeNbwM1HhKqcA-gdmsrB", + "location": + { + "id": "ga4gh:SL.sFsPhPS3JCJkA_LObVXnrYeTSHp8GGT2", + "end": 3134, + "start": 3134, + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.jj1RAXMGdOU-D39IRYnDX_fXcM7LQvb6", + }, + "type": "SequenceLocation", + }, + "state": { "sequence": "TCN", "type": "LiteralSequenceExpression" }, + "type": "Allele", + }, + ] + +genomic_insertion: + tests: + - query: NC_000022.10:g.30051593_30051594insT + variations: + [ + { + "id": "ga4gh:VA.cY1c7lbwhHmg_b7cAxMpjQT6X-UYmcK9", + "type": "Allele", + "location": + { + "id": "ga4gh:SL.7zWWbLgDc_VzA3G3oIdEu1v8EbKNd5Um", + 
"type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.XOgHwwR3Upfp5sZYk6ZKzvV25a4RBVu8", + }, + "start": 30051592, + "end": 30051593, + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 2, + "sequence": "TT", + }, + }, + ] + - query: NF2 g.30051593_30051594insT + variations: + [ + { + "id": "ga4gh:VA.cY1c7lbwhHmg_b7cAxMpjQT6X-UYmcK9", + "type": "Allele", + "location": + { + "id": "ga4gh:SL.7zWWbLgDc_VzA3G3oIdEu1v8EbKNd5Um", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.XOgHwwR3Upfp5sZYk6ZKzvV25a4RBVu8", + }, + "start": 30051592, + "end": 30051593, + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 2, + "sequence": "TT", + }, + }, + { + "id": "ga4gh:VA.PqonQkZk4h0bkVplJPZEBRUjUqY2fxXn", + "type": "Allele", + "location": + { + "id": "ga4gh:SL.VGXyaPw8eXR1ut445bprXOX6WHmh-bXa", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.7B7SHsmchAR0dFcDCuSFjJAo7tX87krQ", + }, + "start": 30051593, + "end": 30051593, + }, + "state": { "type": "LiteralSequenceExpression", "sequence": "T" }, + }, + ] + - query: NC_000017.10:g.7572948_7572949insTTTTTTTTTNNNNN + variations: + [ + { + "id": "ga4gh:VA.cxPRxlfPGzZD0pIdxu9EhAPhLZTzDNT0", + "type": "Allele", + "location": + { + "id": "ga4gh:SL.LyA3prPbL8U0xJ3P6tO1w295QvRGxYnK", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.AjWXsI7AkTK35XW9pgd3UbjpC3MAevlz", + }, + "start": 7572948, + "end": 7572948, + }, + "state": + { "type": "LiteralSequenceExpression", "sequence": "TTTTTTTTTNNNNN" }, + }, + ] + +genomic_deletion_ambiguous: + tests: + - query: NC_000023.11:g.(?_31120496)_(33339477_?)del + variations: + [ + { + "id": "ga4gh:CX.1DiUzraiKZLJb8oF8ynARS816fthsJpV", + "type": "CopyNumberChange", + 
"location": + { + "id": "ga4gh:SL.bWbNmdT__ptImBwTAIYdyNfazhwvEtXD", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [null, 31120495], + "end": [33339477, null], + }, + "copyChange": "efo:0030067", + }, + ] + - query: NC_000023.10:g.(?_31138613)_(33357594_?)del + variations: + [ + { + "id": "ga4gh:CX.CJdXMZXSEE4hCIwjGxke4EWY7lMENYPj", + "type": "CopyNumberChange", + "location": + { + "id": "ga4gh:SL.2ORImLGRcezhWXDYI9sZvLwFGPDH9WKS", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": [null, 31138612], + "end": [33357594, null], + }, + "copyChange": "efo:0030067", + }, + ] + - query: NC_000002.12:g.(?_110104900)_(110207160_?)del + variations: + [ + { + "id": "ga4gh:CX.q3OPPp2fWM5uM60RNHY_jDThCyxV3URW", + "type": "CopyNumberChange", + "location": + { + "id": "ga4gh:SL.aRKiRW6-lS9CCLfcPJpQIGihZqoIOCZ_", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g", + }, + "start": [null, 110104899], + "end": [110207160, null], + }, + "copyChange": "efo:0030067", + }, + ] + - query: NC_000024.10:g.(?_14076802)_(57165209_?)del + variations: + [ + { + "id": "ga4gh:CX.vR12PHS1zCnoYUi9CSX3ZwhGG38xa-RA", + "type": "CopyNumberChange", + "location": + { + "id": "ga4gh:SL.N44ez-5301ZoNdLoiblcUvm__BS4-4Jv", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + }, + "start": [null, 14076801], + "end": [57165209, null], + }, + "copyChange": "efo:0030067", + }, + ] + +genomic_duplication: + tests: + - query: NC_000003.12:g.49531262dup + variations: + [ + { + "id": "ga4gh:VA.CHNQRjx52keAGF5WcbvKORtfLiitZKE4", + "type": "Allele", + "location": + { + "id": 
"ga4gh:SL.f0nAiaxOC3rPToQEYRRhbVBNO6HKutyc", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "start": 49531260, + "end": 49531262, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 3, + "sequence": "GGG", + }, + }, + ] + - query: NC_000016.10:g.2087938_2087948dup + variations: + [ + { + "id": "ga4gh:VA.X_j-DHSSrgaCw1gfXzpIyG-I5e0PMGL3", + "type": "Allele", + "location": + { + "id": "ga4gh:SL.1_LGkYC5Ytn5Vreye8dEPLM_uGKrvuAA", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.yC_0RBj3fgBlvgyAuycbzdubtLxq-rE0", + }, + "start": 2087937, + "end": 2087948, + "type": "SequenceLocation", + }, + "state": + { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 11, + "length": 22, + "sequence": "AAAGGTAGGGCAAAGGTAGGGC", + }, + }, + ] + +genomic_duplication_ambiguous: + tests: + - query: NC_000023.11:g.(31060227_31100351)_(33274278_33417151)dup + variations: + [ + { + "id": "ga4gh:CX.gsV4KrWvNQ_c0UT8M31mqa0HJ-IAHL8q", + "type": "CopyNumberChange", + "location": + { + "id": "ga4gh:SL.-zCp7JBaKQ0niPDueJkuCgQhRIQ50hKw", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [31060226, 31100350], + "end": [33274278, 33417151], + }, + "copyChange": "efo:0030070", + }, + ] + - query: NC_000023.11:g.(?_154021812)_154092209dup + variations: + [ + { + "id": "ga4gh:CX.6KvwSUu1Vp3FcC2VzbZxqpLAouOMCPi9", + "type": "CopyNumberChange", + "location": + { + "id": "ga4gh:SL.O__pyYq_u7R__2NUbI3koxxkeCBL7WXq", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [null, 154021811], + "end": 154092209, + }, + "copyChange": "efo:0030070", + }, + ] + - query: 
NC_000020.11:g.(?_30417576)_(31394018_?)dup + variations: + [ + { + "id": "ga4gh:CX.4JLs2ICAAvj5JgG0xHJk1voSKLb8gNQ9", + "type": "CopyNumberChange", + "location": + { + "id": "ga4gh:SL.o8sCaAaW2a2f_HsNBTsHOCnWRvIyru0y", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.-A1QmD_MatoqxvgVxBLZTONHz9-c7nQo", + }, + "start": [null, 30417575], + "end": [31394018, null], + }, + "copyChange": "efo:0030070", + }, + ] + +amplification: + tests: + - query: BRAF Amplification + variations: + [ + { + "id": "ga4gh:CX.89PECTeQjhhXnNW9yg24DheWOQMgmKk2", + "type": "CopyNumberChange", + "location": + { + "id": "ga4gh:SL.uNBZoxhjhohl24VlIut-JxPJAGfJ7EQE", + "type": "SequenceLocation", + "sequenceReference": + { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "start": 140713327, + "end": 140924929, + }, + "copyChange": "efo:0030072", + }, + ] diff --git a/tests/fixtures/validators.yml b/tests/fixtures/validators.yml new file mode 100644 index 0000000..5b10f61 --- /dev/null +++ b/tests/fixtures/validators.yml @@ -0,0 +1,235 @@ +protein_substitution: + should_match: + - query: BRAF V600E + - query: NP_004324.2:p.Val600Glu + - query: NP_005219.2:p.Thr790Met + - query: EGFR Leu858Arg + should_not_match: + - query: NP_004324.2:p.Val600000000000Glu + - query: NP_004324.2:p.Glu600Val + - query: NP_005148.2:p.Leu2733Gln + - query: NP_000000000542.1:p.Val66Gly + - query: BRAF V9999999999999999999999999999999E + +protein_stop_gain: + should_match: + - query: NP_060842.3:p.Tyr365Ter + - query: NP_000542.1:p.Tyr185Ter + - query: NP_000542.1:p.Tyr185* + should_not_match: + - query: NP_060842.3:p.Tyr3650000000000Ter + +protein_reference_agree: + should_match: + - query: NP_000542.1:p.Pro61= + - query: NP_000918.2:p.Ile1145= + should_not_match: + - query: NP_000542.1:p.Pro62= + +cdna_substitution: + should_match: + - query: NM_004333.4:c.1799T>A + - query: ENST00000288602.10:c.1799T>A + - 
query: BRAF (c.1799T>A) + - query: BRAF c.1799T>A + - query: BRAF V600E c.1799T>A + should_not_match: + - query: BRAF c.18000000000000T>A + - query: NM_004333.4:c.17699T>A + +genomic_substitution: + should_match: + - query: NC_000007.13:g.140453136A>T + - query: NC_000007.13:g.55259515T>G + - query: 7-140453136-A-T + - query: 7-55259515-T-G + - query: 5-112175770-GGAA-AGAA + should_not_match: + - query: NC_000007.13:g.1436A>T + - query: NC_000007.13:g.4T>A + - query: 7-140453136-G-T + - query: 5-112175770-TGAA-AGAA + - query: 5-112175770-GGAT-AGAA + +cdna_reference_agree: + should_match: + - query: NM_004006.2:c.123= + - query: NM_004333.4:c.1799= + - query: ENST00000288602.11:c.1799= + - query: BRAF c.1799= + - query: BRAF V600E c.1799= + should_not_match: + - query: NM_004006.2:c.13994= + - query: BRAF c.18000000000000= + - query: NM_000412.5:c.1930= # pos out of index + +genomic_reference_agree: + should_match: + - query: NC_000007.13:g.140453136= + - query: NC_000007.13:g.55259515= + - query: 7-140453136-A-A + - query: 7-55259515-T-T + - query: 5-1295250-GAGG-GAGG + should_not_match: + - query: NC_000007.13:g.159138664= + - query: 7-140453136-C-C + - query: 5-1295250-GCGG-GAGG + - query: 5-1295250-GAGA-GAGG + +protein_delins: + should_match: + - query: NP_001333827.1:p.Leu747_Thr751delinsPro + - query: NP_000542.1:p.Gln96_Pro97delinsHis + - query: NP_005219.2:p.Glu746_Thr751delinsValAla + - query: ERBB2 G776delinsVC + - query: KIT P577_W582delinsPYD + should_not_match: + - query: ERBB2 K776delinsVC + - query: NP_001333827.1:p.Cys747_Thr751delinsPro + - query: NP_001333827.1:p.Leu747_Pro751delinsPro + +cdna_delins: + should_match: + - query: NM_001289937.1:c.2326_2327delinsCT + - query: NM_000551.3:c.615delinsAA + - query: ENST00000440973.5:c.1607_1608delinsAG + - query: ENST00000318560.5:c.1423_1424delinsGT + - query: ENST00000256474.2:c.364_365delinsAT + - query: NM_000551.3:c.615delinsAA + should_not_match: + - query: NM_005228:c.2237_2253delinsTTGCT + - 
query: ENST00000277541.6:c.7330479587395delinsACA + - query: NM_000551.3:c.4561delinsAA + - query: NM_000551.3:c.4561_4562delinsAA + - query: NM_000551.3:c.4560_4561delinsAA + - query: NM_001289937.1:c.2327_2326delinsCT + - query: NM_000551.3:c.4559delinsAA # pos out of index + +genomic_delins: + should_match: + - query: NC_000007.13:g.140453135_140453136delinsAT + - query: NC_000007.13:g.159138662delinsAT + - query: NC_000023.11:g.32386323delinsGA + - query: NC_000003.12:g.10149938delinsAA + - query: 3-37050340-AAAAGCTTTA-GAGGCTTT + - query: 16-68846036-AG-TGAGTTT + - query: X-70350063-AG-AGGCAGCGCATAAAGCGCATTCTCCG + - query: 16-2138199-GTGAG-G + - query: 1-55509715-AC-A + - query: chr6-31239170-C-CA + should_not_match: + - query: NC_000023.21:g.32386323delinsGA + - query: NC_000007.13:g.159138664delinsAT + - query: NC_000007.13:g.159138663_159138664delinsAT + - query: NC_000023.11:g.3238646549879323delinsGA + - query: NC_000007.13:g.140453136_140453134delinsAT + - query: 3-37050340-AAAAGCGTTA-GAGGCTTT + - query: 16-68846036-AC-TGAGTTT + - query: X-70350063-CC-AGGCAGCGCATAAAGCGCATTCTCCG + - query: 1-55509715-TC-A + - query: 16-2138199-GTGAT-G + +protein_deletion: + should_match: + - query: NP_003997.1:p.Lys23_Val25del + - query: NP_003997.1:p.(Lys23_Val25del) + - query: NP_000542.1:p.Glu186del + - query: NP_000542.1:p.(Glu186del) + - query: NP_000542.1:p.Arg82_Val84del + - query: ENSP00000256474.2:p.Phe76del + - query: KIT D419del + - query: KIT E554_V559del + - query: EGFR L747_T751del + - query: EGFR L747_T751delLREAT + should_not_match: + - query: EGFR L747_T751delLREATS + - query: KIT V419del + +cdna_deletion: + should_match: + - query: ENST00000269571.9:c.2263_2277del + - query: NM_004448.3:c.2263_2277delTTGAGGGAAAACACA + - query: ERBB2 c.2263_2277delTTGAGGGAAAACACA + - query: NM_004448.3:c.2263_2277del + - query: NM_000535.6:c.2117delA + - query: NM_000535.6:c.2117del + should_not_match: + - query: NM_000535.6:c.21174568delT + - query: 
NM_000535.6:c.21145457delA + - query: ENST00000269571.9:c.2277_2263del + +genomic_deletion: + should_match: + - query: NC_000003.11:g.10188279_10188297del + - query: NC_000003.11:g.10191486_10191487delAG + - query: NC_000003.12:g.10146527_10146528del + - query: NC_000003.11:g.10191495delT + - query: VHL g.10188279_10188297del + should_not_match: + - query: NC_000003.11:g.10191454654654654495delT + - query: NC_000003.11:g.10188297_10188279del + +protein_insertion: + should_match: + - query: NP_005219.2:p.Gly1209_Ala1210insGlyLeu + - query: NP_005219.2:p.Asp770_Gly1209insGlyLeu + - query: NP_001333827.1:p.Ala763_Tyr764insPheGlnGluAla + - query: BRAF T599_V600insV + - query: EGFR A763_Y764insFQEA + should_not_match: + - query: NP_005219.2:p.Gly1209_Gly1211insGlyLeu + - query: NP_005219.2:p.Cys1211_Gly1256insGlyLeu + - query: NP_005219.2:p.Asn770_Gly771insGlyLeu + - query: NP_005219.2:p.Asp770_Gly771insGlyLeu + - query: BRAF E599_V600insV + +cdna_insertion: + should_match: + - query: ENST00000000442.11:c.426_500insT + - query: NM_007294.3:c.2902_2903insTC + - query: ENST00000331728.9:c.2049_2050insA + - query: LIMK2 c.2049_2050insA + should_not_match: + - query: NM_007294.3:c.7224_7225insTC + - query: LIMK2 c.486488_48649545656530insA + +genomic_insertion: + should_match: + - query: NC_000022.10:g.30051593_30051594insT + - query: NC_000017.10:g.37880993_37880994insGCTTACGTGATG + - query: ERBB2 g.37880993_37880994insGCTTACGTGATG + should_not_match: + - query: NC_000022.10:g.51304566_51304567insT + - query: NC_000022.10:g.51304567_51304568insT + +genomic_deletion_ambiguous: + should_match: + - query: NC_000023.11:g.(?_155980375)_(156013167_?)del + - query: NC_000002.12:g.(?_110104900)_(110207160_?)del + - query: NC_000024.10:g.(?_14076802)_(57165209_?)del + should_not_match: + - query: NC_000023.11:g.(?_156013167)_(155980375_?)del + - query: NC_000024.10:g.(14076805_14076804)_(14076803_14076802)del + +genomic_duplication: + should_match: + - query: 
NC_000003.12:g.49531262dup + - query: NC_000016.10:g.2087938_2087948dup + should_not_match: + - query: NC_000003.12:g.495312625165465465465dup + - query: NC_000016.10:g.2087948_2087938dup + +genomic_duplication_ambiguous: + should_match: + - query: NC_000020.11:g.(?_30417576)_(31394018_?)dup + - query: NC_000023.11:g.(?_154021812)_154092209dup + - query: NC_000023.11:g.154021812_(154092209_?)dup + - query: NC_000023.11:g.(31060227_31100351)_(33274278_33417151)dup + - query: NC_000023.10:g.(31078344_31118468)_(33292395_33435268)dup + should_not_match: + - query: NC_000023.11:g.(?_154092209)_154021812dup + +amplification: + should_match: + - query: BRAF Amplification + - query: egfr amplification diff --git a/tests/test_classifier.py b/tests/test_classifier.py new file mode 100644 index 0000000..82f45c3 --- /dev/null +++ b/tests/test_classifier.py @@ -0,0 +1,318 @@ +"""Module for testing classifiers""" +import pytest +import yaml + +from tests import PROJECT_ROOT +from variation.schemas.classification_response_schema import ( + AmplificationClassification, + CdnaDeletionClassification, + CdnaDelInsClassification, + CdnaInsertionClassification, + CdnaReferenceAgreeClassification, + CdnaSubstitutionClassification, + GenomicDeletionAmbiguousClassification, + GenomicDeletionClassification, + GenomicDelInsClassification, + GenomicDuplicationAmbiguousClassification, + GenomicDuplicationClassification, + GenomicInsertionClassification, + GenomicReferenceAgreeClassification, + GenomicSubstitutionClassification, + ProteinDeletionClassification, + ProteinDelInsClassification, + ProteinInsertionClassification, + ProteinReferenceAgreeClassification, + ProteinStopGainClassification, + ProteinSubstitutionClassification, +) + + +@pytest.fixture(scope="module") +def all_fixtures(): + """Create fixture for classifiers""" + with open(f"{PROJECT_ROOT}/tests/fixtures/classifiers.yml") as stream: + return yaml.safe_load(stream) + + +def classifier_checks( + all_fixtures, test_tokenizer, 
test_classifier, fixture_name, expected_classification +): + """Ensure that fixtures exist for fixture name and that classifier response matches + expected + """ + fixtures = all_fixtures.get( + fixture_name, {"should_match": [], "should_not_match": []} + ) + + for label in ["should_match", "should_not_match"]: + assert fixtures[label], f"{fixture_name} has no {label} queries" + + for x in fixtures[label]: + query = x["query"] + tokens = test_tokenizer.perform(query, []) + classification = test_classifier.perform(tokens) + + if label == "should_match": + assert isinstance(classification, expected_classification), query + else: + assert classification is None, query + + +def test_amplification(all_fixtures, test_tokenizer, test_classifier): + """Test that amplification classifier works""" + fixture_name = "amplification" + expected_classification = AmplificationClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_protein_substitution(all_fixtures, test_tokenizer, test_classifier): + """Test that protein substitution classifier works""" + fixture_name = "protein_substitution" + expected_classification = ProteinSubstitutionClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_cdna_substitution(all_fixtures, test_tokenizer, test_classifier): + """Test that cdna substitution classifier works""" + fixture_name = "cdna_substitution" + expected_classification = CdnaSubstitutionClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_genomic_substitution(all_fixtures, test_tokenizer, test_classifier): + """Test that genomic substitution classifier works""" + fixture_name = "genomic_substitution" + expected_classification = GenomicSubstitutionClassification + classifier_checks( + all_fixtures, 
+ test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_protein_stop_gain(all_fixtures, test_tokenizer, test_classifier): + """Test that protein stop gain classifier works""" + fixture_name = "protein_stop_gain" + expected_classification = ProteinStopGainClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_protein_reference_agree(all_fixtures, test_tokenizer, test_classifier): + """Test that protein reference agree classifier works""" + fixture_name = "protein_reference_agree" + expected_classification = ProteinReferenceAgreeClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_cdna_reference_agree(all_fixtures, test_tokenizer, test_classifier): + """Test that cdna reference agree classifier works""" + fixture_name = "cdna_reference_agree" + expected_classification = CdnaReferenceAgreeClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_genomic_reference_agree(all_fixtures, test_tokenizer, test_classifier): + """Test that genomic reference agree classifier works""" + fixture_name = "genomic_reference_agree" + expected_classification = GenomicReferenceAgreeClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_protein_delins(all_fixtures, test_tokenizer, test_classifier): + """Test that protein delins classifier works""" + fixture_name = "protein_delins" + expected_classification = ProteinDelInsClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_cdna_delins(all_fixtures, test_tokenizer, test_classifier): + """Test that cdna delins classifier works""" 
+ fixture_name = "cdna_delins" + expected_classification = CdnaDelInsClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_genomic_delins(all_fixtures, test_tokenizer, test_classifier): + """Test that genomic delins classifier works""" + fixture_name = "genomic_delins" + expected_classification = GenomicDelInsClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_protein_deletion(all_fixtures, test_tokenizer, test_classifier): + """Test that protein deletion classifier works""" + fixture_name = "protein_deletion" + expected_classification = ProteinDeletionClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_cdna_deletion(all_fixtures, test_tokenizer, test_classifier): + """Test that cdna deletion classifier works""" + fixture_name = "cdna_deletion" + expected_classification = CdnaDeletionClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_genomic_deletion(all_fixtures, test_tokenizer, test_classifier): + """Test that genomic deletion classifier works""" + fixture_name = "genomic_deletion" + expected_classification = GenomicDeletionClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_genomic_deletion_ambiguous(all_fixtures, test_tokenizer, test_classifier): + """Test that genomic deletion ambiguous classifier works""" + fixture_name = "genomic_deletion_ambiguous" + expected_classification = GenomicDeletionAmbiguousClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_protein_insertion(all_fixtures, 
test_tokenizer, test_classifier): + """Test that protein insertion classifier works""" + fixture_name = "protein_insertion" + expected_classification = ProteinInsertionClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_cdna_insertion(all_fixtures, test_tokenizer, test_classifier): + """Test that cdna insertion classifier works""" + fixture_name = "cdna_insertion" + expected_classification = CdnaInsertionClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_genomic_insertion(all_fixtures, test_tokenizer, test_classifier): + """Test that genomic insertion classifier works""" + fixture_name = "genomic_insertion" + expected_classification = GenomicInsertionClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_genomic_duplication(all_fixtures, test_tokenizer, test_classifier): + """Test that genomic duplication classifier works""" + fixture_name = "genomic_duplication" + expected_classification = GenomicDuplicationClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) + + +def test_genomic_duplication_ambiguous(all_fixtures, test_tokenizer, test_classifier): + """Test that genomic duplication ambiguous classifier works""" + fixture_name = "genomic_duplication_ambiguous" + expected_classification = GenomicDuplicationAmbiguousClassification + classifier_checks( + all_fixtures, + test_tokenizer, + test_classifier, + fixture_name, + expected_classification, + ) diff --git a/tests/test_hgvs_dup_del_mode.py b/tests/test_hgvs_dup_del_mode.py new file mode 100644 index 0000000..576b9b8 --- /dev/null +++ b/tests/test_hgvs_dup_del_mode.py @@ -0,0 +1,1639 @@ +"""Module for testing HGVS Dup Del mode.""" +import 
pytest +from ga4gh.vrs import models + +from tests.conftest import assertion_checks +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption + + +@pytest.fixture(scope="module") +def test_handler(test_query_handler): + """Create test fixture for normalize handler""" + return test_query_handler.normalize_handler + + +@pytest.fixture(scope="module") +def genomic_dup1_lse(genomic_dup1_seq_loc_normalized): + """Create a test fixture for genomic dup LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.CHNQRjx52keAGF5WcbvKORtfLiitZKE4", + "location": genomic_dup1_seq_loc_normalized, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 3, + "sequence": "GGG", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_dup1_cx(genomic_dup1_seq_loc_not_normalized): + """Create a test fixture for genomic dup copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.7WKEz2E_jwZZdyRc2Gw-_LIbHDJyRXwr", + "location": genomic_dup1_seq_loc_not_normalized, + "copyChange": "efo:0030072", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup1_free_text_seq_loc_normalized(): + """Create genomic dup1 free text sequence location""" + return { + "id": "ga4gh:SL.iyddzpD5lYY2Ayv87Np462l6P8QH7rH9", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.tpvbnWsfEGqip8gJQZnWJAF8-bWDUDKd", + }, + "start": 1032, + "end": 1034, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup1_free_text_seq_loc_not_normalized(): + """Create genomic dup1 free text sequence location""" + return { + "id": "ga4gh:SL.L89XFOyAxF-wdQHXUV8OAAkx80Mltokc", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.tpvbnWsfEGqip8gJQZnWJAF8-bWDUDKd", + }, + "start": 1033, + "end": 1034, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def 
genomic_dup1_free_text_lse(genomic_dup1_free_text_seq_loc_normalized): + """Create a test fixture for genomic dup LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.muSdvI3Q126oFLKx3DrVkbzGfQ40kFhx", + "location": genomic_dup1_free_text_seq_loc_normalized, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 3, + "sequence": "GGG", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_dup1_free_text_cn(genomic_dup1_free_text_seq_loc_not_normalized): + """Create a test fixture for genomic dup copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.-97yN7Nq98gCVA7s0VslwuNdDVFLW6Af", + "location": genomic_dup1_free_text_seq_loc_not_normalized, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup2_lse(genomic_dup2_seq_loc_normalized): + """Create a test fixture for genomic dup LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.u4ffOvroo0SV1X13zWMA41EOdu1QSO9B", + "location": genomic_dup2_seq_loc_normalized, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 4, + "length": 8, + "sequence": "TCTATCTA", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_dup2_cx(genomic_dup2_seq_loc_normalized): + """Create a test fixture for genomic dup copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.g4l6d1hb3Rd1slsYWSe4Z4x3ocKdCB3w", + "location": genomic_dup2_seq_loc_normalized, + "copyChange": "efo:0030070", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def seq_loc_gt_100_bp(): + """Create seq loc for positions 33211290, 33211490 on NC_000023.11""" + return { + "id": "ga4gh:SL.HYv7UB8dh8paRuy_Sb3g4sHQaTqJ3m8Q", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": 33211289, + "end": 33211490, 
+ "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup2_rle2(seq_loc_gt_100_bp): + """Create a test fixture for genomic dup RSE where bp > 100.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.gOA4L7Juk4KcUZnq4CBOk32-gkuz5keM", + "location": seq_loc_gt_100_bp, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 201, + "length": 402, + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_dup2_free_text_seq_loc(): + """Create genomic dup2 free text sequence location""" + return { + "id": "ga4gh:SL.D4MxySRp4-wlbC3whkRZIhcfON2pKKgx", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.1DeZLYHMnd-smp3GDlpRxETb9_0AokO7", + }, + "start": 256, + "end": 260, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup2_free_text_default(genomic_dup2_free_text_seq_loc): + """Create a test fixture for genomic dup default and LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.rEgZI-6A3SdNKhqqNapVYlcF_mzUeUGg", + "location": genomic_dup2_free_text_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 4, + "length": 8, + "sequence": "TAGATAGA", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_dup2_free_text_cn(genomic_dup2_free_text_seq_loc): + """Create a test fixture for genomic dup copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.Dg3jMd1lsFKpXiJAPfzrh_50PQM2g1C3", + "location": genomic_dup2_free_text_seq_loc, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup3_cx(genomic_del3_dup3_loc_not_normalized): + """Create a test fixture for genomic dup copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.gsV4KrWvNQ_c0UT8M31mqa0HJ-IAHL8q", + "location": genomic_del3_dup3_loc_not_normalized, + "copyChange": "efo:0030070", + } 
+ return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup3_free_text_subject(): + """Create test fixture for genomic dup3 free text location""" + return { + "id": "ga4gh:SL.OFGMAP2dUKRbBk5Q3MroJzbvcjEJQfyZ", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [31147273, 31147277], + "end": [31182737, 31182739], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup3_free_text_cx(genomic_dup3_free_text_subject): + """Create a test fixture for genomic dup copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.qf8-7kAverUttRlwQBXFPeuVq5o2-bVa", + "location": genomic_dup3_free_text_subject, + "copyChange": "efo:0030070", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup3_free_text_cn(genomic_dup3_free_text_subject): + """Create a test fixture for genomic dup copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.3yNGNFGVAO5DGc0sPTThdUwTfJLPyWfM", + "location": genomic_dup3_free_text_subject, + "copies": 4, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup4_cx(genomic_dup4_loc): + """Create a test fixture for genomic dup copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.4JLs2ICAAvj5JgG0xHJk1voSKLb8gNQ9", + "location": genomic_dup4_loc, + "copyChange": "efo:0030070", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup4_cn(genomic_dup4_loc): + """Create a test fixture for genomic dup copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.KqbQewUgZYfmottbgn1xYq58DiPVU5SZ", + "location": genomic_dup4_loc, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup4_free_text_subject(): + """Create test 
fixture for genomic dup4 free text location""" + return { + "id": "ga4gh:SL.SIeDb2iPT5pM-1SDKM9ew8NjzZAgF8nb", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7", + }, + "start": [None, 1674441], + "end": [1684571, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup4_free_text_cx(genomic_dup4_free_text_subject): + """Create a test fixture for genomic dup copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.A7iAltzEFjlPJPCBfdMwjsis-vt51o3L", + "location": genomic_dup4_free_text_subject, + "copyChange": "efo:0030070", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup4_free_text_cn(genomic_dup4_free_text_subject): + """Create a test fixture for genomic dup copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.hnSfHkympkuTJQlfJHjHUqvUMU-EM2_Z", + "location": genomic_dup4_free_text_subject, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup5_cx(genomic_dup5_loc): + """Create a test fixture for genomic dup5 copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.6KvwSUu1Vp3FcC2VzbZxqpLAouOMCPi9", + "location": genomic_dup5_loc, + "copyChange": "efo:0030070", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup5_cn(genomic_dup5_loc): + """Create a test fixture for genomic dup5 copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.nlDhmSyOYeLZ8Fv2_F0niIraPbHUvpOU", + "location": genomic_dup5_loc, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup6_cx(genomic_dup6_loc): + """Create a test fixture for genomic dup copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.yMUFkF1QBwq3mA2tUS8wTLH3--dEHJJD", + 
"location": genomic_dup6_loc, + "copyChange": "efo:0030070", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup6_cn(genomic_dup6_loc): + """Create a test fixture for genomic dup copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.KSFn5KQIPuPVJ6FjWaF0vzl7eRwwHbX9", + "location": genomic_dup6_loc, + "copies": 2, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_lse(genomic_del1_seq_loc): + """Create a test fixture for genomic del LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.gztc0BFS6p5V1_QVnEYIJ6DwzZQeDCd2", + "location": genomic_del1_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 0, + "sequence": "", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_cx(genomic_del1_seq_loc): + """Create a test fixture for genomic del copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.LWRBNtBgcETMXEKezrr7WUPjO9WoOaqL", + "location": genomic_del1_seq_loc, + "copyChange": "efo:0030064", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_rle(genomic_del1_seq_loc): + """Create a test fixture for genomic del RSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.Kg0FrJBjKRtIDsIKO0LxAwOPiXIOowoc", + "location": genomic_del1_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 2, + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_free_text_seq_loc(): + """Create genomic del1 free text sequence location""" + return { + "id": "ga4gh:SL.072FoTQ7ZWLfOOOdyTI3Vj5pc2qwDii6", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.xBKOKptLLDr-k4hTyCetvARn16pDS_rW", + }, + "start": 557, + "end": 558, + "type": "SequenceLocation", + } + + 
+@pytest.fixture(scope="module") +def genomic_del1_free_text_lse(genomic_del1_free_text_seq_loc): + """Create a test fixture for genomic del LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.8xbobLhnVeBLQ6ANUur7BcPNdXrLsSja", + "location": genomic_del1_free_text_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 0, + "sequence": "", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_free_text_cn(genomic_del1_free_text_seq_loc): + """Create a test fixture for genomic del copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.OakJW5ITpO4m1ffP4tGoBK72_IIqEBM6", + "location": genomic_del1_free_text_seq_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_free_text_rle(genomic_del1_free_text_seq_loc): + """Create a test fixture for genomic del RSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.XZMMF_xhn76bLMxN5RnewNgrXkYuK-ni", + "location": genomic_del1_free_text_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 1, + "length": 0, + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_lse(genomic_del2_seq_loc): + """Create a test fixture for genomic del LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.9NmH0sRYerurt-CE6WlF9UaxZiujByIE", + "location": genomic_del2_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 19, + "length": 0, + "sequence": "", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_lse2(seq_loc_gt_100_bp): + """Create a test fixture for genomic del LSE where bp > 100.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.1_cveYe6e74MEUt8EdTQmEtW5t6nA5bU", + "location": seq_loc_gt_100_bp, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 201, + "length": 0, 
+ "sequence": "", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_cx(genomic_del2_seq_loc): + """Create a test fixture for genomic del copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.xOZeCpcgWTj-xTYJdIeXbRy8h48qfbQ5", + "location": genomic_del2_seq_loc, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_rle(genomic_del2_seq_loc): + """Create a test fixture for genomic del RSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.J6hHiLw-qq27H8CZ8aQRdJwBGHqd3BvB", + "location": genomic_del2_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 19, + "length": 0, + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_free_text_seq_loc(): + """Create genomic del2 free text sequence location""" + return { + "id": "ga4gh:SL.b06yJ2UPwSSo-4bmYE8ZqHkDfo6_KZuu", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.xBKOKptLLDr-k4hTyCetvARn16pDS_rW", + }, + "start": 491, + "end": 510, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del2_free_text_default(genomic_del2_free_text_seq_loc): + """Create a test fixture for genomic del default and LSE.""" + params = { + "type": "Allele", + "id": "ga4gh:VA.ZmmZ_3it-b0nl8pdxaIG5ROYwTYhhRfk", + "location": genomic_del2_free_text_seq_loc, + "state": { + "type": "ReferenceLengthExpression", + "repeatSubunitLength": 19, + "length": 0, + "sequence": "", + }, + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_free_text_cnv(genomic_del2_free_text_seq_loc): + """Create a test fixture for genomic del CNV.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.DdDmUshRGGSxugHHqGI8agdffFmvwjFm", + "location": genomic_del2_free_text_seq_loc, + "copies": 1, + } + return 
models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del3_cx(genomic_del3_dup3_loc_not_normalized): + """Create a test fixture for genomic del copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.BWTPMUku6nwuWhULJogKyxEk64XDIYGm", + "location": genomic_del3_dup3_loc_not_normalized, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del3_free_text_subject(): + """Create test fixture for genomic del3 free text location""" + return { + "id": "ga4gh:SL.5_TZXeJhFejft3jmfkqdNutVO2tenSeB", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "start": [68839264, 68839267], + "end": [68841120, 68841125], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del3_free_text_cx(genomic_del3_free_text_subject): + """Create a test fixture for genomic del copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.SS7Ywi8yq2fb7acAdbs1a-H6ELw4QxLy", + "location": genomic_del3_free_text_subject, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del3_free_text_cn(genomic_del3_free_text_subject): + """Create a test fixture for genomic del copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.nYr-z9MXHGx8p3hP0Wht3WDw0gju9QDL", + "location": genomic_del3_free_text_subject, + "copies": 2, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del4_cx(genomic_del4_seq_loc): + """Create a test fixture for genomic del copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.1DiUzraiKZLJb8oF8ynARS816fthsJpV", + "location": genomic_del4_seq_loc, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def 
genomic_del4_cn(genomic_del4_seq_loc): + """Create a test fixture for genomic del copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.6RKML7P4zTx1U8EpJ1q7L23OXDEKFihS", + "location": genomic_del4_seq_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del4_free_text_subject(): + """Create test fixture for genomic del4 free text location""" + return { + "id": "ga4gh:SL.ebOW5blAtyPPVH512rIYi6cGsyKI2990", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g", + }, + "start": [None, 227022027], + "end": [227025830, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del4_free_text_cx(genomic_del4_free_text_subject): + """Create a test fixture for genomic del copy number change.""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.iI0rwBfPht6XCBhYPsfWUUbTwDIRycFi", + "location": genomic_del4_free_text_subject, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del4_free_text_cn(genomic_del4_free_text_subject): + """Create a test fixture for genomic del copy number count.""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.ps-GD4HSeJZjxnS1dhQrn4ntJFaA97a3", + "location": genomic_del4_free_text_subject, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_uncertain_del_2(): + """Create a genomic uncertain deletion on chr 2 test fixture.""" + params = { + "id": "ga4gh:CX.q3OPPp2fWM5uM60RNHY_jDThCyxV3URW", + "location": { + "id": "ga4gh:SL.aRKiRW6-lS9CCLfcPJpQIGihZqoIOCZ_", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g", + }, + "start": [None, 110104899], + "end": [110207160, None], + "type": "SequenceLocation", + }, + "copyChange": "efo:0030067", + "type": 
"CopyNumberChange", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_uncertain_del_y(): + """Create a genomic uncertain deletion on chr Y test fixture.""" + params = { + "id": "ga4gh:CX.vR12PHS1zCnoYUi9CSX3ZwhGG38xa-RA", + "location": { + "id": "ga4gh:SL.N44ez-5301ZoNdLoiblcUvm__BS4-4Jv", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + }, + "start": [None, 14076801], + "end": [57165209, None], + "type": "SequenceLocation", + }, + "copyChange": "efo:0030067", + "type": "CopyNumberChange", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del5_cn_var(genomic_del5_seq_loc): + """Create genomic del5 copy number count""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.B2qGS47pqvyvBjFRQEj3MjdsqfXpnhhC", + "location": genomic_del5_seq_loc, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del5_cx_var(genomic_del5_seq_loc): + """Create genomic del5 copy number change""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.oUhToHxDGpH5NkuFaQmKTmbijF9z_Esb", + "location": genomic_del5_seq_loc, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del6_cx_var(genomic_del6_seq_loc): + """Create genomic del6 copy number change""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.gmwszbknrMmklvVuu2yOqu5nOKV_fp72", + "location": genomic_del6_seq_loc, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del6_cn_var(genomic_del6_seq_loc): + """Create genomic del6 copy number count""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.CZEc44pX7Dh9yJARvvz6EW9oQvgkbwYf", + "location": genomic_del6_seq_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +def 
no_variation_check(resp, q): + """Check that variation is None in normalize response""" + assert resp.variation is None, q + + +@pytest.mark.asyncio +async def invalid_query_list_checks(query_list, test_handler): + """Check that invalid queries in query list do not normalize""" + for q in query_list: + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + no_variation_check(resp, q) + + +@pytest.mark.asyncio +async def test_genomic_dup1( + test_handler, + genomic_dup1_lse, + genomic_dup1_38_cn, + genomic_dup1_cx, + genomic_dup1_free_text_lse, + genomic_dup1_free_text_cn, +): + """Test that genomic duplication works correctly.""" + # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/allele?hgvsOrDescriptor=NC_000003.12%3Ag.49531262dup + q = "NC_000003.12:g.49531262dup" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup1_lse) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup1_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_dup1_38_cn) + + resp = await test_handler.normalize( + q, + HGVSDupDelModeOption.COPY_NUMBER_CHANGE, + copy_change=models.CopyChange.EFO_0030072, + ) + assertion_checks(resp, genomic_dup1_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_dup1_lse) + + q = "NC_000003.11:g.49568695dup" # 37 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup1_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_dup1_38_cn) + + resp = await test_handler.normalize( + q, + HGVSDupDelModeOption.COPY_NUMBER_CHANGE, + copy_change=models.CopyChange.EFO_0030072, + ) + assertion_checks(resp, genomic_dup1_cx) + + 
resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_dup1_lse) + + # Free Text + for q in ["DAG1 g.49568695dup", "DAG1 g.49531262dup"]: # 37 # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup1_free_text_lse) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup1_free_text_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_dup1_free_text_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_dup1_free_text_lse) + + # Invalid + invalid_queries = [ + "NC_000007.13:g.159138670dup", + "NC_000007.14:g.159345976dup", + "BRAF g.140219337dup", + "BRAF g.141024929dup", + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_genomic_dup2( + test_handler, + genomic_dup2_lse, + genomic_dup2_38_cn, + genomic_dup2_cx, + genomic_dup2_free_text_default, + genomic_dup2_free_text_cn, + genomic_dup2_rle2, +): + """Test that genomic duplication works correctly.""" + # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/allele?hgvsOrDescriptor=NM_004006.2%3Ac.20_23dup + q = "NC_000023.11:g.33211290_33211293dup" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup2_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_dup2_38_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_dup2_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_dup2_lse) + + q = "NC_000023.10:g.33229407_33229410dup" # 37 + resp = await 
test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup2_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_dup2_38_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_dup2_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_dup2_lse) + + # Free text + for q in ["DMD g.33211290_33211293dup", "DMD g.33229407_33229410dup"]: # 37 # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup2_free_text_default) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_dup2_free_text_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_dup2_free_text_default) + + # Greater than 100 bps -> rse + q = "NC_000023.11:g.33211290_33211490dup" + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_dup2_rle2) + + # Invalid + invalid_queries = [ + "NC_000007.13:g.140413127_159138670dup", + "NC_000007.14:g.140413127_159345976dup", + "BRAF g.140219337_140924929dup", + "BRAF g.140719326_141024929dup", + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_genomic_dup3( + test_handler, + genomic_dup3_cx, + genomic_del3_dup3_cn_38, + genomic_dup3_free_text_cn, + genomic_dup3_free_text_cx, +): + """Test that genomic duplication works correctly.""" + q = "NC_000023.11:g.(31060227_31100351)_(33274278_33417151)dup" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup3_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=1 + ) + assertion_checks(resp, 
genomic_del3_dup3_cn_38) + + resp = await test_handler.normalize( + q, + HGVSDupDelModeOption.COPY_NUMBER_CHANGE, + copy_change=models.CopyChange.EFO_0030070, + ) + assertion_checks(resp, genomic_dup3_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + q = "NC_000023.10:g.(31078344_31118468)_(33292395_33435268)dup" # 37 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup3_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=1 + ) + assertion_checks(resp, genomic_del3_dup3_cn_38) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_dup3_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Free Text + for q in ["DMD g.(31147274_31147278)_(31182737_31182739)dup"]: # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_dup3_free_text_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=3 + ) + assertion_checks(resp, genomic_dup3_free_text_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Invalid + invalid_queries = [ + "NC_000023.10:g.(31119221_31119227)_(31119300_155270562)dup", + "NC_000023.11:g.(31119221_31119227)_(31119300_156040899)dup", + "DMD g.(31060227_31100351)_(33274278_33417151)dup", + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_genomic_dup4( + test_handler, + genomic_dup4_cn, + genomic_dup4_cx, + genomic_dup4_free_text_cn, + genomic_dup4_free_text_cx, +): + """Test that genomic duplication works correctly.""" + q = "NC_000020.11:g.(?_30417576)_(31394018_?)dup" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + 
@pytest.mark.asyncio
async def test_genomic_dup5(
    test_handler,
    genomic_dup5_cn,
    genomic_dup5_cx,
):
    """Test normalization of dup with an uncertain 5' bound: (?_start)_end dup.

    Exercises GRCh38 and GRCh37 accession queries plus free-text gene queries,
    across DEFAULT / COPY_NUMBER_COUNT / COPY_NUMBER_CHANGE / ALLELE modes,
    then verifies a set of out-of-range queries yields no variation.
    """
    q = "NC_000023.11:g.(?_154021812)_154092209dup"  # 38
    resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT)
    assertion_checks(resp, genomic_dup5_cx)

    resp = await test_handler.normalize(
        q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2
    )
    assertion_checks(resp, genomic_dup5_cn)

    resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE)
    assertion_checks(resp, genomic_dup5_cx)

    # Ambiguous ranges cannot be expressed as an allele
    resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE)
    no_variation_check(resp, q)

    q = "NC_000023.10:g.(?_153287263)_153357667dup"  # 37
    resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT)
    assertion_checks(resp, genomic_dup5_cx)

    resp = await test_handler.normalize(
        q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2
    )
    assertion_checks(resp, genomic_dup5_cn)

    resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE)
    assertion_checks(resp, genomic_dup5_cx)

    resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE)
    no_variation_check(resp, q)

    # Free Text
    for q in [
        "MECP2 g.(?_153287263)_153357667dup",  # 37
        "MECP2 g.(?_154021812)_154092209dup",  # 38
    ]:
        resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT)
        assertion_checks(resp, genomic_dup5_cx)

        resp = await test_handler.normalize(
            q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2
        )
        assertion_checks(resp, genomic_dup5_cn)

        resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE)
        no_variation_check(resp, q)

    # Invalid
    # NOTE: a missing comma between the two MECP2 literals previously caused
    # implicit string concatenation, so the two queries were silently merged
    # into one garbage string and neither intended case was actually tested.
    for q in [
        "NC_000023.10:g.(?_153287263)_155270561dup",
        "NC_000023.11:g.(?_154021812)_156040896dup",
        "MECP2 g.(?_154021812)_154097733dup",  # 37
        "MECP2 g.(?_154021572)_154092209dup",  # 38
    ]:
        resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT)
        assert resp.variation is None, q


@pytest.mark.asyncio
async def test_genomic_dup6(
    test_handler,
    genomic_dup6_cn,
    genomic_dup6_cx,
):
    """Test normalization of dup with an uncertain 3' bound: start_(end_?) dup.

    Mirrors test_genomic_dup5 but with the uncertainty on the 3' end and a
    baseline copy count of 1 for COPY_NUMBER_COUNT mode.
    """
    q = "NC_000023.11:g.154021812_(154092209_?)dup"  # 38
    resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT)
    assertion_checks(resp, genomic_dup6_cx)

    resp = await test_handler.normalize(
        q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=1
    )
    assertion_checks(resp, genomic_dup6_cn)

    resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE)
    assertion_checks(resp, genomic_dup6_cx)

    resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE)
    no_variation_check(resp, q)

    q = "NC_000023.10:g.153287263_(153357667_?)dup"  # 37
    resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT)
    assertion_checks(resp, genomic_dup6_cx)

    resp = await test_handler.normalize(
        q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=1
    )
    assertion_checks(resp, genomic_dup6_cn)

    resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE)
    assertion_checks(resp, genomic_dup6_cx)

    resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE)
    no_variation_check(resp, q)

    # Free Text
    for q in [
        "MECP2 g.153287263_(153357667_?)dup",  # 37
        "MECP2 g.154021812_(154092209_?)dup",  # 38
    ]:
        resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT)
        assertion_checks(resp, genomic_dup6_cx)

        resp = await test_handler.normalize(
            q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=1
        )
        assertion_checks(resp, genomic_dup6_cn)

        resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE)
        no_variation_check(resp, q)

    # Invalid
    # NOTE: fixed missing comma that concatenated the two MECP2 query strings,
    # which meant neither invalid query was individually exercised.
    for q in [
        "NC_000023.10:g.153287263_(155270561_?)dup",
        "NC_000023.11:g.154021812_(156040896_?)dup",
        "MECP2 g.154021812_(154097733_?)dup",  # 37
        "MECP2 g.154021572_(154092209_?)dup",  # 38
    ]:
        resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT)
        assert resp.variation is None, q
genomic_del1_38_cn, + genomic_del1_cx, + genomic_del1_free_text_lse, + genomic_del1_free_text_cn, +): + """Test that genomic deletion works correctly.""" + q = "NC_000003.12:g.10149811del" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del1_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del1_38_cn) + + resp = await test_handler.normalize( + q, + HGVSDupDelModeOption.COPY_NUMBER_CHANGE, + copy_change=models.CopyChange.EFO_0030064, + ) + assertion_checks(resp, genomic_del1_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_del1_lse) + + q = "NC_000003.11:g.10191495del" # 37 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del1_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del1_38_cn) + + resp = await test_handler.normalize( + q, + HGVSDupDelModeOption.COPY_NUMBER_CHANGE, + copy_change=models.CopyChange.EFO_0030064, + ) + assertion_checks(resp, genomic_del1_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_del1_lse) + + # Free text + for q in ["VHL g.10191495del", "VHL g.10149811del"]: # 37 # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del1_free_text_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del1_free_text_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_del1_free_text_lse) + + # Invalid + invalid_queries = [ + "NC_000003.11:g.198022431del", + "NC_000003.12:g.198295567del", + "BRAF g.140413127del", + "BRAF 
g.141024929del", + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_genomic_del2( + test_handler, + genomic_del2_lse, + genomic_del2_38_cn, + genomic_del2_cx, + genomic_del2_free_text_default, + genomic_del2_free_text_cnv, + genomic_del2_lse2, +): + """Test that genomic deletion works correctly.""" + q = "NC_000003.12:g.10146595_10146613del" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del2_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del2_38_cn) + + resp = await test_handler.normalize( + q, + HGVSDupDelModeOption.COPY_NUMBER_CHANGE, + copy_change=models.CopyChange.EFO_0030069, + ) + assertion_checks(resp, genomic_del2_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_del2_lse) + + q = "NC_000003.11:g.10188279_10188297del" # 37 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del2_lse) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del2_38_cn) + + resp = await test_handler.normalize( + q, + HGVSDupDelModeOption.COPY_NUMBER_CHANGE, + copy_change=models.CopyChange.EFO_0030069, + ) + assertion_checks(resp, genomic_del2_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_del2_lse) + + # Free text + for q in ["VHL g.10188279_10188297del", "VHL g.10146595_10146613del"]: # 37 # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del2_free_text_default) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del2_free_text_cnv) + + resp = await 
test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + assertion_checks(resp, genomic_del2_free_text_default) + + # Check that del > 100 bps returns LSE + q = "NC_000023.11:g.33211290_33211490del" + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_del2_lse2) + + # gnomad vcf + q = "3-10146594-AATGTTGACGGACAGCCTAT-A" + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del2_lse) + + q = "3-10188278-AATGTTGACGGACAGCCTAT-A" + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_del2_lse) + + # Invalid + invalid_queries = [ + "NC_000003.12:g.10146595_198295580del", + "NC_000003.11:g.198022435_198022437del", + "BRAF g.140413127_140419136del", + "BRAF g.140719326_141024929del", + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_genomic_del3( + test_handler, + genomic_del3_dup3_cn_38, + genomic_del3_cx, + genomic_del3_free_text_cn, + genomic_del3_free_text_cx, +): + """Test that genomic deletion works correctly.""" + q = "NC_000023.11:g.(31060227_31100351)_(33274278_33417151)del" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del3_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=3 + ) + assertion_checks(resp, genomic_del3_dup3_cn_38) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_del3_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + q = "NC_000023.10:g.(31078344_31118468)_(33292395_33435268)del" # 37 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del3_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=3 + ) + assertion_checks(resp, genomic_del3_dup3_cn_38) + + resp 
= await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_del3_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Free Text + for q in [ + "EFNB1 g.(68059108_68059111)_(68060963_68060968)del", # 37 + "EFNB1 g.(68839265_68839268)_(68841120_68841125)del", # 38 + ]: + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del3_free_text_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=3 + ) + assertion_checks(resp, genomic_del3_free_text_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Invalid + invalid_queries = [ + "NC_000023.11:g.(156040880_156040883)_(156040896_156040899)del", + "NC_000023.10:g.(155270550_155270555)_(155270560_155270562)del", + "EFNB1 g.(68048863_68048870)_(68842150_68842152)del", # 37 + "EFNB1 g.(68829022_68829030)_(68842150_68842161)del", # 38 + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_genomic_del4( + test_handler, + genomic_del4_cn, + genomic_del4_cx, + genomic_uncertain_del_2, + genomic_uncertain_del_y, + genomic_del4_free_text_cn, + genomic_del4_free_text_cx, +): + """Test that genomic deletion works correctly.""" + q = "NC_000023.11:g.(?_31120496)_(33339477_?)del" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del4_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del4_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_del4_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + q = 
"NC_000023.10:g.(?_31138613)_(33357594_?)del" # 37 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del4_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del4_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_del4_cx) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + q = "NC_000002.12:g.(?_110104900)_(110207160_?)del" + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_uncertain_del_2) + + q = "NC_000024.10:g.(?_14076802)_(57165209_?)del" + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_uncertain_del_y) + + # Free Text + for q in ["COL4A4 g.(?_227022028)_(227025830_?)del"]: # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del4_free_text_cx) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del4_free_text_cn) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Invalid + invalid_queries = [ + "NC_000023.11:g.(?_156040899)_(156040900_?)del", + "NC_000024.10:g.(?_155270565)_(155270568_?)del", + "COL4A4 g.(?_227002710)_(227003710_?)del", + "COL4A4 g.(?_227867430)_(228029276_?)del", + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_genomic_del5( + test_handler, + genomic_del5_cn_var, + genomic_del5_cx_var, +): + """Test that genomic deletion works correctly.""" + q = "NC_000023.11:g.(?_18575354)_18653629del" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del5_cx_var) + + resp = await test_handler.normalize( + q, 
HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=4 + ) + assertion_checks(resp, genomic_del5_cn_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_del5_cx_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + q = "NC_000023.10:g.(?_18593474)_18671749del" # 37 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del5_cx_var) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=4 + ) + assertion_checks(resp, genomic_del5_cn_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_del5_cx_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Free text + for q in ["CDKL5 g.(?_18575354)_18653629del"]: + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del5_cx_var) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=4 + ) + assertion_checks(resp, genomic_del5_cn_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Invalid + invalid_queries = [ + "NC_000023.10:g.(?_155270550)_155270570del", + "NC_000023.11:g.(?_18593474)_18671749del" + "CDKL5 g.(?_18443702)_18671700del", # 37 + "CDKL5 g.(?_18425585)_18653631del", # 38 + "CDKL5 g.(?_18425582)_18653500del", # 38 + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_genomic_del6( + test_handler, + genomic_del6_cn_var, + genomic_del6_cx_var, +): + """Test that genomic deletion works correctly.""" + q = "NC_000006.12:g.133462764_(133464858_?)del" # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, 
genomic_del6_cx_var) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del6_cn_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_del6_cx_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + q = "NC_000006.11:g.133783902_(133785996_?)del" # 37 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del6_cx_var) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del6_cn_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_CHANGE) + assertion_checks(resp, genomic_del6_cx_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Free text + for q in ["EYA4 g.133462764_(133464858_?)del"]: # 38 + resp = await test_handler.normalize(q, HGVSDupDelModeOption.DEFAULT) + assertion_checks(resp, genomic_del6_cx_var) + + resp = await test_handler.normalize( + q, HGVSDupDelModeOption.COPY_NUMBER_COUNT, baseline_copies=2 + ) + assertion_checks(resp, genomic_del6_cn_var) + + resp = await test_handler.normalize(q, HGVSDupDelModeOption.ALLELE) + no_variation_check(resp, q) + + # Invalid + invalid_queries = [ + "NC_000006.11:g.171115069_(171115080_?)del", + "NC_000006.12:g.170805981_(170805989_?)del" + "EYA4 g.133561700_(133853270_?)del", # 37 + "EYA4 g.133561651_(133561708_?)del", # 37 + "EYA4 g.133240513_(133240600_?)del", # 38 + "EYA4 g.133240515_(133532130_?)del", # 38 + ] + await invalid_query_list_checks(invalid_queries, test_handler) + + +@pytest.mark.asyncio +async def test_parameters(test_handler): + """Check that valid and invalid parameters work as intended.""" + resp = await test_handler.normalize("7-140453136-A-T") + assert resp.variation 
+ assert resp.warnings == [] + + q = "NC_000003.12:g.49531262dup" + resp = await test_handler.normalize(q) + assert resp.variation + assert resp.warnings == [] + + resp = await test_handler.normalize(q, hgvs_dup_del_mode=None) + assert resp.variation + assert resp.warnings == [] + + resp = await test_handler.normalize( + q, hgvs_dup_del_mode=HGVSDupDelModeOption.COPY_NUMBER_COUNT + ) + assert resp.variation is None + assert resp.warnings == ["copy_number_count mode requires `baseline_copies`"] diff --git a/tests/test_normalize.py b/tests/test_normalize.py new file mode 100644 index 0000000..066da0f --- /dev/null +++ b/tests/test_normalize.py @@ -0,0 +1,1004 @@ +"""Module for testing the normalize endpoint.""" +from datetime import datetime + +import pytest +from ga4gh.vrs import models + +from tests.conftest import assertion_checks +from variation.main import normalize as normalize_get_response +from variation.main import to_vrs as to_vrs_get_response +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption + + +@pytest.fixture(scope="module") +def test_handler(test_query_handler): + """Create test fixture for normalize handler""" + return test_query_handler.normalize_handler + + +@pytest.fixture(scope="module") +def dis3_p63a(): + """Create DIS3 P63A test fixture.""" + params = { + "id": "ga4gh:VA.HSJaPh-tYk0SxPPenZ2wP9db1sTWPiEb", + "location": { + "id": "ga4gh:SL.2mNB74aM_uxrSpjVQ66vJv4l60QqLw92", + "end": 63, + "start": 62, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.mlWsxfPKINN3o300stAI8oqN5U7P6kEu", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "A", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def tp53_g262c(): + """Create TP53 G262C test fixture.""" + params = { + "id": "ga4gh:VA.-ZtQ7h9wnK9RgVtd7YuiISNh80Mpp4c_", + "location": { + "id": "ga4gh:SL.NUBjoy9wz7qcu-eM7vDEfm2oT0OBqEhu", + "start": 261, 
+ "end": 262, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.YIlmVwD0rxIqnlvb-8WujHPbR0j3WEGI", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "C", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def vhl(): + """Create VHL Tyr185Ter fixture.""" + params = { + "id": "ga4gh:VA.KsGULBqRCUFNA89_9LErBWStMsBIXvlt", + "location": { + "id": "ga4gh:SL.1qfXpIQd0Z4bAIpanqdhGpXmFd8_-Hb9", + "end": 185, + "start": 184, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.z-Oa0pZkJ6GHJHOYM7h5mY_umc0SJzTu", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "*", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def nm_004448_cdna_delins(): + """Create test fixture for NM_004448.4:c.2326_2327delinsCT.""" + params = { + "id": "ga4gh:VA.higf2Phdt6HsJIEFKtaKEb5EbTZP9tLX", + "location": { + "id": "ga4gh:SL.voK3jkwJiGfUsGVm8P_A0claqeI35Jnv", + "end": 2502, + "start": 2500, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.y9b4LVMiCXpZxOg9Xt1NwRtssA03MwWM", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "CT", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def nm_000551(): + """Create test fixture for NM_000551.4:c.615delinsAA.""" + params = { + "id": "ga4gh:VA.MseO0j0sgMt73Jdzawul0JKsxFrJWODv", + "location": { + "id": "ga4gh:SL.dFTM865y_W2iM6IPaKhW8E2ezHeo446u", + "end": 685, + "start": 684, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.xBKOKptLLDr-k4hTyCetvARn16pDS_rW", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "AA", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + 
+@pytest.fixture(scope="module") +def braf_cdna_seq_loc(): + """Create test fixture for BRAF V600E cDNA representation sequence location""" + return { + "id": "ga4gh:SL.d6fchgxsIiR1R_4IY2lBAhE1wb9zVtrp", + "end": 2025, + "start": 2024, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.aKMPEJgmlZXt_F6gRY5cUG3THH2n-GUa", + }, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def braf_v600e_nucleotide(braf_cdna_seq_loc): + """Create a test fixture for BRAF V600E MANE select nucleotide hgvs.""" + params = { + "id": "ga4gh:VA.pL0tb7_iYp9A_opzwFMxRAPg6gTM_9A-", + "location": braf_cdna_seq_loc, + "state": {"sequence": "A", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def cdna_reference_agree(braf_cdna_seq_loc): + """Create test fixture for NM_004333.4:c.1799=.""" + params = { + "id": "ga4gh:VA.itm3XgekfKho2tZq0L_mzocuyHrx4i6c", + "location": braf_cdna_seq_loc, + "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def protein_delins(): + """Create test fixture for protein delins.""" + params = { + "id": "ga4gh:VA.J291lhv1vb2iu5fti8SSymq2_dWjYvN4", + "location": { + "id": "ga4gh:SL.RKZW47OI5rv2-9gU3rJKV9dMfq7BYT_u", + "end": 751, + "start": 746, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.vyo55F6mA6n2LgN4cagcdRzOuh38V4mE", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "P", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def cdna_deletion(): + """Create test fixture for cdna deletion range with deleted + sequence. 
+ """ + params = { + "id": "ga4gh:VA.-It83ZpjGFmgUvRk4i65-YJyD_zFWG4e", + "location": { + "id": "ga4gh:SL.5GI6gputEL8E1NDu5kxbs3smO1JO-d1a", + "end": 2453, + "start": 2437, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.y9b4LVMiCXpZxOg9Xt1NwRtssA03MwWM", + }, + "type": "SequenceLocation", + }, + "state": { + "length": 1, + "repeatSubunitLength": 15, + "sequence": "T", + "type": "ReferenceLengthExpression", + }, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_deletion(): + """Create test fixture for genomic deletion range with deleted sequence. + (CA915940709) + """ + params = { + "id": "ga4gh:VA.HUI0n5I7cmo1FqIxVLUo9edaRR5S_kME", + "location": { + "id": "ga4gh:SL.jbutnbFRaj3Y6XQexjkN12Bd7HbWlloG", + "end": 10146528, + "start": 10146524, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "type": "SequenceLocation", + }, + "state": { + "length": "2", + "repeatSubunitLength": 2, + "sequence": "CT", + "type": "ReferenceLengthExpression", + }, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def cdna_insertion(): + """Create test fixture for coding DNA insertion.""" + params = { + "id": "ga4gh:VA.-X8kNU4aUF3MUaBkGD_G_Lusl7vuqcgg", + "location": { + "id": "ga4gh:SL.5sH0Wh00jrGB9DUlIxJUSPZ_ZraTlJ9p", + "end": 2160, + "start": 2160, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.7_mlQyDN-uWH0RlxTQFvFEv6ykd2D-xF", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "A", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_insertion(): + """Create a gene insertion test fixture.""" + params = { + "id": "ga4gh:VA.7wTialdnDHIG9DTDtJAxERngeDUGCyNk", + "location": { + "id": "ga4gh:SL.EMMJdP_rekeHOpIVVHWJSutPXdsjQqQ0", + "end": 2500, + 
"start": 2488, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.y9b4LVMiCXpZxOg9Xt1NwRtssA03MwWM", + }, + "type": "SequenceLocation", + }, + "state": { + "length": 24, + "repeatSubunitLength": 12, + "sequence": "TACGTGATGGCTTACGTGATGGCT", + "type": "ReferenceLengthExpression", + }, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_substitution(): + """Create a gene insertion test fixture.""" + params = { + "id": "ga4gh:VA.6KE54LHahQUK8rWz_dp1iU_aliXqW93B", + "location": { + "id": "ga4gh:SL.qW8hZuSITudo3OlBahEwI-dSIvRKN9jQ", + "end": 2630, + "start": 2629, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.d_QsP29RWJi6bac7GOC9cJ9AO7s_HUMN", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def gnomad_vcf_genomic_sub_mnv(): + """Create a genomic substitution mnv test fixture for 5-112175770-GGAA-AGAA.""" + params = { + "id": "ga4gh:VA.naygzq3x2gWaX4NfCXcT5aJyxaGKAwZ3", + "location": { + "id": "ga4gh:SL.L0moV8BjwdeSYkLsxLPhkJLG85x3hkKb", + "end": 112840073, + "start": 112840072, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "A", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_sub_grch38(): + """Create a genomic substitution GRCh38 test fixture.""" + params = { + "id": "ga4gh:VA.OvEfBRaS34JkfM0_ZHJVDQEjqtwzyjyp", + "location": { + "id": "ga4gh:SL.ZCgOjF-_T0EOBXGc-6yICYui-jgFzJfY", + "end": 55181378, + "start": 55181377, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "type": "SequenceLocation", + }, + 
"state": {"sequence": "T", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def grch38_braf_genom_reference_agree(): + """Create a genomic reference agree GRCh38 test fixture for BRAF.""" + params = { + "id": "ga4gh:VA.J8DRaIofpFLaS3HE_C1xGLHoWje5INuQ", + "location": { + "id": "ga4gh:SL.XutGzMvqbzN-vnxmPt2MJf7ehxmB0opi", + "end": 140753336, + "start": 140753335, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "A", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def grch38_genomic_delins1(): + """Create a test fixture for NC_000007.13:g.140453135_140453136delinsAT.""" + params = { + "id": "ga4gh:VA.nojfgZgtcwQ9Ylm0GuBuGnUT7Ug-_AKX", + "location": { + "id": "ga4gh:SL.jW40gDuxQ9chCROKZs12FE7cHlX538EU", + "end": 140753336, + "start": 140753334, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "AT", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def grch38_genomic_delins2(): + """Create a test fixture for NC_000003.12:g.10149938delinsAA.""" + params = { + "id": "ga4gh:VA.m3N5EEDcudS3thzXlwRDxiffKNgGukzv", + "location": { + "id": "ga4gh:SL.acAqImEWvHwbUHaJi7L8yOyrSsc1DlW-", + "start": 10149937, + "end": 10149938, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "AA", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def genomic_delins_gene(): + 
"""Create a test fixture for BRAF g.140453135_140453136delinsAT (CA16602419).""" + params = { + "id": "ga4gh:VA.oEPIg5_z6DZXl2ak6EQWzyIrDB1j5ylc", + "location": { + "id": "ga4gh:SL.mD0_LS4ja0LTofG0ovgdxfbIv5ho8huY", + "start": 2024, + "end": 2026, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.aKMPEJgmlZXt_F6gRY5cUG3THH2n-GUa", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "AT", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def gnomad_vcf_genomic_delins1(): + """Create a test fixture for 3-37050340-AAAAGCTTTA-GAGGCTTT. + + https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/ + allele?hgvsOrDescriptor=NM_000249.3%3Ac.489_498delinsGAGGCTTT + """ + params = { + "id": "ga4gh:VA.k08iD4Yuq6YE2TGJDdk-8ZGI1N7q17NI", + "location": { + "id": "ga4gh:SL.eHVQX0JhZML5VLIx_1Pn7biJCq7QmSQv", + "start": 37008848, + "end": 37008858, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "GAGGCTTT", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def gnomad_vcf_genomic_delins2(): + """Create a test fixture for 16-68846036-AG-TGAGTTT (CA396459910)""" + params = { + "id": "ga4gh:VA.FRahr9wBmzpiO9mWEvU0HuLln9VL56UO", + "location": { + "id": "ga4gh:SL.jZ6Tcqgap6uclFwtcAQfWIUWTz8-mBuj", + "start": 68812132, + "end": 68812134, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.yC_0RBj3fgBlvgyAuycbzdubtLxq-rE0", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "TGAGTTT", "type": "LiteralSequenceExpression"}, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def gnomad_vcf_genomic_delins3(): + """Create a test fixture for 
X-70350063-AG-AGGCAGCGCATAAAGCGCATTCTCCG""" + params = { + "id": "ga4gh:VA.g2Lk1KFnr5zaMINhYI98tvHTsA8YVLVw", + "location": { + "id": "ga4gh:SL.B_U4dRIuJb_rHIMBWajTHkGdb_yTZyZZ", + "start": 71130213, + "end": 71130215, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.w0WZEvgJF0zf_P4yyTzjjv9oW1z61HHP", + }, + "type": "SequenceLocation", + }, + "state": { + "length": 26, + "repeatSubunitLength": 24, + "sequence": "GGCAGCGCATAAAGCGCATTCTCCGG", + "type": "ReferenceLengthExpression", + }, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def gnomad_vcf_genomic_delins4(): + """Create a test fixture for 1-55509715-AC-A""" + params = { + "id": "ga4gh:VA.0zpgAsfWgv-MVZvHyg6kloySRIuTnLtz", + "location": { + "id": "ga4gh:SL.JWTBfRuZF52vff0NDPeMcuwJ2-BrAszw", + "end": 55044045, + "start": 55044042, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", + }, + "type": "SequenceLocation", + }, + "state": { + "length": 2, + "repeatSubunitLength": 1, + "sequence": "CC", + "type": "ReferenceLengthExpression", + }, + "type": "Allele", + } + return models.Allele(**params) + + +@pytest.fixture(scope="module") +def gnomad_vcf_genomic_delins5(): + """Create test fixture for 17-7578455-CGCGG-CGCG (CA497925643)""" + params = { + "id": "ga4gh:VA.2MWzLByOm1h0sOgwUM-7UCemXWj5q66c", + "type": "Allele", + "location": { + "id": "ga4gh:SL.YHVgy44d-HtZibm08DlcHwCthT8oALxE", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7", + }, + "start": 7675139, + "end": 7675141, + }, + "state": { + "type": "ReferenceLengthExpression", + "sequence": "G", + "length": 1, + "repeatSubunitLength": 1, + }, + } + return models.Allele(**params) + + +@pytest.mark.asyncio +async def test_protein_substitution(test_handler, braf_v600e, dis3_p63a, tp53_g262c): + """Test that protein 
substitutions normalize correctly.""" + resp = await test_handler.normalize(" BRAF V600E ") + assertion_checks(resp, braf_v600e) + + resp = await test_handler.normalize("NP_004324.2:p.Val600Glu") + assertion_checks(resp, braf_v600e) + + resp = await test_handler.normalize("braf V512E") + assertion_checks(resp, braf_v600e) + + resp = await test_handler.normalize(" NP_001365404.1:p.Val512Glu ") + assertion_checks(resp, braf_v600e) + + resp = await test_handler.normalize("DIS3 P63A") + assertion_checks(resp, dis3_p63a) + + +@pytest.mark.asyncio +async def test_polypeptide_truncation(test_handler, vhl): + """Test that polypeptide truncations normalize correctly.""" + resp = await test_handler.normalize("NP_000542.1:p.Tyr185Ter") + assertion_checks(resp, vhl) + + +@pytest.mark.asyncio +async def test_reference_agree(test_handler, vhl_reference_agree): + """Test that reference agrees normalize correctly.""" + resp = await test_handler.normalize("NP_000542.1:p.Pro61=") + assertion_checks(resp, vhl_reference_agree) + + +@pytest.mark.asyncio +async def test_cdna_and_genomic_substitution( + test_handler, + braf_v600e_nucleotide, + genomic_substitution, + genomic_sub_grch38, + braf_v600e_genomic_sub, + gnomad_vcf_genomic_sub_mnv, +): + """Test that cdna and genomic substitutions normalize correctly.""" + resp = await test_handler.normalize("NM_004333.4:c.1799T>A") + assertion_checks(resp, braf_v600e_nucleotide) + + # MANE transcript + resp = await test_handler.normalize("ENST00000288602.10:c.1799T>A") + assertion_checks(resp, braf_v600e_nucleotide) + + resp = await test_handler.normalize("BRAF V600E c.1799T>A") + assertion_checks(resp, braf_v600e_nucleotide) + + resp = await test_handler.normalize("BRAF V600E (c.1799T>A)") + assertion_checks(resp, braf_v600e_nucleotide) + + resp = await test_handler.normalize("BRAF c.1799T>A") + assertion_checks(resp, braf_v600e_nucleotide) + + resp = await test_handler.normalize("NC_000007.13:g.140453136A>T") + assertion_checks(resp, 
braf_v600e_genomic_sub) + + resp = await test_handler.normalize("7-140453136-A-T") # 37 + assertion_checks(resp, braf_v600e_genomic_sub) + + resp = await test_handler.normalize("7-140753336-A-T") # 38 + assertion_checks(resp, braf_v600e_genomic_sub) + + resp = await test_handler.normalize("BRAF V600E (g.140453136A>T)") + assertion_checks(resp, braf_v600e_nucleotide) + + resp = await test_handler.normalize("BRAF g.140453136A>T") + assertion_checks(resp, braf_v600e_nucleotide) + + # More than 1 gene (EGFR and EGFR-AS1) + resp = await test_handler.normalize("NC_000007.13:g.55249071C>T") + assertion_checks(resp, genomic_sub_grch38) + + resp = await test_handler.normalize("EGFR g.55249071C>T") + assertion_checks(resp, genomic_substitution) + + # MNV genomic substitution (CA009580) + q = "5-112175770-GGAA-AGAA" + resp = await test_handler.normalize(q) + assertion_checks(resp, gnomad_vcf_genomic_sub_mnv) + + +@pytest.mark.asyncio +async def test_cdna_reference_agree(test_handler, cdna_reference_agree): + """Test that cdna Reference Agree normalizes correctly.""" + resp = await test_handler.normalize("NM_004333.4:c.1799= ") + assertion_checks(resp, cdna_reference_agree) + + resp = await test_handler.normalize("ENST00000288602.11:c.1799=") + assertion_checks(resp, cdna_reference_agree) + + resp = await test_handler.normalize("BRAF c.1799=") + assertion_checks(resp, cdna_reference_agree) + + resp = await test_handler.normalize(" BRAF V600E c.1799= ") + assertion_checks(resp, cdna_reference_agree) + + +@pytest.mark.asyncio +async def test_genomic_reference_agree( + test_handler, cdna_reference_agree, grch38_braf_genom_reference_agree +): + """Test that genomic reference agree normalizes correctly.""" + resp = await test_handler.normalize("NC_000007.13:g.140453136=") + assertion_checks( + resp, + grch38_braf_genom_reference_agree, + ) + + resp = await test_handler.normalize("7-140453136-A-A") + assertion_checks(resp, grch38_braf_genom_reference_agree) + + resp = await 
test_handler.normalize("7-140753336-A-A") + assertion_checks(resp, grch38_braf_genom_reference_agree) + + q = "7-140753336-ACT-ACT" + resp = await test_handler.normalize(q) + assertion_checks(resp, grch38_braf_genom_reference_agree) + + resp = await test_handler.normalize("BRAF g.140453136=") + assertion_checks(resp, cdna_reference_agree) + + +@pytest.mark.asyncio +async def test_cdna_delins(test_handler, nm_004448_cdna_delins, nm_000551): + """Test that cdna DelIns normalizes correctly.""" + resp = await test_handler.normalize(" NM_004448.4:c.2326_2327delinsCT ") + assertion_checks( + resp, + nm_004448_cdna_delins, + ) + + resp = await test_handler.normalize("NM_000551.3:c.615delinsAA") + assertion_checks(resp, nm_000551) + + +@pytest.mark.asyncio +async def test_genomic_delins( + test_handler, + grch38_genomic_delins1, + grch38_genomic_delins2, + genomic_delins_gene, + gnomad_vcf_genomic_delins1, + gnomad_vcf_genomic_delins2, + gnomad_vcf_genomic_delins3, + gnomad_vcf_genomic_delins4, + gnomad_vcf_genomic_delins5, + genomic_del1_lse, + genomic_del2_lse, +): + """Test that Genomic DelIns normalizes correctly.""" + resp = await test_handler.normalize("NC_000007.13:g.140453135_140453136delinsAT") + assertion_checks(resp, grch38_genomic_delins1) + + resp = await test_handler.normalize("NC_000003.12:g.10149938delinsAA") + assertion_checks(resp, grch38_genomic_delins2) + + q = "3-10149938-C-AA" + resp = await test_handler.normalize(q) + assertion_checks(resp, grch38_genomic_delins2) + + q = "BRAF g.140453135_140453136delinsAT" + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_delins_gene) + + # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/ + # allele?hgvsOrDescriptor=NM_000249.3%3Ac.489_498delinsGAGGCTTT + q = "3-37050340-AAAAGCTTTA-GAGGCTTT" + resp = await test_handler.normalize(q) + assertion_checks(resp, gnomad_vcf_genomic_delins1) + + q = "16-68846036-AG-TGAGTTT" + resp = await test_handler.normalize(q) + 
assertion_checks(resp, gnomad_vcf_genomic_delins2) + + # NC_000023.10:g.70350063_70350064delinsAGGCAGCGCATAAAGCGCATTCTCCG + # NC_000023.10:g.70350063_70350064insGGCAGCGCATAAAGCGCATTCTCC + # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/ + # allele?hgvsOrDescriptor=NC_000023.11%3Ag.71130213_71130214insGGCAGCGCATAAAGCGCATTCTCC noqa: E501 + q = "X-70350063-AG-AGGCAGCGCATAAAGCGCATTCTCCG" + resp = await test_handler.normalize(q) + assertion_checks(resp, gnomad_vcf_genomic_delins3) + + # CA523275412 + q = "1-55509715-AC-A" + resp = await test_handler.normalize(q) + assertion_checks(resp, gnomad_vcf_genomic_delins4) + + # CA497925643 + q = "17-7578455-CGCGG-CGCG" + resp = await test_handler.normalize(q) + assertion_checks(resp, gnomad_vcf_genomic_delins5) + + q = "3-10146594-AATGTTGACGGACAGCCTAT-A" + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_del2_lse) + + q = "3-10188278-AATGTTGACGGACAGCCTAT-A" + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_del2_lse) + + q = "3-10149810-CT-C" # 38 + resp = await test_handler.normalize(q) + assertion_checks(resp, genomic_del1_lse) + + # gnomad should always return lse even if provided other hgvs dup del mode option + resp = await test_handler.normalize(q, HGVSDupDelModeOption.COPY_NUMBER_COUNT) + assertion_checks(resp, genomic_del1_lse) + + +@pytest.mark.asyncio +async def test_protein_delins(test_handler, protein_delins): + """Test that Amnio Acid DelIns normalizes correctly.""" + resp = await test_handler.normalize("NP_001333827.1:p.Leu747_Thr751delinsPro") + assertion_checks(resp, protein_delins) + + resp = await test_handler.normalize("EGFR p.Leu747_Thr751delinsPro") + assertion_checks(resp, protein_delins) + + resp = await test_handler.normalize("EGFR Leu747_Thr751delinsPro") + assertion_checks(resp, protein_delins) + + resp = await test_handler.normalize("EGFR L747_T751delinsP") + assertion_checks(resp, protein_delins) + + +@pytest.mark.asyncio 
+async def test_protein_deletion(test_handler, protein_deletion_np_range): + """Test that Protein Deletion normalizes correctly.""" + resp = await test_handler.normalize("NP_004439.2:p.Leu755_Thr759del") + assertion_checks(resp, protein_deletion_np_range) + + resp = await test_handler.normalize("ERBB2 p.Leu755_Thr759del") + assertion_checks(resp, protein_deletion_np_range) + + resp = await test_handler.normalize("ERBB2 Leu755_Thr759del") + assertion_checks(resp, protein_deletion_np_range) + + resp1 = await test_handler.normalize("EGFR L747_T751del") + resp2 = await test_handler.normalize("EGFR L747_T751delLREAT") + assert resp1.variation.id == resp2.variation.id + + # incorrect deleted sequence + resp = await test_handler.normalize("EGFR L747_T751delLREA") + assert resp.variation is None + + +@pytest.mark.asyncio +async def test_cdna_deletion(test_handler, cdna_deletion): + """Test that cdna deletion normalizes correctly.""" + # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=CA645372623 # noqa: E501 + q = "NM_004448.3:c.2264_2278delTGAGGGAAAACACAT" + resp1 = await test_handler.normalize(q) + assertion_checks(resp1, cdna_deletion) + + # incorrected deleted sequence + resp = await test_handler.normalize("NM_004448.3:c.2264_2278delTGAGGGAAAACACTA") + assert resp.variation is None + + resp2 = await test_handler.normalize("NM_004448.3:c.2264_2278del") + assert resp1.variation.id == resp2.variation.id + + q = "ERBB2 c.2264_2278delTGAGGGAAAACACAT" + resp = await test_handler.normalize(q) + assertion_checks(resp, cdna_deletion) + + +@pytest.mark.asyncio +async def test_genomic_deletion(test_handler, genomic_deletion): + """Test that genomic deletion normalizes correctly""" + # CA915940709 + q = "NC_000003.12:g.10146527_10146528del" + resp1 = await test_handler.normalize(q) + assertion_checks(resp1, genomic_deletion) + + resp2 = await test_handler.normalize("NC_000003.12:g.10146527_10146528delCT") + assert resp2.variation.id == 
resp1.variation.id + + resp3 = await test_handler.normalize("3-10146526-TCT-T") + assert resp3.variation.id == resp2.variation.id + + # incorrect deleted sequence + resp = await test_handler.normalize("NC_000003.12:g.10146527_10146528delCC") + assert resp.variation is None + + +@pytest.mark.asyncio +async def test_protein_insertion(test_handler, protein_insertion): + """Test that protein insertion normalizes correctly.""" + resp = await test_handler.normalize("NP_005219.2:p.Asp770_Asn771insGlyLeu") + assertion_checks(resp, protein_insertion) + + resp = await test_handler.normalize("EGFR D770_N771insGL") + assertion_checks(resp, protein_insertion) + + resp = await test_handler.normalize("EGFR p.D770_N771insGL") + assertion_checks(resp, protein_insertion) + + resp = await test_handler.normalize("EGFR Asp770_Asn771insGlyLeu") + assertion_checks(resp, protein_insertion) + + resp = await test_handler.normalize("EGFR p.Asp770_Asn771insGlyLeu") + assertion_checks(resp, protein_insertion) + + +@pytest.mark.asyncio +async def test_cdna_insertion(test_handler, cdna_insertion): + """Test that cdna insertion normalizes correctly.""" + resp = await test_handler.normalize("ENST00000331728.9:c.2049_2050insA") + assertion_checks(resp, cdna_insertion) + + +@pytest.mark.asyncio +async def test_genomic_insertion( + test_handler, genomic_insertion, grch38_genomic_insertion_variation +): + """Test that genomic insertion normalizes correctly.""" + resp = await test_handler.normalize( + "NC_000017.10:g.37880993_37880994insGCTTACGTGATG" + ) + assertion_checks(resp, grch38_genomic_insertion_variation) + + resp = await test_handler.normalize("ERBB2 g.37880993_37880994insGCTTACGTGATG") + assertion_checks(resp, genomic_insertion) + + q = "17-37880993-G-GGCTTACGTGATG" + resp = await test_handler.normalize(q) + assertion_checks(resp, grch38_genomic_insertion_variation) + + +@pytest.mark.asyncio +async def test_amplification(test_handler, braf_amplification, prpf8_amplification): + """Test that 
amplification normalizes correctly.""" + q = "BRAF Amplification" + resp = await test_handler.normalize(q) + assertion_checks(resp, braf_amplification) + + # Gene with > 1 sequence location + q = "PRPF8 AMPLIFICATION" + resp = await test_handler.normalize(q) + assertion_checks(resp, prpf8_amplification) + + # Gene with no location. This should NOT return a variation + resp = await test_handler.normalize("IFNR amplification") + assert resp.variation is None + + +@pytest.mark.asyncio +async def test_valid_queries(test_handler): + """Test that valid queries don"t throw exceptions. Used for queries that + revealed bugs in service. + """ + assert await test_handler.normalize("CCND1 Y44D") + + resp = await test_handler.normalize("NC_000002.12:g.73448098_73448100delCTC") + assert resp + assert resp.variation.state.sequence.root == "CTC" + assert resp.variation.id == "ga4gh:VA.7Un0qQ6Ksg7hKCTt3xKR8VGEQuRU4jH_" + + # Test ambiguous IUPAC code N + for q in [ + "NC_000017.10:g.7572948_7572949insTTTTTTTTTNNNNN", + "NC_000007.13:g.140453136A>N", + "NC_000007.13:g.140453135_140453136delinsATN", + "NM_007294.3:c.2902_2903insTCN", + "NM_004333.4:c.1799T>N", + "NM_001289937.1:c.2326_2327delinsCTN", + ]: + resp = await test_handler.normalize(q) + assert resp.variation, q + + +@pytest.mark.asyncio +async def test_no_matches(test_handler): + """Test no matches work correctly.""" + queries = [ + "braf", # no change + "braf v600e", # incorrect case + "braf v600000932092039e", # invalid pos + "NP_000213.1:cp.Leu862=", # cp is invalid + "NP_000213.1:cp.Leu862", # cp is invalid + "BRAF V600E 33", # not supported query type + "NP_004324.2:p.Glu600Val", # not valid ref + "NP_004324.2:p.Glu600Gal", # not valid ref + "NP_004324.2839:p.Glu600Val", # not valid accession + "NP_004324.2:t.Glu600Val", # t is invalid + "this:c.54G>H", # not a valid accession + "NC_000007.13:g.4TT%20(p.Arg325Trp)", + "NG_008212.3:g.5426_5445del", # NG accessions not supported + "NC_000010.11-87925523-C-G", # invalid 
format + "clinvar:10", + " ", + "", + ] + for q in queries: + resp = await test_handler.normalize(q) + assert resp.variation is None + + +@pytest.mark.asyncio +async def test_service_meta(): + """Test that service meta info populates correctly.""" + response = await normalize_get_response("BRAF v600e", "default") + service_meta = response.service_meta_ + assert service_meta.name == "variation-normalizer" + assert service_meta.version + assert isinstance(service_meta.response_datetime, datetime) + assert ( + service_meta.url == "https://github.com/cancervariants/variation-normalization" + ) + + response = await normalize_get_response("this-wont-normalize", "default") + service_meta = response.service_meta_ + assert service_meta.name == "variation-normalizer" + assert service_meta.version + assert isinstance(service_meta.response_datetime, datetime) + assert ( + service_meta.url == "https://github.com/cancervariants/variation-normalization" + ) + + response = await to_vrs_get_response("this-wont-normalize") + service_meta = response.service_meta_ + assert service_meta.name == "variation-normalizer" + assert service_meta.version + assert isinstance(service_meta.response_datetime, datetime) + assert ( + service_meta.url == "https://github.com/cancervariants/variation-normalization" + ) diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py new file mode 100644 index 0000000..8ac3fb0 --- /dev/null +++ b/tests/test_tokenizer.py @@ -0,0 +1,232 @@ +"""Module for testing tokenizers""" +import pytest +import yaml + +from tests import PROJECT_ROOT +from variation.schemas.token_response_schema import ( + AmplificationToken, + CdnaDeletionToken, + CdnaDelInsToken, + CdnaInsertionToken, + CdnaReferenceAgreeToken, + CdnaSubstitutionToken, + GenomicDeletionAmbiguousToken, + GenomicDeletionToken, + GenomicDelInsToken, + GenomicDuplicationAmbiguousToken, + GenomicDuplicationToken, + GenomicInsertionToken, + GenomicReferenceAgreeToken, + GenomicSubstitutionToken, + 
ProteinDeletionToken, + ProteinDelInsToken, + ProteinInsertionToken, + ProteinReferenceAgreeToken, + ProteinStopGainToken, + ProteinSubstitutionToken, +) +from variation.tokenizers import ( + CdnaDeletion, + CdnaDelIns, + CdnaGenomicReferenceAgree, + CdnaInsertion, + CdnaSubstitution, + FreeTextCategorical, + GenomicDeletion, + GenomicDelIns, + GenomicDuplication, + GenomicInsertion, + GenomicSubstitution, + ProteinDeletion, + ProteinDelIns, + ProteinInsertion, + ProteinReferenceAgree, + ProteinSubstitution, +) + + +@pytest.fixture(scope="module") +def all_fixtures(): + """Create fixture for tokenizers""" + with open(f"{PROJECT_ROOT}/tests/fixtures/tokenizers.yml") as stream: + return yaml.safe_load(stream) + + +def tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token): + """Ensure that fixtures exist for fixture name and that tokenizer response matches + expected + """ + labels = ["should_match", "should_not_match"] + fixtures = all_fixtures.get(fixture_name, {labels[0]: [], labels[1]: []}) + + for label in labels: + assert fixtures[label], f"{fixture_name} has no {label} queries" + + for x in fixtures[label]: + query = x["token"] + token = tokenizer_instance().match(query) + + if label == "should_match": + assert isinstance(token, expected_token), query + else: + assert not isinstance(token, expected_token), query + + +def test_amplification(all_fixtures): + """Test that amplification tokenizer works""" + fixture_name = "amplification" + tokenizer_instance = FreeTextCategorical + expected_token = AmplificationToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_protein_substitution(all_fixtures): + """Test that protein substitution tokenizer works""" + fixture_name = "protein_substitution" + tokenizer_instance = ProteinSubstitution + expected_token = ProteinSubstitutionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def 
test_cdna_substitution(all_fixtures): + """Test that cdna substitution tokenizer works""" + fixture_name = "cdna_substitution" + tokenizer_instance = CdnaSubstitution + expected_token = CdnaSubstitutionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_genomic_substitution(all_fixtures): + """Test that genomic substitution tokenizer works""" + fixture_name = "genomic_substitution" + tokenizer_instance = GenomicSubstitution + expected_token = GenomicSubstitutionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_protein_stop_gain(all_fixtures): + """Test that protein stop gain tokenizer works""" + fixture_name = "protein_stop_gain" + tokenizer_instance = ProteinSubstitution + expected_token = ProteinStopGainToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_protein_reference_agree(all_fixtures): + """Test that protein reference agree tokenizer works""" + fixture_name = "protein_reference_agree" + tokenizer_instance = ProteinReferenceAgree + expected_token = ProteinReferenceAgreeToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_cdna_reference_agree(all_fixtures): + """Test that cdna reference agree tokenizer works""" + fixture_name = "cdna_reference_agree" + tokenizer_instance = CdnaGenomicReferenceAgree + expected_token = CdnaReferenceAgreeToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_genomic_reference_agree(all_fixtures): + """Test that genomic reference agree tokenizer works""" + fixture_name = "genomic_reference_agree" + tokenizer_instance = CdnaGenomicReferenceAgree + expected_token = GenomicReferenceAgreeToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_protein_delins(all_fixtures): + """Test that protein delins tokenizer works""" + fixture_name = 
"protein_delins" + tokenizer_instance = ProteinDelIns + expected_token = ProteinDelInsToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_cdna_delins(all_fixtures): + """Test that cdna delins tokenizer works""" + fixture_name = "cdna_delins" + tokenizer_instance = CdnaDelIns + expected_token = CdnaDelInsToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_genomic_delins(all_fixtures): + """Test that genomic delins tokenizer works""" + fixture_name = "genomic_delins" + tokenizer_instance = GenomicDelIns + expected_token = GenomicDelInsToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_protein_deletion(all_fixtures): + """Test that protein deletion tokenizer works""" + fixture_name = "protein_deletion" + tokenizer_instance = ProteinDeletion + expected_token = ProteinDeletionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_cdna_deletion(all_fixtures): + """Test that cdna deletion tokenizer works""" + fixture_name = "cdna_deletion" + tokenizer_instance = CdnaDeletion + expected_token = CdnaDeletionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_genomic_deletion(all_fixtures): + """Test that genomic deletion tokenizer works""" + fixture_name = "genomic_deletion" + tokenizer_instance = GenomicDeletion + expected_token = GenomicDeletionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_genomic_deletion_ambiguous(all_fixtures): + """Test that genomic deletion ambiguous tokenizer works""" + fixture_name = "genomic_deletion_ambiguous" + tokenizer_instance = GenomicDeletion + expected_token = GenomicDeletionAmbiguousToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_protein_insertion(all_fixtures): + """Test that 
protein insertion tokenizer works""" + fixture_name = "protein_insertion" + tokenizer_instance = ProteinInsertion + expected_token = ProteinInsertionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_cdna_insertion(all_fixtures): + """Test that cdna insertion tokenizer works""" + fixture_name = "cdna_insertion" + tokenizer_instance = CdnaInsertion + expected_token = CdnaInsertionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_genomic_insertion(all_fixtures): + """Test that genomic insertion tokenizer works""" + fixture_name = "genomic_insertion" + tokenizer_instance = GenomicInsertion + expected_token = GenomicInsertionToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_genomic_duplication(all_fixtures): + """Test that genomic duplication tokenizer works""" + fixture_name = "genomic_duplication" + tokenizer_instance = GenomicDuplication + expected_token = GenomicDuplicationToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) + + +def test_genomic_duplication_ambiguous(all_fixtures): + """Test that genomic duplication ambiguous tokenizer works""" + fixture_name = "genomic_duplication_ambiguous" + tokenizer_instance = GenomicDuplication + expected_token = GenomicDuplicationAmbiguousToken + tokenizer_checks(all_fixtures, fixture_name, tokenizer_instance, expected_token) diff --git a/tests/test_translator.py b/tests/test_translator.py new file mode 100644 index 0000000..babe94d --- /dev/null +++ b/tests/test_translator.py @@ -0,0 +1,452 @@ +"""Module for testing translators""" +import pytest +import yaml + +from tests import PROJECT_ROOT +from variation.hgvs_dup_del_mode import HGVSDupDelMode +from variation.translators import ( + Amplification, + CdnaDeletion, + CdnaDelIns, + CdnaInsertion, + CdnaReferenceAgree, + CdnaSubstitution, + GenomicDeletion, + GenomicDeletionAmbiguous, 
+ GenomicDelIns, + GenomicDuplication, + GenomicDuplicationAmbiguous, + GenomicInsertion, + GenomicReferenceAgree, + GenomicSubstitution, + ProteinDeletion, + ProteinDelIns, + ProteinInsertion, + ProteinReferenceAgree, + ProteinStopGain, + ProteinSubstitution, +) +from variation.validate import Validate +from variation.vrs_representation import VRSRepresentation + + +@pytest.fixture(scope="module") +def all_fixtures(): + """Create fixture for translators""" + with open(f"{PROJECT_ROOT}/tests/fixtures/translators.yml") as stream: + return yaml.safe_load(stream) + + +@pytest.fixture(scope="module") +def trans_params(test_cool_seq_tool): + """Create fixture for translator params""" + vrs_representation = VRSRepresentation(test_cool_seq_tool.seqrepo_access) + hgvs_dup_del_mode = HGVSDupDelMode(test_cool_seq_tool.seqrepo_access) + return [ + test_cool_seq_tool.seqrepo_access, + test_cool_seq_tool.mane_transcript, + test_cool_seq_tool.uta_db, + vrs_representation, + hgvs_dup_del_mode, + ] + + +@pytest.fixture(scope="module") +def test_validator(val_params): + """Create fixture for validate class""" + return Validate(*val_params) + + +async def translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, +): + """Ensure that fixtures exist for fixture name and that translator response matches + expected + """ + fixtures = all_fixtures.get(fixture_name, {"tests": []}) + assert fixtures["tests"], f"{fixture_name} has no tests" + + for x in fixtures["tests"]: + query = x["query"] + expected = x["variations"] + + tokens = test_tokenizer.perform(query, []) + classification = test_classifier.perform(tokens) + validation_summary = await test_validator.perform(classification) + translations = [] + for vr in validation_summary.valid_results: + translation_result = await translator_instance(*trans_params).translate( + vr, [] + ) + vrs_variation = translation_result.vrs_variation + if vrs_variation 
and vrs_variation not in translations: + assert vrs_variation in expected, f"{query}: {vrs_variation['id']}" + translations.append(vrs_variation) + + assert len(translations) == len(expected), query + + +@pytest.mark.asyncio +async def test_protein_substitution( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that protein substitution validator works correctly""" + translator_instance = ProteinSubstitution + fixture_name = "protein_substitution" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_substitution( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that cdna substitution validator works correctly""" + translator_instance = CdnaSubstitution + fixture_name = "cdna_substitution" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_substitution( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that genomic substitution validator works correctly""" + translator_instance = GenomicSubstitution + fixture_name = "genomic_substitution" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_stop_gain( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that protein stop gain validator works correctly""" + translator_instance = ProteinStopGain + fixture_name = "protein_stop_gain" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + 
+@pytest.mark.asyncio +async def test_protein_reference_agree( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that protein reference agree validator works correctly""" + translator_instance = ProteinReferenceAgree + fixture_name = "protein_reference_agree" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_reference_agree( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that cdna reference agree validator works correctly""" + translator_instance = CdnaReferenceAgree + fixture_name = "cdna_reference_agree" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_reference_agree( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that genomic reference agree validator works correctly""" + translator_instance = GenomicReferenceAgree + fixture_name = "genomic_reference_agree" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_delins( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that protein delins validator works correctly""" + translator_instance = ProteinDelIns + fixture_name = "protein_delins" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_delins( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that cdna delins validator works correctly""" + 
translator_instance = CdnaDelIns + fixture_name = "cdna_delins" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_delins( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that genomic delins validator works correctly""" + translator_instance = GenomicDelIns + fixture_name = "genomic_delins" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_deletion( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that protein deletion validator works correctly""" + translator_instance = ProteinDeletion + fixture_name = "protein_deletion" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_deletion( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that protein deletion validator works correctly""" + translator_instance = CdnaDeletion + fixture_name = "cdna_deletion" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_deletion( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that genomic deletion validator works correctly""" + translator_instance = GenomicDeletion + fixture_name = "genomic_deletion" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def 
test_genomic_deletion_ambiguous( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that genomic deletion ambiguous validator works correctly""" + translator_instance = GenomicDeletionAmbiguous + fixture_name = "genomic_deletion_ambiguous" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_insertion( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that protein insertion validator works correctly""" + translator_instance = ProteinInsertion + fixture_name = "protein_insertion" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_insertion( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that cdna insertion validator works correctly""" + translator_instance = CdnaInsertion + fixture_name = "cdna_insertion" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_insertion( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that genomic insertion validator works correctly""" + translator_instance = GenomicInsertion + fixture_name = "genomic_insertion" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_duplication( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that genomic duplication validator works correctly""" + translator_instance = GenomicDuplication + 
fixture_name = "genomic_duplication" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_duplication_ambiguous( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that genomic duplication ambiguous validator works correctly""" + translator_instance = GenomicDuplicationAmbiguous + fixture_name = "genomic_duplication_ambiguous" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) + + +@pytest.mark.asyncio +async def test_amplification( + all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator +): + """Test that amplification validator works correctly""" + translator_instance = Amplification + fixture_name = "amplification" + await translator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + test_validator, + trans_params, + fixture_name, + translator_instance, + ) diff --git a/tests/test_validator.py b/tests/test_validator.py new file mode 100644 index 0000000..b97707c --- /dev/null +++ b/tests/test_validator.py @@ -0,0 +1,413 @@ +"""Module for testing validators""" +import pytest +import yaml + +from tests import PROJECT_ROOT +from variation.validators import ( + Amplification, + CdnaDeletion, + CdnaDelIns, + CdnaInsertion, + CdnaReferenceAgree, + CdnaSubstitution, + GenomicDeletion, + GenomicDeletionAmbiguous, + GenomicDelIns, + GenomicDuplication, + GenomicDuplicationAmbiguous, + GenomicInsertion, + GenomicReferenceAgree, + GenomicSubstitution, + ProteinDeletion, + ProteinDelIns, + ProteinInsertion, + ProteinReferenceAgree, + ProteinStopGain, + ProteinSubstitution, +) + + +@pytest.fixture(scope="module") +def all_fixtures(): + """Create fixture for validators""" + with open(f"{PROJECT_ROOT}/tests/fixtures/validators.yml") as stream: + 
return yaml.safe_load(stream) + + +async def validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, +): + """Ensure that fixtures exist for fixture name and that validator response matches + expected + """ + labels = ["should_match", "should_not_match"] + if fixture_name == "amplification": + # Amplification are always valid + labels = labels[:-1] + fixtures = all_fixtures.get(fixture_name, {labels[0]: []}) + else: + labels = ["should_match", "should_not_match"] + fixtures = all_fixtures.get(fixture_name, {labels[0]: [], labels[1]: []}) + + for label in labels: + assert fixtures[label], f"{fixture_name} has no {label} queries" + + for x in fixtures[label]: + query = x["query"] + tokens = test_tokenizer.perform(query, []) + classification = test_classifier.perform(tokens) + + try: + validation_results = await validator_instance(*val_params).validate( + classification + ) + except Exception as e: + raise Exception(f"{e}: {query}") + else: + validator_instance + is_valid = False + for vr in validation_results: + if vr.is_valid: + is_valid = True + break + + assert is_valid if label == "should_match" else not is_valid, query + + +@pytest.mark.asyncio +async def test_protein_substitution( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that protein substitution validator works correctly""" + fixture_name = "protein_substitution" + validator_instance = ProteinSubstitution + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_substitution( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that cdna substitution validator works correctly""" + fixture_name = "cdna_substitution" + validator_instance = CdnaSubstitution + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + 
validator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_substitution( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that genomic substitution validator works correctly""" + fixture_name = "genomic_substitution" + validator_instance = GenomicSubstitution + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_stop_gain( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that protein stop gain validator works correctly""" + fixture_name = "protein_stop_gain" + validator_instance = ProteinStopGain + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_reference_agree( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that protein reference agree validator works correctly""" + fixture_name = "protein_reference_agree" + validator_instance = ProteinReferenceAgree + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_reference_agree( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that cdna reference agree validator works correctly""" + fixture_name = "cdna_reference_agree" + validator_instance = CdnaReferenceAgree + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_reference_agree( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that genomic reference agree validator works correctly""" + fixture_name = "genomic_reference_agree" + validator_instance = GenomicReferenceAgree + await validator_checks( + all_fixtures, + 
test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_delins( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that protein delins validator works correctly""" + fixture_name = "protein_delins" + validator_instance = ProteinDelIns + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_delins(all_fixtures, test_tokenizer, test_classifier, val_params): + """Test that cdna delins validator works correctly""" + fixture_name = "cdna_delins" + validator_instance = CdnaDelIns + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_delins( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that genomic delins validator works correctly""" + fixture_name = "genomic_delins" + validator_instance = GenomicDelIns + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_deletion( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that protein deletion validator works correctly""" + fixture_name = "protein_deletion" + validator_instance = ProteinDeletion + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_deletion(all_fixtures, test_tokenizer, test_classifier, val_params): + """Test that cdna deletion validator works correctly""" + fixture_name = "cdna_deletion" + validator_instance = CdnaDeletion + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + 
validator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_deletion( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that genomic deletion validator works correctly""" + fixture_name = "genomic_deletion" + validator_instance = GenomicDeletion + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_deletion_ambiguous( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that genomic deletion ambiguous validator works correctly""" + fixture_name = "genomic_deletion_ambiguous" + validator_instance = GenomicDeletionAmbiguous + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_protein_insertion( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that protein insertion validator works correctly""" + fixture_name = "protein_insertion" + validator_instance = ProteinInsertion + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_cdna_insertion( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that cdna insertion validator works correctly""" + fixture_name = "cdna_insertion" + validator_instance = CdnaInsertion + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_insertion( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that genomic insertion validator works correctly""" + fixture_name = "genomic_insertion" + validator_instance = GenomicInsertion + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + 
fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_duplication( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that genomic duplication validator works correctly""" + fixture_name = "genomic_duplication" + validator_instance = GenomicDuplication + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_genomic_duplication_ambiguous( + all_fixtures, test_tokenizer, test_classifier, val_params +): + """Test that genomic duplication ambiguous validator works correctly""" + fixture_name = "genomic_duplication_ambiguous" + validator_instance = GenomicDuplicationAmbiguous + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) + + +@pytest.mark.asyncio +async def test_amplification(all_fixtures, test_tokenizer, test_classifier, val_params): + """Test that amplification validator works correctly""" + fixture_name = "amplification" + validator_instance = Amplification + await validator_checks( + all_fixtures, + test_tokenizer, + test_classifier, + val_params, + fixture_name, + validator_instance, + ) diff --git a/tests/to_copy_number_variation/test_amplification_to_cx_var.py b/tests/to_copy_number_variation/test_amplification_to_cx_var.py new file mode 100644 index 0000000..8f76374 --- /dev/null +++ b/tests/to_copy_number_variation/test_amplification_to_cx_var.py @@ -0,0 +1,89 @@ +"""Module for testing Amplification to Copy Number Change""" +import pytest +from ga4gh.vrs import models + + +@pytest.fixture(scope="module") +def kit_amplification(): + """Create test fixture for KIT amplification""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.wQv1KnYyhMd1aKoXFrOVzT3rMNvo0OIS", + "copyChange": "efo:0030072", + "location": { + "type": "SequenceLocation", + "id": 
"ga4gh:SL.5UgZnBz5pAVUWzNMyC1YJBeVnAA_DGUE", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.iy7Zfceb5_VGtTQzJ-v5JpPbpeifHD_V", + }, + "start": 55599320, + "end": 55599321, + }, + } + return models.CopyNumberChange(**params) + + +def test_amplification_to_cx_var( + test_cnv_handler, braf_amplification, prpf8_amplification, kit_amplification +): + """Test that amplification_to_cx_var method works correctly""" + # Using gene normalizer + resp = test_cnv_handler.amplification_to_cx_var(gene="braf") + assert resp.copy_number_change.model_dump( + exclude_none=True + ) == braf_amplification.model_dump(exclude_none=True) + assert resp.amplification_label == "BRAF Amplification" + assert resp.warnings == [] + + # Gene with > 1 sequence location + resp = test_cnv_handler.amplification_to_cx_var(gene="PRPF8") + assert resp.copy_number_change.model_dump( + exclude_none=True + ) == prpf8_amplification.model_dump(exclude_none=True) + assert resp.amplification_label == "PRPF8 Amplification" + assert resp.warnings == [] + + # Gene with no location. 
This should NOT return a variation + resp = test_cnv_handler.amplification_to_cx_var(gene="ifnr") + assert resp.copy_number_change is None + assert resp.amplification_label == "IFNR Amplification" + assert resp.warnings == [ + "gene-normalizer could not find a priority sequence " "location for gene: IFNR" + ] + + # Using sequence_id, start, end + resp = test_cnv_handler.amplification_to_cx_var( + gene="KIT", sequence_id="NC_000004.11", start=55599321, end=55599321 + ) + assert resp.copy_number_change.model_dump( + exclude_none=True + ) == kit_amplification.model_dump(exclude_none=True) + assert resp.amplification_label == "KIT Amplification" + assert resp.warnings == [] + + # Sequence_id not found in seqrepo + resp = test_cnv_handler.amplification_to_cx_var( + gene="BRAF", sequence_id="NC_000007", start=140453136, end=140453136 + ) + assert resp.copy_number_change is None + assert resp.amplification_label == "BRAF Amplification" + assert resp.warnings == [ + "SeqRepo unable to get translated identifiers for " "NC_000007" + ] + + # pos not on valid sequence_id + resp = test_cnv_handler.amplification_to_cx_var( + gene="braf", sequence_id="NC_000007.13", start=55599321, end=9955599321 + ) + assert resp.copy_number_change is None + assert resp.amplification_label == "BRAF Amplification" + assert resp.warnings == [ + "End inter-residue coordinate (9955599321) is out of index on NC_000007.13" + ] + + # invalid gene + resp = test_cnv_handler.amplification_to_cx_var(gene="invalid") + assert resp.copy_number_change is None + assert resp.amplification_label is None + assert resp.warnings == ["gene-normalizer returned no match for gene: invalid"] diff --git a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py new file mode 100644 index 0000000..3e950cb --- /dev/null +++ b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py @@ -0,0 +1,1364 @@ +"""Module for testing the hgvs to copy number count and 
copy number change endpoints""" +import copy + +import pytest +from ga4gh.vrs import models + +from tests.conftest import cnv_assertion_checks + + +@pytest.fixture(scope="module") +def genomic_dup1_cx_38(genomic_dup1_seq_loc_not_normalized): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.d8BWSLNKN0K4n8ySG0jWPCr4cJIqEf5g", + "location": genomic_dup1_seq_loc_not_normalized, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup1_37_loc(): + """Create test fixture GRCh37 duplication location""" + return { + "id": "ga4gh:SL.ntKfbY4eZVFNOAMuZPb4RBRhINxvOmM9", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.VNBualIltAyi2AI_uXcKU7M9XUOuA7MS", + }, + "start": 49568694, + "end": 49568695, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup1_cn_37(genomic_dup1_37_loc): + """Create test fixture copy number count variation (not normalized)""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.-TsNBiFhHFcWLb7pz9GCWcJunlBmb_B4", + "location": genomic_dup1_37_loc, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup1_cx_37(genomic_dup1_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.inPVJ3ANsN-Xb22HakFf_BmMg73gZiFo", + "location": genomic_dup1_37_loc, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup2_cx_38(genomic_dup2_seq_loc_normalized): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.uchcAvP6DXLirT7zASWHcWwmGWPLc8ye", + "location": genomic_dup2_seq_loc_normalized, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + 
+@pytest.fixture(scope="module") +def genomic_dup2_37_loc(): + """Create test fixture GRCh37 duplication location""" + return { + "id": "ga4gh:SL.g5_YcFgvTQSCuirJLMviwlue4NTb9EJ-", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": 33229406, + "end": 33229410, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup2_cn_37(genomic_dup2_37_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.jQdYqHKs7Y7BPQq5lBIddKd208iJrskw", + "location": genomic_dup2_37_loc, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup2_cx_37(genomic_dup2_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.6Nzma3vnsYb7ZqJ0XSiqv2F8-XFDKMyP", + "location": genomic_dup2_37_loc, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup3_cx_38(genomic_del3_dup3_loc_not_normalized): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.3CEaG1qP1k9AU_aae_iIUx4uaTb72N1R", + "location": genomic_del3_dup3_loc_not_normalized, + "copyChange": "efo:0030072", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup3_37_loc(): + """Create test fixture GRCh37 duplication location""" + return { + "id": "ga4gh:SL.xDN-t4g0hLgYTKyh3_88Drln1HdishyF", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": [31078343, 31118467], + "end": [33292395, 33435268], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del3_dup3_cn_37(genomic_dup3_37_loc): + """Create test fixture copy number variation for del/dup 3 on 
GRCh37""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.TFwnv4Lv7f2ZvboyqSBygWbC57QzuQ8R", + "location": genomic_dup3_37_loc, + "copies": 2, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup3_cx_37(genomic_dup3_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.A9Z_PAgta0MwukOSjrntOneG9N66_FED", + "location": genomic_dup3_37_loc, + "copyChange": "efo:0030072", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup4_cn_38(genomic_dup4_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.KqbQewUgZYfmottbgn1xYq58DiPVU5SZ", + "location": genomic_dup4_loc, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup4_cx_38(genomic_dup4_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.X0nbb6qzkjqisoT8Ls_7-OS9KkYfTCCu", + "location": genomic_dup4_loc, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup4_37_loc(): + """Create test fixture GRCh37 duplication location""" + return { + "id": "ga4gh:SL.WVXqGHNVaD96semkKQVfTuEo4TN-yMGE", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.iy_UbUrvECxFRX5LPTH_KPojdlT7BKsf", + }, + "start": [None, 29652251], + "end": [29981821, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup4_cn_37(genomic_dup4_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.m19rHAtbxPm8ojn1pZm7Pq7e00WJkE75", + "location": genomic_dup4_37_loc, + "copies": 3, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def 
genomic_dup4_cx_37(genomic_dup4_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.5cVAesP5EKfh3uOTXe-BWyHdsFEvW7a0", + "location": genomic_dup4_37_loc, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup5_cn_38(genomic_dup5_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.hTq6Z10Mg7rS_T_uoODhF24HmvRdEkLk", + "location": genomic_dup5_loc, + "copies": 4, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup5_cx_38(genomic_dup5_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.bR_i_ztx0rLrEbML1B_xoqJ50W39qlKN", + "location": genomic_dup5_loc, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup5_37_loc(): + """Create test fixture GRCh37 duplication location""" + return { + "id": "ga4gh:SL.hS8rIdNliH9F4YuLlHhxLIdrHVVrCEXz", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": [None, 153287262], + "end": 153357667, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup5_cn_37(genomic_dup5_37_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.QdhteI3bI3qRFEZOk9pmICPsKFI6MCXd", + "location": genomic_dup5_37_loc, + "copies": 4, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup5_cx_37(genomic_dup5_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.PJWAoRYtFYcewmPtM5W2eG6TSef7gAyJ", + "location": genomic_dup5_37_loc, + "copyChange": "efo:0030067", + } 
+ return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup6_cn_38(genomic_dup6_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.KSFn5KQIPuPVJ6FjWaF0vzl7eRwwHbX9", + "location": genomic_dup6_loc, + "copies": 2, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup6_cx_38(genomic_dup6_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.5irbnyk3aqsHCEbs2WAgiNpChn3asZgQ", + "location": genomic_dup6_loc, + "copyChange": "efo:0030064", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_dup6_37_loc(): + """Create test fixture GRCh37 duplication location""" + return { + "id": "ga4gh:SL.xoQ6AO6YVSUdlvcO5WtpXGJykEbMktY6", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": 153287262, + "end": [153357667, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_dup6_cn_37(genomic_dup6_37_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.SijB_fXpFimGmPnWe2YHNcVv4NyEp9Uo", + "location": genomic_dup6_37_loc, + "copies": 2, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_dup6_cx_37(genomic_dup6_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.IkUmxq_kjLmo2mPv4eg9fgcd3B12tT4G", + "location": genomic_dup6_37_loc, + "copyChange": "efo:0030064", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_cx_38(genomic_del1_seq_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": 
"ga4gh:CX.LWRBNtBgcETMXEKezrr7WUPjO9WoOaqL", + "location": genomic_del1_seq_loc, + "copyChange": "efo:0030064", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_37_loc(): + """Create test fixture GRCh37 deletion location""" + return { + "id": "ga4gh:SL.zz9AUA6ANv3OPftr3dI-7GxGeaZKeADW", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.VNBualIltAyi2AI_uXcKU7M9XUOuA7MS", + }, + "start": 10191494, + "end": 10191495, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del1_cn_37(genomic_del1_37_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN._n2CTBQdDMSObUf6DPdj5t7XLPAQ-Ojt", + "location": genomic_del1_37_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del1_cx_37(genomic_del1_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.KqiBf1gspLI7WQu-wVOFYrG1HhLyqFlV", + "location": genomic_del1_37_loc, + "copyChange": "efo:0030064", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_cx_38(genomic_del2_seq_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.ehFxhcx6VUozuQxTd4vskjpaUIptU0Qg", + "location": genomic_del2_seq_loc, + "copyChange": "efo:0030071", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_37_loc(): + """Create test fixture GRCh37 deletion location""" + return { + "id": "ga4gh:SL.BwPpPS55EYrek4Gs7xdd8sNjYJvcZRi7", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.VNBualIltAyi2AI_uXcKU7M9XUOuA7MS", + }, + "start": 10188278, + "end": 10188297, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def 
genomic_del2_cn_37(genomic_del2_37_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.ic8DuXVLz-cFut46rneGHbkACsEQg832", + "location": genomic_del2_37_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del2_cx_37(genomic_del2_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.0i-lR3bpBiuYAGC4D7UErkrrgrawJahz", + "location": genomic_del2_37_loc, + "copyChange": "efo:0030071", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del3_cx_38(genomic_del3_dup3_loc_not_normalized): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.-9fCLzZprnBM-nl08MJUo5oqZ4ehduv7", + "location": genomic_del3_dup3_loc_not_normalized, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del3_37_loc(): + """Create test fixture GRCh37 deletion location""" + return { + "id": "ga4gh:SL.xDN-t4g0hLgYTKyh3_88Drln1HdishyF", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": [31078343, 31118467], + "end": [33292395, 33435268], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del3_cx_37(genomic_del3_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.uiJMtQrX_-w3-ehTTu7o2ByqatZYhYVu", + "location": genomic_del3_37_loc, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del4_cn_38(genomic_del4_seq_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": 
"ga4gh:CN.2k3RnvQtBIIIqSqVlTT7Uh0KeuD8LIpK", + "location": genomic_del4_seq_loc, + "copies": 4, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del4_cx_38(genomic_del4_seq_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.1DiUzraiKZLJb8oF8ynARS816fthsJpV", + "location": genomic_del4_seq_loc, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del4_37_loc(): + """Create test fixture GRCh37 deletion location""" + return { + "id": "ga4gh:SL.2ORImLGRcezhWXDYI9sZvLwFGPDH9WKS", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": [None, 31138612], + "end": [33357594, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del4_cn_37(genomic_del4_37_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.Mv42RTfmPVQV4nSoytbDbK4cfsUO0wyD", + "location": genomic_del4_37_loc, + "copies": 4, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del4_cx_37(genomic_del4_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.CJdXMZXSEE4hCIwjGxke4EWY7lMENYPj", + "location": genomic_del4_37_loc, + "copyChange": "efo:0030067", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del5_cn_38(genomic_del5_seq_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.VIYxNpNj8c5ulSfr8mFkxTBCGMunoUIN", + "location": genomic_del5_seq_loc, + "copies": 2, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del5_cx_38(genomic_del5_seq_loc): + """Create test 
fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.w-H5jcMK0AijJomgeu_LhN5IQ_0Z0GIi", + "location": genomic_del5_seq_loc, + "copyChange": "efo:0030064", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del5_37_loc(): + """Create test fixture GRCh37 deletion location""" + return { + "id": "ga4gh:SL.xYPrcJoKhU-xWyeBB4-DzYmFYOEynMBy", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": [None, 18593473], + "end": 18671749, + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del5_cn_37(genomic_del5_37_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.9qwSqDhzXCHPSWycZ-qePwwEXrCpQ02A", + "location": genomic_del5_37_loc, + "copies": 2, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del5_cx_37(genomic_del5_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.ICA0LcW-0gaOgJziPZJcGdXQ_MR3OiDu", + "location": genomic_del5_37_loc, + "copyChange": "efo:0030064", + } + return models.CopyNumberChange(**params) + + +@pytest.fixture(scope="module") +def genomic_del6_cn_38(genomic_del6_seq_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.CZEc44pX7Dh9yJARvvz6EW9oQvgkbwYf", + "location": genomic_del6_seq_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del6_cx_38(genomic_del6_seq_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.02f8o7Gz9y11bMOWl7Vacc3V5J5M82bH", + "location": genomic_del6_seq_loc, + "copyChange": "efo:0030071", + } + return models.CopyNumberChange(**params) + 
+ +@pytest.fixture(scope="module") +def genomic_del6_37_loc(): + """Create test fixture GRCh37 deletion location""" + return { + "id": "ga4gh:SL.nENEk628TbRKyB9H9n12ssAQZnpwJDUo", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.KqaUhJMW3CDjhoVtBetdEKT1n6hM-7Ek", + }, + "start": 133783901, + "end": [133785996, None], + "type": "SequenceLocation", + } + + +@pytest.fixture(scope="module") +def genomic_del6_cn_37(genomic_del6_37_loc): + """Create test fixture copy number count variation""" + params = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.dPP5zGqiwSPNnuj49POIaul6_3msLwgF", + "location": genomic_del6_37_loc, + "copies": 1, + } + return models.CopyNumberCount(**params) + + +@pytest.fixture(scope="module") +def genomic_del6_cx_37(genomic_del6_37_loc): + """Create test fixture copy number change variation""" + params = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.2YOJ93--D5aER5HsmnM4JP8GLWCcKKN-", + "location": genomic_del6_37_loc, + "copyChange": "efo:0030071", + } + return models.CopyNumberChange(**params) + + +@pytest.mark.asyncio +async def test_genomic_dup1_copy_number_count( + test_cnv_handler, genomic_dup1_38_cn, genomic_dup1_cn_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000003.12:g.49531262dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, + baseline_copies=2, + do_liftover=False, + ) + cnv_assertion_checks(resp, genomic_dup1_38_cn) + + q = "NC_000003.11:g.49568695dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup1_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup1_38_cn) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=True + ) + expected = copy.deepcopy(genomic_dup1_38_cn) + expected.copies = 2 + expected.id = 
"ga4gh:CN.BvXjVTvckz3zAp8yATcQjLp-1sWyEO9d" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_dup1_copy_number_change( + test_cnv_handler, genomic_dup1_cx_38, genomic_dup1_cx_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000003.12:g.49531262dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030069", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup1_cx_38) + + q = "NC_000003.11:g.49568695dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030069", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup1_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030069", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup1_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_dup2_copy_number_count( + test_cnv_handler, genomic_dup2_38_cn, genomic_dup2_cn_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000023.11:g.33211290_33211293dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup2_38_cn) + + q = "NC_000023.10:g.33229407_33229410dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup2_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup2_38_cn) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=True + ) + expected = copy.deepcopy(genomic_dup2_38_cn) + expected.copies = 2 + expected.id = "ga4gh:CN.40RjBTowl-97BT5vsPUgqdLJKNvL583c" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_dup2_copy_number_change( + test_cnv_handler, genomic_dup2_cx_38, 
genomic_dup2_cx_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000023.11:g.33211290_33211293dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup2_cx_38) + + q = "NC_000023.10:g.33229407_33229410dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup2_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup2_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_dup3_copy_number_count( + test_cnv_handler, genomic_del3_dup3_cn_38, genomic_del3_dup3_cn_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000023.11:g.(31060227_31100351)_(33274278_33417151)dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del3_dup3_cn_38) + + q = "NC_000023.10:g.(31078344_31118468)_(33292395_33435268)dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del3_dup3_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del3_dup3_cn_38) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + expected = copy.deepcopy(genomic_del3_dup3_cn_38) + expected.copies = 3 + expected.id = "ga4gh:CN.k_3m5Hu3_J5Mb8Rx8zH0plZ12U0XD1Du" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_dup3_copy_number_change( + test_cnv_handler, genomic_dup3_cx_38, genomic_dup3_cx_37 +): + """Test that genomic duplication works correctly""" + q = 
"NC_000023.11:g.(31060227_31100351)_(33274278_33417151)dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030072", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup3_cx_38) + + q = "NC_000023.10:g.(31078344_31118468)_(33292395_33435268)dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030072", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup3_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030072", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup3_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_dup4_copy_number_count( + test_cnv_handler, genomic_dup4_cn_38, genomic_dup4_cn_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000020.11:g.(?_30417576)_(31394018_?)dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup4_cn_38) + + q = "NC_000020.10:g.(?_29652252)_(29981821_?)dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup4_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup4_cn_38) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=True + ) + expected = copy.deepcopy(genomic_dup4_cn_38) + expected.copies = 2 + expected.id = "ga4gh:CN.214AwcxGQiu1rY8UYQpud23sQI5DJbm1" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_dup4_copy_number_change( + test_cnv_handler, genomic_dup4_cx_38, genomic_dup4_cx_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000020.11:g.(?_30417576)_(31394018_?)dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, 
copy_change="efo:0030069", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup4_cx_38) + + q = "NC_000020.10:g.(?_29652252)_(29981821_?)dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030069", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup4_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030069", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup4_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_dup5_copy_number_count( + test_cnv_handler, genomic_dup5_cn_38, genomic_dup5_cn_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000023.11:g.(?_154021812)_154092209dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup5_cn_38) + + q = "NC_000023.10:g.(?_153287263)_153357667dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup5_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup5_cn_38) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=4, do_liftover=True + ) + expected = copy.deepcopy(genomic_dup5_cn_38) + expected.copies = 5 + expected.id = "ga4gh:CN.XvDElgE55k4blDLkzlDhwh5xcGyZrucn" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_dup5_copy_number_change( + test_cnv_handler, genomic_dup5_cx_38, genomic_dup5_cx_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000023.11:g.(?_154021812)_154092209dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup5_cx_38) + + q = "NC_000023.10:g.(?_153287263)_153357667dup" # 37 + resp = 
await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup5_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup5_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_dup6_copy_number_count( + test_cnv_handler, genomic_dup6_cn_38, genomic_dup6_cn_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000023.11:g.154021812_(154092209_?)dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup6_cn_38) + + q = "NC_000023.10:g.153287263_(153357667_?)dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup6_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=1, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup6_cn_38) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + expected = copy.deepcopy(genomic_dup6_cn_38) + expected.copies = 3 + expected.id = "ga4gh:CN.px3rDGLGlOmJLGKBoojl1UrFKu6Rhb1P" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_dup6_copy_number_change( + test_cnv_handler, genomic_dup6_cx_38, genomic_dup6_cx_37 +): + """Test that genomic duplication works correctly""" + q = "NC_000023.11:g.154021812_(154092209_?)dup" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup6_cx_38) + + q = "NC_000023.10:g.153287263_(153357667_?)dup" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_dup6_cx_37) + + resp = 
await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_dup6_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_del1_copy_number_count( + test_cnv_handler, genomic_del1_38_cn, genomic_del1_cn_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000003.12:g.10149811del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del1_38_cn) + + q = "NC_000003.11:g.10191495del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del1_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del1_38_cn) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=True + ) + expected = copy.deepcopy(genomic_del1_38_cn) + expected.copies = 2 + expected.id = "ga4gh:CN.S0viz7yEPYtrHIBxBLlCKnIc9x8FspQp" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_del1_copy_number_change( + test_cnv_handler, genomic_del1_cx_38, genomic_del1_cx_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000003.12:g.10149811del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del1_cx_38) + + q = "NC_000003.11:g.10191495del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del1_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del1_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_del2_copy_number_count( + 
test_cnv_handler, genomic_del2_38_cn, genomic_del2_cn_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000003.12:g.10146595_10146613del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del2_38_cn) + + q = "NC_000003.11:g.10188279_10188297del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del2_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del2_38_cn) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=4, do_liftover=True + ) + expected = copy.deepcopy(genomic_del2_38_cn) + expected.copies = 3 + expected.id = "ga4gh:CN.d8thjAuvDC3acnDwFnz2Mg6PEMipVNJk" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_del2_copy_number_change( + test_cnv_handler, genomic_del2_cx_38, genomic_del2_cx_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000003.12:g.10146595_10146613del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030071", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del2_cx_38) + + q = "NC_000003.11:g.10188279_10188297del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030071", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del2_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030071", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del2_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_del3_copy_number_count( + test_cnv_handler, genomic_del3_dup3_cn_38, genomic_del3_dup3_cn_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000023.11:g.(31060227_31100351)_(33274278_33417151)del" # 38 + 
resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del3_dup3_cn_38) + + q = "NC_000023.10:g.(31078344_31118468)_(33292395_33435268)del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del3_dup3_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del3_dup3_cn_38) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + expected = copy.deepcopy(genomic_del3_dup3_cn_38) + expected.copies = 1 + expected.id = "ga4gh:CN.vqcaYwbK8oewMEtKbP2pn5npQtkOAGdo" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_del3_copy_number_change( + test_cnv_handler, genomic_del3_cx_38, genomic_del3_cx_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000023.11:g.(31060227_31100351)_(33274278_33417151)del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030069", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del3_cx_38) + + q = "NC_000023.10:g.(31078344_31118468)_(33292395_33435268)del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030069", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del3_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030069", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del3_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_del4_copy_number_count( + test_cnv_handler, genomic_del4_cn_38, genomic_del4_cn_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000023.11:g.(?_31120496)_(33339477_?)del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=5, do_liftover=False + 
) + cnv_assertion_checks(resp, genomic_del4_cn_38) + + q = "NC_000023.10:g.(?_31138613)_(33357594_?)del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=5, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del4_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=5, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del4_cn_38) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=True + ) + expected = copy.deepcopy(genomic_del4_cn_38) + expected.copies = 2 + expected.id = "ga4gh:CN.9qA8nLBcResblQhR0xfz16vPHP-tYXIA" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_del4_copy_number_change( + test_cnv_handler, genomic_del4_cx_38, genomic_del4_cx_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000023.11:g.(?_31120496)_(33339477_?)del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del4_cx_38) + + q = "NC_000023.10:g.(?_31138613)_(33357594_?)del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del4_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030067", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del4_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_del5_copy_number_count( + test_cnv_handler, genomic_del5_cn_38, genomic_del5_cn_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000023.11:g.(?_18575354)_18653629del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del5_cn_38) + + q = "NC_000023.10:g.(?_18593474)_18671749del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + 
q, baseline_copies=3, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del5_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del5_cn_38) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=True + ) + expected = copy.deepcopy(genomic_del5_cn_38) + expected.copies = 1 + expected.id = "ga4gh:CN.7tln8gxFt8FfLrJH7XlcX7PVSQCQqnNZ" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_del5_copy_number_change( + test_cnv_handler, genomic_del5_cx_38, genomic_del5_cx_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000023.11:g.(?_18575354)_18653629del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del5_cx_38) + + q = "NC_000023.10:g.(?_18593474)_18671749del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del5_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030064", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del5_cx_38) + + +@pytest.mark.asyncio +async def test_genomic_del6_copy_number_count( + test_cnv_handler, genomic_del6_cn_38, genomic_del6_cn_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000006.12:g.133462764_(133464858_?)del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del6_cn_38) + + q = "NC_000006.11:g.133783902_(133785996_?)del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=2, do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del6_cn_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, 
baseline_copies=2, do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del6_cn_38) + + resp = await test_cnv_handler.hgvs_to_copy_number_count( + q, baseline_copies=3, do_liftover=True + ) + expected = copy.deepcopy(genomic_del6_cn_38) + expected.copies = 2 + expected.id = "ga4gh:CN.ydRovIJjiYNRUGv0w3iJ-pgK08MuYqGR" + cnv_assertion_checks(resp, expected) + + +@pytest.mark.asyncio +async def test_genomic_del6_copy_number_change( + test_cnv_handler, genomic_del6_cx_38, genomic_del6_cx_37 +): + """Test that genomic deletion works correctly""" + q = "NC_000006.12:g.133462764_(133464858_?)del" # 38 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030071", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del6_cx_38) + + q = "NC_000006.11:g.133783902_(133785996_?)del" # 37 + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030071", do_liftover=False + ) + cnv_assertion_checks(resp, genomic_del6_cx_37) + + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030071", do_liftover=True + ) + cnv_assertion_checks(resp, genomic_del6_cx_38) + + +@pytest.mark.asyncio +async def test_invalid_cnv(test_cnv_handler): + """Check that invalid input return warnings""" + q = "DAG1 g.49568695dup" + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, + copy_change="efo:0030071", + do_liftover=True, + ) + assert set(resp.warnings) == { + "DAG1 g.49568695dup is not a supported HGVS genomic duplication or deletion" + } + assert resp.copy_number_change is None + + q = "braf V600E" + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030071", do_liftover=True + ) + assert set(resp.warnings) == { + "braf V600E is not a supported HGVS genomic duplication or deletion" + } + assert resp.copy_number_change is None + + # Not yet supported + for q in [ + "NC_000018.9:g.(48556994_48573289)_48573471dup", + "NC_000018.9:g.48556994_(48573289_48573471)dup", 
+ ]: + resp = await test_cnv_handler.hgvs_to_copy_number_change( + q, copy_change="efo:0030070" + ) + assert resp.warnings == [f"Unable to find classification for: {q}"], q + assert resp.copy_number_change is None, q diff --git a/tests/to_copy_number_variation/test_parsed_to_copy_number.py b/tests/to_copy_number_variation/test_parsed_to_copy_number.py new file mode 100644 index 0000000..d147b94 --- /dev/null +++ b/tests/to_copy_number_variation/test_parsed_to_copy_number.py @@ -0,0 +1,1056 @@ +"""Test that parsed_to_copy_number works correctly""" +from copy import deepcopy + +import pytest +from ga4gh.vrs import models +from pydantic import ValidationError + +from tests.conftest import cnv_assertion_checks +from variation.schemas.copy_number_schema import ( + ClinVarAssembly, + Comparator, + ParsedPosType, + ParsedToCnVarQuery, + ParsedToCxVarQuery, +) +from variation.to_copy_number_variation import ToCopyNumberError + + +@pytest.fixture(scope="module") +def cn_gain1(): + """Create test fixture for clinvar copy number gain. + https://www.ncbi.nlm.nih.gov/clinvar/variation/145208/?new_evidence=true + """ + variation = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.Qrs0TaGCcJiibMvhcML6BTSCVtX95FBl", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.g6xj5oKF99OysSxcfHyGYbh8NFNn2r61", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", + }, + "start": [None, 143134062], + "end": [143284670, None], + }, + "copies": 3, + } + return models.CopyNumberCount(**variation) + + +@pytest.fixture(scope="module") +def cn_gain2(): + """Create test fixture for clinvar copy number gain. 
+ https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true + """ + variation = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.-SqT6JTz0WpKfGQjdHnuJnyK8YMcAmez", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.2f5wWnJ52UShqq0wRTahKWH-YFCFTixG", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.AsXvWL1-2i5U_buw6_niVIxD6zTbAuS6", + }, + "start": [None, 31738808], + "end": [32217725, None], + }, + "copies": 2, + } + return models.CopyNumberCount(**variation) + + +@pytest.fixture(scope="module") +def cn_gain2_37(): + """Create test fixture for clinvar copy number gain on GRCh37 assembly. + https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true + """ + variation = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.gtS4om__GNKDFZxdtno7Cwiv_8Tv0_As", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.knW3_K9Kj2bvgGvnW3uorAEhZ9lnBD4F", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.zIMZb3Ft7RdWa5XYq0PxIlezLY2ccCgt", + }, + "start": [None, 32031011], + "end": [32509926, None], + }, + "copies": 2, + } + return models.CopyNumberCount(**variation) + + +@pytest.fixture(scope="module") +def cn_loss1(): + """Create test fixture for clinvar copy number loss. + https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true + """ + variation = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.ckk73c3GG4x-P0uL5Iv1tzBPxYea1V03", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.S_YZii49zAuWk8hA71OD5Ud1mtQRpw5T", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.v7noePfnNpK8ghYXEqZ9NukMXW7YeNsm", + }, + "start": [None, 10491131], + "end": [10535643, None], + }, + "copies": 1, + } + return models.CopyNumberCount(**variation) + + +@pytest.fixture(scope="module") +def cn_loss2(): + """Create test fixture for clinvar copy number loss. 
+ https://www.ncbi.nlm.nih.gov/clinvar/variation/148425/?new_evidence=true + """ + variation = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.6vQIGMEa94FNmLBwQLHRTHaf_yrjMnBz", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.xeLChE7XHqLtLjrVEBnHpxvtdWjRA0Aw", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + }, + "start": [None, 10000], + "end": [1223133, None], + }, + "copies": 0, + } + return models.CopyNumberCount(**variation) + + +@pytest.fixture(scope="module") +def cn_definite_number(): + """Create test fixture for copy number count using definite range for start and + number for end + """ + variation = { + "type": "CopyNumberCount", + "id": "ga4gh:CN.8xU8MDlKeD7kyhiVJmSwvi3iV78ReIc5", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.NqQ42igO9R3BBbA7q7jQ_81rfZOjpzGg", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", + }, + "start": [143134062, 143134064], + "end": 143284670, + }, + "copies": 3, + } + return models.CopyNumberCount(**variation) + + +@pytest.fixture(scope="module") +def cx_numbers(): + """Create test fixture for copy number change using numbers for start and end""" + variation = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.BTNwndSs3RylLhtL9Y45GePsVX35eeTT", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.Pu3oAKHColJSZ3zY_Xu5MeezINaTFlNq", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + }, + "start": 10000, + "end": 1223133, + }, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**variation) + + +@pytest.fixture(scope="module") +def cx_definite_ranges(): + """Create test fixture for copy number change using definite ranges for start and + end + """ + variation = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.gn7z-74PrlvMWAVK7jsP9oYnp0pCezee", + "location": { + "type": 
"SequenceLocation", + "id": "ga4gh:SL.jOyDc0XwpyvY-SqxowgWxb7N5ODEYc4I", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + }, + "start": [10000, 10005], + "end": [1223130, 1223133], + }, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**variation) + + +@pytest.fixture(scope="module") +def cx_indefinite_ranges(): + """Create test fixture for copy number change using indefinite ranges for start and + end + """ + variation = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.2lbeFEsxiN9sdMRtqiaYaM0HPy2UJWEC", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.-XBGqDkrHG7D19jCYuvNfEeJoBSBEHFA", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + }, + "start": [None, 10000], + "end": [1223130, None], + }, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**variation) + + +@pytest.fixture(scope="module") +def cx_number_indefinite(): + """Create test fixture for copy number change using number for start and indefinite + range for end + """ + variation = { + "type": "CopyNumberChange", + "id": "ga4gh:CX.38jEUd5AhCbQk9hB36hS6mEqRKlY7ugj", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.XuivAIjbPg8CLUUz7TZXO6mJWfAfU6HJ", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + }, + "start": 10000, + "end": [1223130, None], + }, + "copyChange": "efo:0030069", + } + return models.CopyNumberChange(**variation) + + +def test_get_parsed_ac(test_cnv_handler): + """Test that _get_parsed_ac works correctly""" + for assembly in [ClinVarAssembly.GRCH37, ClinVarAssembly.HG19]: + resp = test_cnv_handler._get_parsed_ac(assembly, "chr7", use_grch38=False) + assert resp.lifted_over is False + assert resp.accession == "ga4gh:SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86" + + resp = test_cnv_handler._get_parsed_ac(assembly, "chr7", 
use_grch38=True) + assert resp.lifted_over is True + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + + for assembly in [ClinVarAssembly.GRCH38, ClinVarAssembly.HG38]: + resp = test_cnv_handler._get_parsed_ac(assembly, "chr7", use_grch38=False) + assert resp.lifted_over is False + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + + resp = test_cnv_handler._get_parsed_ac(assembly, "chr7", use_grch38=True) + assert resp.lifted_over is False + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._get_parsed_ac( + ClinVarAssembly.NCBI36, "chr7", use_grch38=False + ) + assert str(e.value) == "NCBI36 assembly is not currently supported" + + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._get_parsed_ac(ClinVarAssembly.HG18, "chr7", use_grch38=False) + assert str(e.value) == "hg18 assembly is not currently supported" + + +def test_get_parsed_ac_chr(test_cnv_handler): + """Test that _get_parsed_ac_chr works correctly""" + resp = test_cnv_handler._get_parsed_ac_chr("NC_000007.13", False) + assert resp.accession == "ga4gh:SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86" + assert resp.chromosome == "chr7" + assert resp.lifted_over is False + + resp = test_cnv_handler._get_parsed_ac_chr("NC_000007.13", True) + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + assert resp.chromosome == "chr7" + assert resp.lifted_over is True + + for do_liftover in [True, False]: + resp = test_cnv_handler._get_parsed_ac_chr("NC_000007.14", do_liftover) + assert resp.accession == "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + assert resp.chromosome == "chr7" + assert resp.lifted_over is False + + # if genomic ac not provided + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._get_parsed_ac_chr("NP_000542.1", False) + assert str(e.value) == "Not a supported genomic accession: NP_000542.1" + + # invalid accession + with 
pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._get_parsed_ac_chr("NC_00000713", False) + assert ( + str(e.value) == "SeqRepo unable to get translated identifiers for NC_00000713" + ) # noqa: E501 + + +def test_validate_pos(test_cnv_handler): + """Test that _validate_ac_pos works correctly""" + resp = test_cnv_handler._validate_ac_pos("NC_000007.14", 140753336) + assert resp is None + + # invalid accession + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._validate_ac_pos("NC_00000714", 140753336) + assert str(e.value) == "Accession not found in SeqRepo: NC_00000714" + + # invalid pos + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._validate_ac_pos("NC_000007.14", 159345975) + assert str(e.value) == "Position (159345975) is not valid on NC_000007.14" + + # invalid pos + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._validate_ac_pos("NC_000007.14", 99999999999) + assert str(e.value) == "SeqRepo ValueError: Position out of range (99999999998)" + + +def test_get_vrs_loc_start_or_end(test_cnv_handler): + """Test that _get_vrs_loc_start_or_end works correctly""" + ac = "NC_000007.14" + pos0 = 140753336 + pos1 = 140753350 + + # Number start + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, pos0, ParsedPosType.NUMBER, is_start=True + ) + assert resp == 140753335 + + # Number end + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, pos0, ParsedPosType.NUMBER, is_start=False + ) + assert resp == 140753336 + + # Definite Range start + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, pos0, ParsedPosType.DEFINITE_RANGE, is_start=True, pos1=pos1 + ) + assert resp == models.Range([140753335, 140753349]) + + # Definite Range end + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, pos0, ParsedPosType.DEFINITE_RANGE, is_start=False, pos1=pos1 + ) + assert resp == models.Range([pos0, pos1]) + + # Indefinite Range start + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, + pos0, + 
ParsedPosType.INDEFINITE_RANGE, + is_start=True, + comparator=Comparator.LT_OR_EQUAL, + ) + assert resp == models.Range([None, 140753335]) + + # Indefinite Range end + resp = test_cnv_handler._get_vrs_loc_start_or_end( + ac, + pos0, + ParsedPosType.INDEFINITE_RANGE, + is_start=False, + comparator=Comparator.GT_OR_EQUAL, + ) + assert resp == models.Range([140753336, None]) + + +def test_liftover_pos(test_cnv_handler): + """Test that _liftover_pos works correctly""" + resp = test_cnv_handler._liftover_pos("chr7", 140453136, 140453137, None, None) + assert resp == { + "start0": 140753336, + "end0": 140753337, + "start1": None, + "end1": None, + } + + resp = test_cnv_handler._liftover_pos( + "chr7", 140453136, 140453137, 140453138, 140453139 + ) + assert resp == { + "start0": 140753336, + "end0": 140753337, + "start1": 140753338, + "end1": 140753339, + } + + # invalid pos + with pytest.raises(ToCopyNumberError) as e: + test_cnv_handler._liftover_pos("chr7", 159345975, 159345976, None, None) + assert str(e.value) == "Unable to liftover: chr7 with pos 159345975" + + +def test_parsed_copy_number_gain(test_cnv_handler, cn_gain1, cn_gain2, cn_gain2_37): + """Test that parsed_to_copy_number works for parsed copy number gain queries""" + # https://www.ncbi.nlm.nih.gov/clinvar/variation/145208/?new_evidence=true + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain1) + + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + assembly=ClinVarAssembly.HG19, + chromosome="chr1", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + 
end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain1) + + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + accession="NC_000001.10", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain1) + + # https://www.ncbi.nlm.nih.gov/clinvar/variation/146181/?new_evidence=true + # 38 + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.GRCH38, + chromosome="chr15", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2) + + # 38 with liftover (shouldnt do anything) + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.GRCH38, + chromosome="chr15", + do_liftover=True, + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2) + + # 38 with liftover (shouldnt do anything) + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.HG38, + chromosome="chr15", + do_liftover=True, + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = 
test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2) + + # 38 + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.HG38, + chromosome="chr15", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2) + + # 38 accession + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + accession="NC_000015.10", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2) + + # 38 accession with liftover (shouldnt do anything) + rb = ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + accession="NC_000015.10", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2) + + # 37 with liftover + rb = ParsedToCnVarQuery( + start0=32031012, + end0=32509926, + copies0=2, + accession="NC_000015.9", + do_liftover=True, + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2) + + # 37 chr+accession with liftover + rb = ParsedToCnVarQuery( + start0=32031012, + end0=32509926, + copies0=2, + chromosome="chr15", + assembly=ClinVarAssembly.GRCH37, + do_liftover=True, + 
start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2) + + # 37 with no liftover + rb = ParsedToCnVarQuery( + start0=32031012, + end0=32509926, + copies0=2, + accession="NC_000015.9", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2_37) + + # 37 chr+accession with no liftover + rb = ParsedToCnVarQuery( + start0=32031012, + end0=32509926, + copies0=2, + chromosome="chr15", + assembly=ClinVarAssembly.GRCH37, + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_gain2_37) + + +def test_parsed_copy_number_loss(test_cnv_handler, cn_loss1, cn_loss2): + """Test that parsed_to_copy_number works for parsed copy number loss queries""" + # https://www.ncbi.nlm.nih.gov/clinvar/variation/1299222/?new_evidence=true + rb = ParsedToCnVarQuery( + start0=10491132, + end0=10535643, + copies0=1, + assembly=ClinVarAssembly.GRCH37, + chromosome="chrX", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_loss1) + + rb = ParsedToCnVarQuery( + start0=10491132, + end0=10535643, + copies0=1, + assembly=ClinVarAssembly.HG19, + chromosome="chrX", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + 
start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_loss1) + + rb = ParsedToCnVarQuery( + start0=10491132, + end0=10535643, + copies0=1, + accession="NC_000023.10", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_loss1) + + # https://www.ncbi.nlm.nih.gov/clinvar/variation/148425/?new_evidence=true + rb = ParsedToCnVarQuery( + start0=10001, + end0=1223133, + copies0=0, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_loss2) + + rb = ParsedToCnVarQuery( + start0=10001, + end0=1223133, + copies0=0, + assembly=ClinVarAssembly.HG38, + chromosome="chrY", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_loss2) + + rb = ParsedToCnVarQuery( + start0=10001, + end0=1223133, + copies0=0, + accession="NC_000024.10", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_loss2) + + +def test_to_parsed_cn_var(test_cnv_handler, cn_definite_number): + """Test that 
parsed_to_copy_number works correctly for copy number count""" + # start uses definite and end uses number + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=ParsedPosType.DEFINITE_RANGE, + start1=143134065, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cn_definite_number) + + # copies is definite range + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies1=5, + copies_type=ParsedPosType.DEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=ParsedPosType.DEFINITE_RANGE, + start1=143134065, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + expected = deepcopy(cn_definite_number) + expected.copies = models.Range([3, 5]) + expected.id = "ga4gh:CN.Z-nHquCOaZ9-7qlEiHJjTCNTun8rphs5" + cnv_assertion_checks(resp, expected) + + # copies is indefinite range <= + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies_comparator=Comparator.LT_OR_EQUAL, + copies_type=ParsedPosType.INDEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=ParsedPosType.DEFINITE_RANGE, + start1=143134065, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + expected = deepcopy(cn_definite_number) + expected.copies = models.Range([None, 3]) + expected.id = "ga4gh:CN.ATCEmOahCspjTUZwYcg3hZVEhonOxqZ0" + cnv_assertion_checks(resp, expected) + + # copies is indefinite range >= + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies_comparator=Comparator.GT_OR_EQUAL, + copies_type=ParsedPosType.INDEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=ParsedPosType.DEFINITE_RANGE, + start1=143134065, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + expected = deepcopy(cn_definite_number) + expected.copies = [3, None] + expected.id = 
"ga4gh:CN.sXOp5QcykicrYTyRgzJvk4O9ha6ignDd" + cnv_assertion_checks(resp, expected) + + # start_pos and end_pos indefinite range + rb = ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.GT_OR_EQUAL, + end_pos_comparator=Comparator.LT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_count.model_dump(exclude_none=True) == { + "type": "CopyNumberCount", + "id": "ga4gh:CN.Hw32hEhUrWYl1j3Nty4cmrZlbveSw8oF", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.nXrqjadKZikhhdHvDmgVovb0HiKoXRq7", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", + }, + "start": [143134062, None], + "end": [None, 143284670], + }, + "copies": 3, + } + + +def test_parsed_to_cx_var( + test_cnv_handler, + cx_numbers, + cx_definite_ranges, + cx_indefinite_ranges, + cx_number_indefinite, +): + """Test that parsed_to_copy_number works for copy number change""" + # start and end use number + rb = ParsedToCxVarQuery( + start0=10001, + end0=1223133, + copy_change=models.CopyChange.EFO_0030069, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=ParsedPosType.NUMBER, + end_pos_type=ParsedPosType.NUMBER, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cx_numbers) + + # start and end use definite ranges + rb = ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=models.CopyChange.EFO_0030069, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=ParsedPosType.DEFINITE_RANGE, + end_pos_type=ParsedPosType.DEFINITE_RANGE, + start1=10006, + end1=1223133, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cx_definite_ranges) + + # start and end use indefinite ranges 
+ rb = ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=models.CopyChange.EFO_0030069, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cx_indefinite_ranges) + + # start uses number and end use indefinite range + rb = ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=models.CopyChange.EFO_0030069, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=ParsedPosType.NUMBER, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + cnv_assertion_checks(resp, cx_number_indefinite) + + +def test_invalid(test_cnv_handler): + """Test invalid copy number queries returns no variation and warnings""" + # Invalid Copy Change + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10491132, + end0=10535643, + copy_change="efo:1234", + accession="NC_000001.10", + ) + assert "Input should be 'efo:" in str(e.value) + + # NCBI36/hg18 assembly + rb = ParsedToCxVarQuery( + start0=2623228, + end0=3150942, + copy_change=models.CopyChange.EFO_0030070, + assembly=ClinVarAssembly.NCBI36, + chromosome="chr1", + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change is None + assert resp.warnings == ["NCBI36 assembly is not currently supported"] + + rb = ParsedToCxVarQuery( + start0=2623228, + end0=3150942, + copy_change=models.CopyChange.EFO_0030070, + assembly=ClinVarAssembly.HG18, + chromosome="chr1", + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change is None + assert resp.warnings == ["hg18 assembly is not currently supported"] + + # Must give both assembly + chromosome or accession 
+ ac_assembly_chr_msg = "Must provide either `accession` or both `assembly` and `chromosome`" # noqa: E501 + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=31738809, + end0=32217725, + copy_change=models.CopyChange.EFO_0030070, + assembly="hg38", + ) + assert ac_assembly_chr_msg in str(e.value) + + # Must give both assembly + chromosome or accession + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=31738809, + end0=32217725, + copy_change=models.CopyChange.EFO_0030070, + chromosome="chr15", + ) + assert ac_assembly_chr_msg in str(e.value) + + # Must give both assembly + chromosome or accession + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=31738809, + end0=32217725, + copy_change=models.CopyChange.EFO_0030070, + ) + assert ac_assembly_chr_msg in str(e.value) + + # invalid chromosome + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10001, + end0=1223133, + copy_change=models.CopyChange.EFO_0030070, + assembly=ClinVarAssembly.GRCH38, + chromosome="z", + ) + assert "`chromosome`, z, does not match r'^chr(X|Y|([1-9]|1[0-9]|2[0-2]))$'" in str( + e.value + ) + + # invalid assembly + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10001, + end0=1223133, + copy_change=models.CopyChange.EFO_0030070, + assembly="GRCh99", + ) + assert "Input should be 'GRCh38'," in str(e.value) + + # invalid accession + rb = ParsedToCxVarQuery( + start0=10491132, + end0=10535643, + copy_change=models.CopyChange.EFO_0030070, + accession="NC_00002310", + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert resp.copy_number_change is None + assert resp.warnings == [ + "SeqRepo unable to get translated identifiers for NC_00002310" + ] + + # Invalid position + rb = ParsedToCxVarQuery( + start0=31738809, + end0=2302991250, + copy_change=models.CopyChange.EFO_0030070, + accession="NC_000015.10", + ) + resp = test_cnv_handler.parsed_to_copy_number(rb) + assert 
resp.copy_number_change is None + assert resp.warnings == ["SeqRepo ValueError: Position out of range (2302991249)"] + + # start must be less than end + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=models.CopyChange.EFO_0030069, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=ParsedPosType.DEFINITE_RANGE, + end_pos_type=ParsedPosType.DEFINITE_RANGE, + start1=1223132, + end1=1223133, + ) + assert "end positions must be greater than start" in str(e.value) + + # start1 not provided + with pytest.raises(ValidationError) as e: + ParsedToCxVarQuery( + start0=10001, + end0=1223130, + copy_change=models.CopyChange.EFO_0030069, + assembly=ClinVarAssembly.GRCH38, + chromosome="chrY", + start_pos_type=ParsedPosType.DEFINITE_RANGE, + ) + assert "`start1` is required for definite ranges" in str(e.value) + + # copies1 not provided when copies_type is DefiniteRange + with pytest.raises(ValidationError) as e: + ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies_type=ParsedPosType.DEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + assert ( + "`copies1` must be provided for `copies_type == ParsedPosType.DEFINITE_RANGE`" + in str(e.value) + ) + + # copies_comparator not provided when copies_type is IndefiniteRange + with pytest.raises(ValidationError) as e: + ParsedToCnVarQuery( + start0=143134063, + end0=143284670, + copies0=3, + copies_type=ParsedPosType.INDEFINITE_RANGE, + assembly=ClinVarAssembly.GRCH37, + chromosome="chr1", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + start_pos_comparator=Comparator.LT_OR_EQUAL, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_comparator=Comparator.GT_OR_EQUAL, + ) + assert ( + "`copies_comparator` must 
be provided for `copies_type == ParsedPosType.INDEFINITE_RANGE`" + in str(e.value) + ) + + # `start_pos_comparator` not provided when start_pos_type is Indefinite Range + with pytest.raises(ValidationError) as e: + ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.GRCH38, + chromosome="chr15", + start_pos_type=ParsedPosType.INDEFINITE_RANGE, + end_pos_type=ParsedPosType.NUMBER, + ) + assert "`start_pos_comparator` is required for indefinite ranges" in str(e.value) + + # `end_pos_comparator` not provided when end_pos_type is Indefinite Range + with pytest.raises(ValidationError) as e: + ParsedToCnVarQuery( + start0=31738809, + end0=32217725, + copies0=2, + assembly=ClinVarAssembly.GRCH38, + chromosome="chr15", + start_pos_type=ParsedPosType.NUMBER, + end_pos_type=ParsedPosType.INDEFINITE_RANGE, + ) + assert "`end_pos_comparator` is required for indefinite ranges" in str(e.value) diff --git a/variation.log b/variation.log new file mode 100644 index 0000000..02258b2 --- /dev/null +++ b/variation.log @@ -0,0 +1,101 @@ +[2023-11-22 19:13:51,012] - cool_seq_tool - INFO : Downloading MANE.GRCh38.v1.3.refseq_genomic.gff.gz... +[2024-02-05 14:47:26,855] - cool_seq_tool - INFO : Downloading MANE summary file from NCBI. +[2024-02-05 15:25:53,557] - cool_seq_tool - INFO : Downloading LRG RefSeq data from NCBI. +[2024-02-05 15:26:31,897] - cool_seq_tool - INFO : LRG RefSeq data download complete. +[2024-02-05 19:31:14,079] - cool_seq_tool - INFO : Downloading LRG RefSeq data from NCBI. +[2024-02-05 19:31:17,759] - cool_seq_tool - INFO : LRG RefSeq data download complete. 
+[2024-02-05 19:31:18,400] - biocommons.seqrepo.fastadir.fastadir - INFO : File descriptor caching disabled +[2024-02-05 19:31:22,272] - botocore.credentials - INFO : Found credentials in shared credentials file: ~/.aws/credentials +[2024-02-06 11:29:01,937] - biocommons.seqrepo.fastadir.fastadir - INFO : File descriptor caching disabled +[2024-02-06 11:29:15,056] - botocore.credentials - INFO : Found credentials in shared credentials file: ~/.aws/credentials +[2024-02-06 11:53:24,414] - biocommons.seqrepo.fastadir.fastadir - INFO : File descriptor caching disabled +[2024-02-06 11:53:25,761] - botocore.credentials - INFO : Found credentials in shared credentials file: ~/.aws/credentials +[2024-02-06 11:53:27,471] - gene.database.dynamodb - INFO : gene_normalizer table is missing or unavailable. +[2024-02-06 19:33:52,095] - biocommons.seqrepo.fastadir.fastadir - INFO : File descriptor caching disabled +[2024-02-06 19:33:53,344] - botocore.credentials - INFO : Found credentials in shared credentials file: ~/.aws/credentials +[2024-02-06 21:00:45,926] - fastapi - WARNING : email-validator not installed, email fields will be treated as str. 
+To install, run: pip install email-validator +[2024-02-14 23:07:35,307] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term hgvs: +[2024-02-14 23:07:35,312] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term hgvs: +[2024-02-14 23:07:35,318] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: +[2024-02-14 23:07:35,322] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: +[2024-02-14 23:07:35,326] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: +[2024-02-14 23:07:35,334] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: +[2024-02-14 23:07:35,337] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: +[2024-02-14 23:07:51,121] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term grch37: +[2024-02-14 23:07:51,136] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term grch37: +[2024-02-14 23:07:51,142] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:07:51,146] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:07:51,160] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:07:51,163] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:07:51,168] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:08:19,651] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term braf: +[2024-02-14 23:08:19,656] - gene.database.dynamodb - ERROR : boto3 client error on 
get_records_by_id for search term braf: +[2024-02-14 23:08:19,658] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:08:19,660] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:08:19,661] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:08:19,663] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:08:19,665] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:08:19,674] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term v600e: +[2024-02-14 23:08:19,679] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term v600e: +[2024-02-14 23:08:19,684] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:08:19,689] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:08:19,695] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:08:19,701] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:08:19,707] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:22:19,732] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term braf: +[2024-02-14 23:22:19,738] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term braf: +[2024-02-14 23:22:19,744] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:22:19,752] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type 
for search term braf: +[2024-02-14 23:22:19,756] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:22:19,762] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:22:19,769] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term braf: +[2024-02-14 23:22:19,774] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term v600e: +[2024-02-14 23:22:19,777] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term v600e: +[2024-02-14 23:22:19,782] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:22:19,786] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:22:19,791] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:22:19,796] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:22:19,801] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e: +[2024-02-14 23:22:28,280] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term "braf: +[2024-02-14 23:22:28,303] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term "braf: +[2024-02-14 23:22:28,309] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term "braf: +[2024-02-14 23:22:28,311] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term "braf: +[2024-02-14 23:22:28,313] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term "braf: +[2024-02-14 23:22:28,315] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term 
"braf: +[2024-02-14 23:22:28,316] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term "braf: +[2024-02-14 23:22:28,317] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term v600e": +[2024-02-14 23:22:28,319] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term v600e": +[2024-02-14 23:22:28,321] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e": +[2024-02-14 23:22:28,323] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e": +[2024-02-14 23:22:28,324] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e": +[2024-02-14 23:22:28,326] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e": +[2024-02-14 23:22:28,328] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term v600e": +[2024-02-14 23:22:38,773] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term grch37: +[2024-02-14 23:22:38,780] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term grch37: +[2024-02-14 23:22:38,784] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:22:38,790] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:22:38,795] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:22:38,800] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-14 23:22:38,806] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-15 19:24:50,909] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search 
term grch37: +[2024-02-15 19:24:50,913] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term grch37: +[2024-02-15 19:24:50,915] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-15 19:24:50,917] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-15 19:24:50,919] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-15 19:24:50,921] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-15 19:24:50,924] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-16 15:11:34,273] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term grch37: +[2024-02-16 15:11:34,278] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term grch37: +[2024-02-16 15:11:34,282] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-16 15:11:34,285] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-16 15:11:34,289] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-16 15:11:34,295] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-16 15:11:34,300] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term grch37: +[2024-02-22 10:39:58,206] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term hgvs: +[2024-02-22 10:39:58,212] - gene.database.dynamodb - ERROR : boto3 client error on get_records_by_id for search term hgvs: +[2024-02-22 10:39:58,215] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for 
search term hgvs: +[2024-02-22 10:39:58,220] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: +[2024-02-22 10:39:58,225] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: +[2024-02-22 10:39:58,233] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: +[2024-02-22 10:39:58,240] - gene.database.dynamodb - ERROR : boto3 client error on get_refs_by_type for search term hgvs: diff --git a/variation/__init__.py b/variation/__init__.py new file mode 100644 index 0000000..ed2efae --- /dev/null +++ b/variation/__init__.py @@ -0,0 +1,30 @@ +"""The Variation Normalization package.""" +import logging +from os import environ + +if "VARIATION_NORM_EB_PROD" in environ: + LOG_FN = "/tmp/variation.log" +else: + LOG_FN = "variation.log" + +logging.basicConfig( + filename=LOG_FN, format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s" +) +logger = logging.getLogger("variation") +logger.setLevel(logging.DEBUG) +logger.handlers = [] + +logging.getLogger("cool_seq_tool").setLevel(logging.INFO) +logging.getLogger("boto3").setLevel(logging.INFO) +logging.getLogger("botocore").setLevel(logging.INFO) +logging.getLogger("urllib3").setLevel(logging.INFO) +logging.getLogger("python_jsonschema_objects").setLevel(logging.INFO) +logging.getLogger("hgvs.parser").setLevel(logging.INFO) +logging.getLogger("biocommons.seqrepo.seqaliasdb.seqaliasdb").setLevel(logging.INFO) +logging.getLogger("biocommons.seqrepo.fastadir.fastadir").setLevel(logging.INFO) +logging.getLogger("asyncio").setLevel(logging.INFO) + +if "VARIATION_NORM_EB_PROD" in environ: + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + logger.addHandler(ch) diff --git a/variation/classifiers/__init__.py b/variation/classifiers/__init__.py new file mode 100644 index 0000000..c174ba7 --- /dev/null +++ b/variation/classifiers/__init__.py @@ -0,0 +1,23 @@ +"""Classifier package level 
import.""" +from .amplification_classifier import AmplificationClassifier +from .cdna_deletion_classifier import CdnaDeletionClassifier +from .cdna_delins_classifier import CdnaDelInsClassifier +from .cdna_insertion_classifier import CdnaInsertionClassifier +from .cdna_reference_agree_classifier import CdnaReferenceAgreeClassifier +from .cdna_substitution_classifier import CdnaSubstitutionClassifier +from .genomic_deletion_ambiguous import GenomicDeletionAmbiguousClassifier +from .genomic_deletion_classifier import GenomicDeletionClassifier +from .genomic_delins_classifier import GenomicDelInsClassifier +from .genomic_duplication_ambiguous import GenomicDuplicationAmbiguousClassifier +from .genomic_duplication_classifier import GenomicDuplicationClassifier +from .genomic_insertion_classifier import GenomicInsertionClassifier +from .genomic_reference_agree_classifier import GenomicReferenceAgreeClassifier +from .genomic_substitution_classifier import GenomicSubstitutionClassifier +from .gnomad_vcf_classifier import GnomadVcfClassifier +from .hgvs_classifier import HgvsClassifier +from .protein_deletion_classifier import ProteinDeletionClassifier +from .protein_delins_classifier import ProteinDelInsClassifier +from .protein_insertion_classifier import ProteinInsertionClassifier +from .protein_reference_agree import ProteinReferenceAgreeClassifier +from .protein_stop_gain_classifier import ProteinStopGainClassifier +from .protein_substitution_classifier import ProteinSubstitutionClassifier diff --git a/variation/classifiers/amplification_classifier.py b/variation/classifiers/amplification_classifier.py new file mode 100644 index 0000000..69bb683 --- /dev/null +++ b/variation/classifiers/amplification_classifier.py @@ -0,0 +1,34 @@ +"""A module for the Amplification Classifier""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + AmplificationClassification, + 
Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class AmplificationClassifier(Classifier): + """The Amplification Classifier class""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the amplification classification. + + :return: List of list of tokens, where order matters, that represent an + amplification classification. + """ + return [[TokenType.GENE, TokenType.AMPLIFICATION]] + + def match(self, tokens: List[Token]) -> AmplificationClassification: + """Return the amplification classification from a list of token matches. + + :param tokens: List of ordered tokens that are exact match candidates for an + amplification classification + :return: amplification classification for the list of matched tokens + """ + return AmplificationClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=tokens[0], + ) diff --git a/variation/classifiers/cdna_deletion_classifier.py b/variation/classifiers/cdna_deletion_classifier.py new file mode 100644 index 0000000..929854c --- /dev/null +++ b/variation/classifiers/cdna_deletion_classifier.py @@ -0,0 +1,39 @@ +"""A module for the Cdna Deletion Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + CdnaDeletionClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class CdnaDeletionClassifier(Classifier): + """The Cdna Deletion Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the cdna deletion classification. + + :return: List of list of tokens, where order matters, that represent a cdna + deletion classification. 
+ """ + return [[TokenType.GENE, TokenType.CDNA_DELETION]] + + def match(self, tokens: List[Token]) -> CdnaDeletionClassification: + """Return the cdna deletion classification from a list of token matches. + + :param tokens: List of ordered tokens that are exact match candidates for a + cdna deletion classification + :return: cdna deletion classification for the list of matched tokens + """ + gene_token, cdna_deletion_token = tokens + + return CdnaDeletionClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=cdna_deletion_token.pos0, + pos1=cdna_deletion_token.pos1, + deleted_sequence=cdna_deletion_token.deleted_sequence, + ) diff --git a/variation/classifiers/cdna_delins_classifier.py b/variation/classifiers/cdna_delins_classifier.py new file mode 100644 index 0000000..891906e --- /dev/null +++ b/variation/classifiers/cdna_delins_classifier.py @@ -0,0 +1,39 @@ +"""A module for the Cdna DelIns Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + CdnaDelInsClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class CdnaDelInsClassifier(Classifier): + """The Cdna DelIns Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the cdna delins classification. + + :return: List of list of tokens, where order matters, that represent a cdna + delins classification. + """ + return [[TokenType.GENE, TokenType.CDNA_DELINS]] + + def match(self, tokens: List[Token]) -> CdnaDelInsClassification: + """Return the cdna delins classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + cdna delins classification + :return: cdna delins classification for the list of matched tokens + """ + gene_token, cdna_delins_token = tokens + + return CdnaDelInsClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=cdna_delins_token.pos0, + pos1=cdna_delins_token.pos1, + inserted_sequence=cdna_delins_token.inserted_sequence, + ) diff --git a/variation/classifiers/cdna_insertion_classifier.py b/variation/classifiers/cdna_insertion_classifier.py new file mode 100644 index 0000000..dd4daf0 --- /dev/null +++ b/variation/classifiers/cdna_insertion_classifier.py @@ -0,0 +1,39 @@ +"""A module for the Cdna insertion Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + CdnaInsertionClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class CdnaInsertionClassifier(Classifier): + """The Cdna insertion Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the cdna insertion classification. + + :return: List of list of tokens, where order matters, that represent a cdna + insertion classification. + """ + return [[TokenType.GENE, TokenType.CDNA_INSERTION]] + + def match(self, tokens: List[Token]) -> CdnaInsertionClassification: + """Return the cdna insertion classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + cdna insertion classification + :return: cdna insertion classification for the list of matched tokens + """ + gene_token, cdna_ins_token = tokens + + return CdnaInsertionClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=cdna_ins_token.pos0, + pos1=cdna_ins_token.pos1, + inserted_sequence=cdna_ins_token.inserted_sequence, + ) diff --git a/variation/classifiers/cdna_reference_agree_classifier.py b/variation/classifiers/cdna_reference_agree_classifier.py new file mode 100644 index 0000000..297dc7e --- /dev/null +++ b/variation/classifiers/cdna_reference_agree_classifier.py @@ -0,0 +1,48 @@ +"""A module for the cDNA Reference Agree Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + CdnaReferenceAgreeClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class CdnaReferenceAgreeClassifier(Classifier): + """The Cdna Reference Agree Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the cdna reference agree + classification. + + :return: List of list of tokens, where order matters, that represent a cdna + reference agree classification. + """ + return [ + [TokenType.GENE, TokenType.CDNA_REFERENCE_AGREE], + [ + TokenType.GENE, + TokenType.PROTEIN_SUBSTITUTION, + TokenType.CDNA_REFERENCE_AGREE, + ], + ] + + def match(self, tokens: List[Token]) -> CdnaReferenceAgreeClassification: + """Return the cdna reference agree classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + cdna reference agree classification + :return: cdna reference agree classification for the list of matched tokens + """ + if len(tokens) == 2: + gene_token, cdna_ref_agree_token = tokens + else: + gene_token, _, cdna_ref_agree_token = tokens + + return CdnaReferenceAgreeClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos=cdna_ref_agree_token.pos, + ) diff --git a/variation/classifiers/cdna_substitution_classifier.py b/variation/classifiers/cdna_substitution_classifier.py new file mode 100644 index 0000000..6d4dce7 --- /dev/null +++ b/variation/classifiers/cdna_substitution_classifier.py @@ -0,0 +1,50 @@ +"""A module for the Cdna Substitution Classifier.""" +from typing import List, Optional + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + CdnaSubstitutionClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class CdnaSubstitutionClassifier(Classifier): + """The Cdna Substitution Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the cdna substitution classification. + + :return: List of list of tokens, where order matters, that represent a cdna + substitution classification. + """ + return [ + [TokenType.GENE, TokenType.CDNA_SUBSTITUTION], + [ + TokenType.GENE, + TokenType.PROTEIN_SUBSTITUTION, + TokenType.CDNA_SUBSTITUTION, + ], + ] + + def match(self, tokens: List[Token]) -> Optional[CdnaSubstitutionClassification]: + """Return the cdna substitution classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + cdna substitution classification + :return: cdna substitution classification for the list of matched tokens + """ + if len(tokens) == 2: + gene_token, cdna_sub_token = tokens + else: + gene_token, _, cdna_sub_token = tokens + + if len(cdna_sub_token.ref) == len(cdna_sub_token.alt): + return CdnaSubstitutionClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos=cdna_sub_token.pos, + ref=cdna_sub_token.ref, + alt=cdna_sub_token.alt, + ) diff --git a/variation/classifiers/classifier.py b/variation/classifiers/classifier.py new file mode 100644 index 0000000..400a7e8 --- /dev/null +++ b/variation/classifiers/classifier.py @@ -0,0 +1,45 @@ +"""Module for Classification methods.""" +from abc import ABC, abstractmethod +from typing import List, Optional + +from variation.schemas.classification_response_schema import Classification +from variation.schemas.token_response_schema import Token, TokenType + + +class Classifier(ABC): + """The Classifier class.""" + + @abstractmethod + def match(self, tokens: List[Token]) -> Optional[Classification]: + """Return the classification from a list of token matches. + + :param tokens: List of ordered tokens that are exact match candidates for a + given classification + :return: A classification for the list of matched tokens + """ + + @abstractmethod + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for a given classification. + + :return: List of list of tokens, where order matters, that represent a given + classification. 
+ """ + pass + + def can_classify(self, tokens: List[Token]) -> bool: + """Return whether or not a list of tokens can be classified by a given + classification + + :param tokens: List of tokens found in an input query + :return: `True` if a list of tokens matches the tokens needed, where order + matters, to represent a given classification. `False`, otherwise. + """ + token_types = list(map(lambda t: t.token_type, tokens)) + exact_matches: List[List[str]] = [] + + for candidate in self.exact_match_candidates(): + if token_types == candidate: + exact_matches.append(candidate) + + return len(exact_matches) == 1 diff --git a/variation/classifiers/genomic_deletion_ambiguous.py b/variation/classifiers/genomic_deletion_ambiguous.py new file mode 100644 index 0000000..6173b65 --- /dev/null +++ b/variation/classifiers/genomic_deletion_ambiguous.py @@ -0,0 +1,52 @@ +"""A module for the Genomic Deletion Ambiguous Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicDeletionAmbiguousClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType +from variation.utils import get_ambiguous_type + + +class GenomicDeletionAmbiguousClassifier(Classifier): + """The Genomic Deletion Ambiguous Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the genomic ambiguous deletion + classification. + + :return: List of list of tokens, where order matters, that represent a genomic + ambiguous deletion classification. + """ + return [[TokenType.GENE, TokenType.GENOMIC_DELETION_AMBIGUOUS]] + + def match(self, tokens: List[Token]) -> GenomicDeletionAmbiguousClassification: + """Return the genomic ambiguous deletion classification from a list of token + matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + genomic ambiguous deletion classification + :return: genomic ambiguous deletion classification for the list of matched + tokens + """ + gene_token, genomic_del_token = tokens + ambiguous_type = get_ambiguous_type( + genomic_del_token.pos0, + genomic_del_token.pos1, + genomic_del_token.pos2, + genomic_del_token.pos3, + genomic_del_token.ambiguous_regex_type, + ) + + return GenomicDeletionAmbiguousClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=genomic_del_token.pos0, + pos1=genomic_del_token.pos1, + pos2=genomic_del_token.pos2, + pos3=genomic_del_token.pos3, + ambiguous_type=ambiguous_type, + ) diff --git a/variation/classifiers/genomic_deletion_classifier.py b/variation/classifiers/genomic_deletion_classifier.py new file mode 100644 index 0000000..e5b60f0 --- /dev/null +++ b/variation/classifiers/genomic_deletion_classifier.py @@ -0,0 +1,39 @@ +"""A module for the Genomic Deletion Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicDeletionClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class GenomicDeletionClassifier(Classifier): + """The Genomic Deletion Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the genomic deletion classification. + + :return: List of list of tokens, where order matters, that represent a genomic + deletion classification. + """ + return [[TokenType.GENE, TokenType.GENOMIC_DELETION]] + + def match(self, tokens: List[Token]) -> GenomicDeletionClassification: + """Return the genomic deletion classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + genomic deletion classification + :return: genomic deletion classification for the list of matched tokens + """ + gene_token, genomic_deletion_token = tokens + + return GenomicDeletionClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=genomic_deletion_token.pos0, + pos1=genomic_deletion_token.pos1, + deleted_sequence=genomic_deletion_token.deleted_sequence, + ) diff --git a/variation/classifiers/genomic_delins_classifier.py b/variation/classifiers/genomic_delins_classifier.py new file mode 100644 index 0000000..9be0530 --- /dev/null +++ b/variation/classifiers/genomic_delins_classifier.py @@ -0,0 +1,39 @@ +"""A module for the Genomic DelIns Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicDelInsClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class GenomicDelInsClassifier(Classifier): + """The Genomic DelIns Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the genomic delins classification. + + :return: List of list of tokens, where order matters, that represent a genomic + delins classification. + """ + return [[TokenType.GENE, TokenType.GENOMIC_DELINS]] + + def match(self, tokens: List[Token]) -> GenomicDelInsClassification: + """Return the genomic delins classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + genomic delins classification + :return: genomic delins classification for the list of matched tokens + """ + gene_token, genomic_delins_token = tokens + + return GenomicDelInsClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=genomic_delins_token.pos0, + pos1=genomic_delins_token.pos1, + inserted_sequence=genomic_delins_token.inserted_sequence, + ) diff --git a/variation/classifiers/genomic_duplication_ambiguous.py b/variation/classifiers/genomic_duplication_ambiguous.py new file mode 100644 index 0000000..6bcf9a5 --- /dev/null +++ b/variation/classifiers/genomic_duplication_ambiguous.py @@ -0,0 +1,52 @@ +"""A module for the Genomic Duplication Ambiguous Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicDuplicationAmbiguousClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType +from variation.utils import get_ambiguous_type + + +class GenomicDuplicationAmbiguousClassifier(Classifier): + """The Genomic Duplication Ambiguous Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the genomic ambiguous duplication + classification. + + :return: List of list of tokens, where order matters, that represent a genomic + ambiguous duplication classification. + """ + return [[TokenType.GENE, TokenType.GENOMIC_DUPLICATION_AMBIGUOUS]] + + def match(self, tokens: List[Token]) -> GenomicDuplicationAmbiguousClassification: + """Return the genomic ambiguous duplication classification from a list of token + matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + genomic ambiguous duplication classification + :return: genomic ambiguous duplication classification for the list of matched + tokens + """ + gene_token, genomic_dup_token = tokens + ambiguous_type = get_ambiguous_type( + genomic_dup_token.pos0, + genomic_dup_token.pos1, + genomic_dup_token.pos2, + genomic_dup_token.pos3, + genomic_dup_token.ambiguous_regex_type, + ) + + return GenomicDuplicationAmbiguousClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=genomic_dup_token.pos0, + pos1=genomic_dup_token.pos1, + pos2=genomic_dup_token.pos2, + pos3=genomic_dup_token.pos3, + ambiguous_type=ambiguous_type, + ) diff --git a/variation/classifiers/genomic_duplication_classifier.py b/variation/classifiers/genomic_duplication_classifier.py new file mode 100644 index 0000000..271eb96 --- /dev/null +++ b/variation/classifiers/genomic_duplication_classifier.py @@ -0,0 +1,38 @@ +"""A module for the Genomic Duplication Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicDuplicationClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class GenomicDuplicationClassifier(Classifier): + """The Genomic Duplication Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the genomic duplication classification. + + :return: List of list of tokens, where order matters, that represent a genomic + duplication classification. + """ + return [[TokenType.GENE, TokenType.GENOMIC_DUPLICATION]] + + def match(self, tokens: List[Token]) -> GenomicDuplicationClassification: + """Return the genomic duplication classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + genomic duplication classification + :return: genomic duplication classification for the list of matched tokens + """ + gene_token, genomic_dup_token = tokens + + return GenomicDuplicationClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=genomic_dup_token.pos0, + pos1=genomic_dup_token.pos1, + ) diff --git a/variation/classifiers/genomic_insertion_classifier.py b/variation/classifiers/genomic_insertion_classifier.py new file mode 100644 index 0000000..09a362e --- /dev/null +++ b/variation/classifiers/genomic_insertion_classifier.py @@ -0,0 +1,39 @@ +"""A module for the Genomic Insertion Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicInsertionClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class GenomicInsertionClassifier(Classifier): + """The Genomic Insertion Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the genomic insertion classification. + + :return: List of list of tokens, where order matters, that represent a genomic + insertion classification. + """ + return [[TokenType.GENE, TokenType.GENOMIC_INSERTION]] + + def match(self, tokens: List[Token]) -> GenomicInsertionClassification: + """Return the genomic insertion classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + genomic insertion classification + :return: genomic insertion classification for the list of matched tokens + """ + gene_token, genomic_ins_token = tokens + + return GenomicInsertionClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos0=genomic_ins_token.pos0, + pos1=genomic_ins_token.pos1, + inserted_sequence=genomic_ins_token.inserted_sequence, + ) diff --git a/variation/classifiers/genomic_reference_agree_classifier.py b/variation/classifiers/genomic_reference_agree_classifier.py new file mode 100644 index 0000000..ccf9d93 --- /dev/null +++ b/variation/classifiers/genomic_reference_agree_classifier.py @@ -0,0 +1,39 @@ +"""A module for the Genomic Reference Agree Classifier.""" +from typing import List + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicReferenceAgreeClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class GenomicReferenceAgreeClassifier(Classifier): + """The Genomic Reference Agree Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the genomic reference agree + classification. + + :return: List of list of tokens, where order matters, that represent a genomic + reference agree classification. + """ + return [[TokenType.GENE, TokenType.GENOMIC_REFERENCE_AGREE]] + + def match(self, tokens: List[Token]) -> GenomicReferenceAgreeClassification: + """Return the genomic reference agree classification from a list of token + matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + genomic reference agree classification + :return: genomic reference agree classification for the list of matched tokens + """ + gene_token, genomic_ref_agree_token = tokens + + return GenomicReferenceAgreeClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos=genomic_ref_agree_token.pos, + ) diff --git a/variation/classifiers/genomic_substitution_classifier.py b/variation/classifiers/genomic_substitution_classifier.py new file mode 100644 index 0000000..4f39b30 --- /dev/null +++ b/variation/classifiers/genomic_substitution_classifier.py @@ -0,0 +1,51 @@ +"""A module for the Genomic Substitution Classifier.""" +from typing import List, Optional + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicSubstitutionClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import Token, TokenType + + +class GenomicSubstitutionClassifier(Classifier): + """The Genomic Substitution Classifier class.""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the genomic substitution + classification. + + :return: List of list of tokens, where order matters, that represent a genomic + substitution classification. + """ + return [ + [TokenType.GENE, TokenType.GENOMIC_SUBSTITUTION], + [ + TokenType.GENE, + TokenType.PROTEIN_SUBSTITUTION, + TokenType.GENOMIC_SUBSTITUTION, + ], + ] + + def match(self, tokens: List[Token]) -> Optional[GenomicSubstitutionClassification]: + """Return the genomic substitution classification from a list of token matches. 
+ + :param tokens: List of ordered tokens that are exact match candidates for a + genomic substitution classification + :return: genomic substitution classification for the list of matched tokens + """ + if len(tokens) == 2: + gene_token, genomic_sub_token = tokens + else: + gene_token, _, genomic_sub_token = tokens + + if len(genomic_sub_token.ref) == len(genomic_sub_token.alt): + return GenomicSubstitutionClassification( + matching_tokens=tokens, + nomenclature=Nomenclature.FREE_TEXT, + gene_token=gene_token, + pos=genomic_sub_token.pos, + ref=genomic_sub_token.ref, + alt=genomic_sub_token.alt, + ) diff --git a/variation/classifiers/gnomad_vcf_classifier.py b/variation/classifiers/gnomad_vcf_classifier.py new file mode 100644 index 0000000..6ebfae8 --- /dev/null +++ b/variation/classifiers/gnomad_vcf_classifier.py @@ -0,0 +1,72 @@ +"""A module for the gnomAD VCF Classifier""" +from typing import List, Optional, Union + +from variation.classifiers.classifier import Classifier +from variation.schemas.classification_response_schema import ( + GenomicDeletionClassification, + GenomicDelInsClassification, + GenomicInsertionClassification, + GenomicReferenceAgreeClassification, + GenomicSubstitutionClassification, + Nomenclature, +) +from variation.schemas.token_response_schema import GnomadVcfToken, TokenType + + +class GnomadVcfClassifier(Classifier): + """The gnomAD VCF Classifier""" + + def exact_match_candidates(self) -> List[List[TokenType]]: + """Return the token match candidates for the gnomad vcf classification. + + :return: List of list of tokens, where order matters, that represent a gnomad + vcf classification. 
+ """ + return [[TokenType.GNOMAD_VCF]] + + def match( + self, token: GnomadVcfToken + ) -> Optional[ + Union[ + GenomicReferenceAgreeClassification, + GenomicSubstitutionClassification, + GenomicInsertionClassification, + GenomicDeletionClassification, + ] + ]: + """Return the genomic classification (either reference agree, substitution, + insertion, or deletion) from a gnomad vcf token. + Currently only support simple genomic variation. + + :param token: gnomad vcf token + :return: The corresponding genomic classification for the gnomad vcf token if + simple variation change. Else, return `None` + """ + params = {"matching_tokens": [token], "nomenclature": Nomenclature.GNOMAD_VCF} + + ref = token.ref + alt = token.alt + + len_ref = len(ref) + len_alt = len(alt) + + if len_ref == len_alt: + # substitution + params["pos"] = token.pos + + if ref == alt: + return GenomicReferenceAgreeClassification(**params) + else: + params["ref"] = ref + params["alt"] = alt + + return GenomicSubstitutionClassification(**params) + + # delins + params["pos0"] = token.pos + params["pos1"] = (params["pos0"] + len_ref) - 1 + if params["pos0"] == params["pos1"]: + del params["pos1"] + + params["inserted_sequence"] = alt + return GenomicDelInsClassification(**params) diff --git a/variation/classifiers/hgvs_classifier.py b/variation/classifiers/hgvs_classifier.py new file mode 100644 index 0000000..073ac99 --- /dev/null +++ b/variation/classifiers/hgvs_classifier.py @@ -0,0 +1,372 @@ +"""A module for the HGVS Classifier.""" +from re import Match, Pattern +from typing import Dict, List, Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.classifiers.classifier import Classifier +from variation.regex import ( + CDNA_REGEXPRS, + GENOMIC_DEL_AMBIGUOUS_REGEXPRS, + GENOMIC_DUP_AMBIGUOUS_REGEXPRS, + GENOMIC_REGEXPRS, + PROTEIN_REGEXPRS, +) +from variation.schemas.app_schemas import AmbiguousRegexType +from variation.schemas.classification_response_schema import ( + 
class HgvsClassifier(Classifier):
    """The HGVS Classifier."""

    def exact_match_candidates(self) -> List[List[TokenType]]:
        """Return the token match candidates for the hgvs classification.

        :return: List of list of tokens, where order matters, that represent a hgvs
            classification.
        """
        return [[TokenType.HGVS]]

    def match(self, token: HgvsToken) -> Optional[Classification]:
        """Return the classification from a hgvs token using regex matches to
        determine the type of classification.

        :param token: hgvs token
        :return: The corresponding classification for the hgvs token if a regex
            match is found. Else, return `None`
        """
        classification = None
        params = {
            "matching_tokens": [token],
            "nomenclature": Nomenclature.HGVS,
            "ac": token.accession,
        }

        if token.coordinate_type == AnnotationLayer.GENOMIC:
            classification = self._genomic_classification(token, params)
            if not classification:
                # Simple regexes did not match: try ambiguous dup/del expressions
                classification = self._genomic_ambiguous_classification(token, params)
        elif token.coordinate_type == AnnotationLayer.CDNA:
            classification = self._cdna_classification(token, params)
        elif token.coordinate_type == AnnotationLayer.PROTEIN:
            classification = self._protein_classification(token, params)

        return classification

    @staticmethod
    def _regex_match(change: str, regex: Pattern) -> Optional[Match]:
        """Strip parentheses from `change` and return whether or not `change`
        matches the `regex`

        :param change: The alteration part of the hgvs expression
        :param regex: The pattern to match against
        :return: A regex match if found against pattern, else `None`
        """
        if change[0] == "(" and change[-1] == ")":
            return regex.match(change[1:-1])
        return regex.match(change)

    @staticmethod
    def _pos_fields_to_int(params: Dict, *keys: str) -> None:
        """Convert captured position fields in `params` from strings to ints,
        in place. A field whose value is `None` (an absent optional end position)
        is left unchanged.

        :param params: Classification fields to mutate
        :param keys: Names of the position fields to convert
        """
        for key in keys:
            if params[key] is not None:
                params[key] = int(params[key])

    def _protein_classification(
        self, token: HgvsToken, params: Dict
    ) -> Optional[Classification]:
        """Determine if hgvs token matches regex checks and return corresponding
        protein classification if a match is found

        :param token: hgvs token
        :param params: Base fields for a classification. This will get mutated if a
            match is found.
        :return: Protein classification if hgvs token matches regex checks.
            Else, `None`
        """
        for regex, _, classification_type in PROTEIN_REGEXPRS:
            match = self._regex_match(token.change, regex)
            if not match:
                continue

            params.update(match.groupdict())

            if classification_type == ClassificationType.PROTEIN_SUBSTITUTION:
                self._pos_fields_to_int(params, "pos")
                if params["alt"] in {"Ter", "*"}:
                    # Normalize the stop codon representation to "*"
                    params["alt"] = "*"
                    return ProteinStopGainClassification(**params)
                return ProteinSubstitutionClassification(**params)
            elif classification_type == ClassificationType.PROTEIN_REFERENCE_AGREE:
                self._pos_fields_to_int(params, "pos")
                return ProteinReferenceAgreeClassification(**params)
            elif classification_type == ClassificationType.PROTEIN_DELINS:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return ProteinDelInsClassification(**params)
            elif classification_type == ClassificationType.PROTEIN_DELETION:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return ProteinDeletionClassification(**params)
            elif classification_type == ClassificationType.PROTEIN_INSERTION:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return ProteinInsertionClassification(**params)

        return None

    def _cdna_classification(
        self, token: HgvsToken, params: Dict
    ) -> Optional[Classification]:
        """Determine if hgvs token matches regex checks and return corresponding
        cdna classification if a match is found

        :param token: hgvs token
        :param params: Base fields for a classification. This will get mutated if a
            match is found.
        :return: cdna classification if hgvs token matches regex checks.
            Else, `None`
        """
        for regex, _, classification_type in CDNA_REGEXPRS:
            match = self._regex_match(token.change, regex)
            if not match:
                continue

            params.update(match.groupdict())

            if classification_type == ClassificationType.CDNA_SUBSTITUTION:
                self._pos_fields_to_int(params, "pos")
                return CdnaSubstitutionClassification(**params)
            elif classification_type == ClassificationType.CDNA_REFERENCE_AGREE:
                self._pos_fields_to_int(params, "pos")
                return CdnaReferenceAgreeClassification(**params)
            elif classification_type == ClassificationType.CDNA_DELINS:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return CdnaDelInsClassification(**params)
            elif classification_type == ClassificationType.CDNA_DELETION:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return CdnaDeletionClassification(**params)
            elif classification_type == ClassificationType.CDNA_INSERTION:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return CdnaInsertionClassification(**params)

        # Explicit None for consistency with _protein_classification
        return None

    def _genomic_classification(
        self, token: HgvsToken, params: Dict
    ) -> Optional[Classification]:
        """Determine if hgvs token matches regex checks and return corresponding
        genomic classification if a match is found. Only checks against 'simple'
        duplication/deletions.

        :param token: hgvs token
        :param params: Base fields for a classification. This will get mutated if a
            match is found.
        :return: genomic classification if hgvs token matches regex checks.
            Else, `None`
        """
        for regex, _, classification_type in GENOMIC_REGEXPRS:
            match = self._regex_match(token.change, regex)
            if not match:
                continue

            params.update(match.groupdict())

            if classification_type == ClassificationType.GENOMIC_SUBSTITUTION:
                self._pos_fields_to_int(params, "pos")
                return GenomicSubstitutionClassification(**params)
            elif classification_type == ClassificationType.GENOMIC_REFERENCE_AGREE:
                self._pos_fields_to_int(params, "pos")
                return GenomicReferenceAgreeClassification(**params)
            elif classification_type == ClassificationType.GENOMIC_DELINS:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return GenomicDelInsClassification(**params)
            elif classification_type == ClassificationType.GENOMIC_INSERTION:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return GenomicInsertionClassification(**params)
            elif classification_type == ClassificationType.GENOMIC_DELETION:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return GenomicDeletionClassification(**params)
            elif classification_type == ClassificationType.GENOMIC_DUPLICATION:
                self._pos_fields_to_int(params, "pos0", "pos1")
                return GenomicDuplicationClassification(**params)

        # Explicit None for consistency with _protein_classification
        return None

    def _genomic_ambiguous_classification(
        self, token: HgvsToken, params: Dict
    ) -> Optional[Classification]:
        """Determine if hgvs token matches regex checks and return corresponding
        genomic ambiguous classification if a match is found. Only checks against
        ambiguous duplication/deletions.

        :param token: hgvs token
        :param params: Base fields for a classification. This will get mutated if a
            match is found.
        :return: genomic ambiguous classification if hgvs token matches regex
            checks. Else, `None`
        """
        if token.token.endswith("dup"):
            return self._genomic_dup_ambiguous_classification(token, params)
        elif token.token.endswith("del"):
            return self._genomic_del_ambiguous_classification(token, params)

        return None

    @staticmethod
    def _update_ambiguous_params(params: Dict, regex_type: AmbiguousRegexType) -> None:
        """Mutates `params` to match correct types and gets associated ambiguous
        type from fields in `params`

        Positions are converted to ints unless they are the uncertain marker `"?"`;
        optional positions (`pos1`, `pos3`) that were not captured are set to
        `None`.

        :param params: Fields for a classification. This will get mutated.
        :param regex_type: The kind of ambiguous regex that was used
        """
        for key in ("pos0", "pos1", "pos2", "pos3"):
            if key in params:
                # Guard against a captured-but-empty group so int() cannot
                # raise on None
                if params[key] is not None and params[key] != "?":
                    params[key] = int(params[key])
            else:
                params[key] = None

        ambiguous_type = get_ambiguous_type(
            params["pos0"], params["pos1"], params["pos2"], params["pos3"], regex_type
        )
        if ambiguous_type:
            params["ambiguous_type"] = ambiguous_type

    def _genomic_dup_ambiguous_classification(
        self, token: HgvsToken, params: Dict
    ) -> Optional[Classification]:
        """Determine if hgvs token matches regex checks and return corresponding
        genomic ambiguous duplication classification if a match is found. Only
        checks against genomic ambiguous duplications.

        :param token: hgvs token
        :param params: Base fields for a classification. This will get mutated if a
            match is found.
        :return: genomic ambiguous duplication classification if hgvs token matches
            regex checks. Else, `None`
        """
        for regex, _, classification_type, regex_type in GENOMIC_DUP_AMBIGUOUS_REGEXPRS:
            match = regex.match(token.change)
            if not match:
                continue

            params.update(match.groupdict())

            if (
                classification_type
                == ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS
            ):
                self._update_ambiguous_params(params, regex_type)

                # If ambiguous type not in params, it means we don't support it yet
                if "ambiguous_type" in params:
                    return GenomicDuplicationAmbiguousClassification(**params)
        return None

    def _genomic_del_ambiguous_classification(
        self, token: HgvsToken, params: Dict
    ) -> Optional[Classification]:
        """Determine if hgvs token matches regex checks and return corresponding
        genomic ambiguous deletion classification if a match is found. Only checks
        against genomic ambiguous deletion.

        :param token: hgvs token
        :param params: Base fields for a classification. This will get mutated if a
            match is found.
        :return: genomic ambiguous deletion classification if hgvs token matches
            regex checks. Else, `None`
        """
        for regex, _, classification_type, regex_type in GENOMIC_DEL_AMBIGUOUS_REGEXPRS:
            match = regex.match(token.change)
            if not match:
                continue

            params.update(match.groupdict())

            if classification_type == ClassificationType.GENOMIC_DELETION_AMBIGUOUS:
                self._update_ambiguous_params(params, regex_type)

                # If ambiguous type not in params, it means we don't support it yet
                if "ambiguous_type" in params:
                    return GenomicDeletionAmbiguousClassification(**params)
        return None
class ProteinDeletionClassifier(Classifier):
    """The Protein Deletion Classifier class."""

    def exact_match_candidates(self) -> List[List[TokenType]]:
        """Return the token match candidates for the protein deletion
        classification.

        :return: List of list of tokens, where order matters, that represent a
            protein deletion classification.
        """
        return [[TokenType.GENE, TokenType.PROTEIN_DELETION]]

    def match(self, tokens: List[Token]) -> ProteinDeletionClassification:
        """Return the protein deletion classification from a list of token matches.

        :param tokens: Ordered tokens: a gene token followed by a protein deletion
            token
        :return: protein deletion classification for the list of matched tokens
        """
        gene = tokens[0]
        deletion = tokens[1]

        return ProteinDeletionClassification(
            matching_tokens=tokens,
            nomenclature=Nomenclature.FREE_TEXT,
            gene_token=gene,
            aa0=deletion.aa0,
            pos0=deletion.pos0,
            aa1=deletion.aa1,
            pos1=deletion.pos1,
            deleted_sequence=deletion.deleted_sequence,
        )
class ProteinDelInsClassifier(Classifier):
    """The Protein DelIns Classifier class."""

    def exact_match_candidates(self) -> List[List[TokenType]]:
        """Return the token match candidates for the protein delins classification.

        :return: List of list of tokens, where order matters, that represent a
            protein delins classification.
        """
        return [[TokenType.GENE, TokenType.PROTEIN_DELINS]]

    def match(self, tokens: List[Token]) -> ProteinDelInsClassification:
        """Return the protein delins classification from a list of token matches.

        :param tokens: Ordered tokens: a gene token followed by a protein delins
            token
        :return: protein delins classification for the list of matched tokens
        """
        gene, delins = tokens

        fields = {
            "matching_tokens": tokens,
            "nomenclature": Nomenclature.FREE_TEXT,
            "gene_token": gene,
            "aa0": delins.aa0,
            "pos0": delins.pos0,
            "aa1": delins.aa1,
            "pos1": delins.pos1,
            "inserted_sequence": delins.inserted_sequence,
        }
        return ProteinDelInsClassification(**fields)
class ProteinInsertionClassifier(Classifier):
    """The Protein Insertion Classifier class."""

    def exact_match_candidates(self) -> List[List[TokenType]]:
        """Return the token match candidates for the protein insertion
        classification.

        :return: List of list of tokens, where order matters, that represent a
            protein insertion classification.
        """
        return [[TokenType.GENE, TokenType.PROTEIN_INSERTION]]

    def match(self, tokens: List[Token]) -> ProteinInsertionClassification:
        """Return the protein insertion classification from a list of token
        matches.

        :param tokens: Ordered tokens: a gene token followed by a protein insertion
            token
        :return: protein insertion classification for the list of matched tokens
        """
        gene, insertion = tokens

        return ProteinInsertionClassification(
            matching_tokens=tokens,
            nomenclature=Nomenclature.FREE_TEXT,
            gene_token=gene,
            aa0=insertion.aa0,
            pos0=insertion.pos0,
            aa1=insertion.aa1,
            pos1=insertion.pos1,
            inserted_sequence=insertion.inserted_sequence,
        )
class ProteinReferenceAgreeClassifier(Classifier):
    """The Reference Agree Classifier class."""

    def exact_match_candidates(self) -> List[List[TokenType]]:
        """Return the token match candidates for the protein reference agree
        classification.

        :return: List of list of tokens, where order matters, that represent a
            protein reference agree classification.
        """
        return [[TokenType.GENE, TokenType.PROTEIN_REFERENCE_AGREE]]

    def match(self, tokens: List[Token]) -> ProteinReferenceAgreeClassification:
        """Return the protein reference agree classification from a list of token
        matches.

        :param tokens: Ordered tokens: a gene token followed by a protein reference
            agree token
        :return: protein reference agree classification for the list of matched
            tokens
        """
        gene = tokens[0]
        ref_agree = tokens[1]

        return ProteinReferenceAgreeClassification(
            matching_tokens=tokens,
            nomenclature=Nomenclature.FREE_TEXT,
            gene_token=gene,
            pos=ref_agree.pos,
            ref=ref_agree.ref,
        )
class ProteinStopGainClassifier(Classifier):
    """The Protein Stop Gain Classifier class."""

    def exact_match_candidates(self) -> List[List[TokenType]]:
        """Return the token match candidates for the protein stop gain
        classification.

        :return: List of list of tokens, where order matters, that represent a
            protein stop gain classification.
        """
        return [[TokenType.GENE, TokenType.PROTEIN_STOP_GAIN]]

    def match(self, tokens: List[Token]) -> ProteinStopGainClassification:
        """Return the protein stop gain classification from a list of token
        matches.

        :param tokens: Ordered tokens: a gene token followed by a protein stop gain
            token
        :return: protein stop gain classification for the list of matched tokens
        """
        gene, stop_gain = tokens

        fields = {
            "matching_tokens": tokens,
            "nomenclature": Nomenclature.FREE_TEXT,
            "gene_token": gene,
            "pos": stop_gain.pos,
            "ref": stop_gain.ref,
            "alt": stop_gain.alt,
        }
        return ProteinStopGainClassification(**fields)
class ProteinSubstitutionClassifier(Classifier):
    """The ProteinSubstitution Classifier class."""

    def exact_match_candidates(self) -> List[List[TokenType]]:
        """Return the token match candidates for the protein substitution
        classification.

        :return: List of list of tokens, where order matters, that represent a
            protein substitution classification.
        """
        return [[TokenType.GENE, TokenType.PROTEIN_SUBSTITUTION]]

    def match(self, tokens: List[Token]) -> ProteinSubstitutionClassification:
        """Return the protein substitution classification from a list of token
        matches.

        :param tokens: Ordered tokens: a gene token followed by a protein
            substitution token
        :return: protein substitution classification for the list of matched tokens
        """
        gene = tokens[0]
        substitution = tokens[-1]

        return ProteinSubstitutionClassification(
            matching_tokens=tokens,
            nomenclature=Nomenclature.FREE_TEXT,
            gene_token=gene,
            pos=substitution.pos,
            ref=substitution.ref,
            alt=substitution.alt,
        )
class Classify:
    """The classify class."""

    # Dedicated classifiers for single-token nomenclatures
    hgvs_classifier = HgvsClassifier()
    gnomad_vcf_classifier = GnomadVcfClassifier()
    # Free-text classifiers, tried in order; the first exact match wins.
    # NOTE: ProteinDelInsClassifier was previously instantiated twice (at the head
    # of the list and in the delins group); the redundant duplicate is removed.
    classifiers: List[Classifier] = [
        ProteinSubstitutionClassifier(),
        ProteinStopGainClassifier(),
        ProteinReferenceAgreeClassifier(),
        CdnaSubstitutionClassifier(),
        GenomicSubstitutionClassifier(),
        CdnaReferenceAgreeClassifier(),
        GenomicReferenceAgreeClassifier(),
        ProteinDelInsClassifier(),
        CdnaDelInsClassifier(),
        GenomicDelInsClassifier(),
        ProteinDeletionClassifier(),
        CdnaDeletionClassifier(),
        GenomicDeletionClassifier(),
        GenomicDeletionAmbiguousClassifier(),
        ProteinInsertionClassifier(),
        CdnaInsertionClassifier(),
        GenomicInsertionClassifier(),
        GenomicDuplicationClassifier(),
        GenomicDuplicationAmbiguousClassifier(),
        AmplificationClassifier(),
    ]

    def perform(self, tokens: List[Token]) -> Optional[Classification]:
        """Classify a list of tokens.

        A single HGVS or gnomAD VCF token is dispatched to its dedicated
        classifier; otherwise the free-text classifiers are tried in order.

        :param tokens: List of tokens found
        :return: Classification for a list of tokens if found
        """
        classification = None

        if len(tokens) == 1:
            token_type = tokens[0].token_type

            if token_type == TokenType.HGVS:
                classification = self.hgvs_classifier.match(tokens[0])
            elif token_type == TokenType.GNOMAD_VCF:
                classification = self.gnomad_vcf_classifier.match(tokens[0])
        else:
            for classifier in self.classifiers:
                # We only do EXACT match candidates
                can_classify = classifier.can_classify(tokens)
                if can_classify:
                    classification = classifier.match(tokens)
                    if classification:
                        break

        return classification
    def __init__(self, seqrepo_access: SeqRepoAccess) -> None:
        """Initialize HGVS Dup Del Mode.

        :param SeqRepoAccess seqrepo_access: Access to seqrepo
        """
        self.seqrepo_access = seqrepo_access

    def default_mode(
        self,
        alt_type: Union[
            AltType.DELETION,
            AltType.DELETION_AMBIGUOUS,
            AltType.DUPLICATION,
            AltType.DUPLICATION_AMBIGUOUS,
        ],
        location: Dict,
        vrs_seq_loc_ac: str,
        baseline_copies: Optional[int] = None,
        copy_change: Optional[models.CopyChange] = None,
        alt: Optional[str] = None,
    ) -> Optional[Dict]:
        """Use default characteristics to return a variation.
        If baseline_copies not provided and endpoints are ambiguous - copy_number_change
        if copy_change not provided:
            copy_change - `efo:0030067` (loss) if del, `efo:0030070` (gain) if dup
        elif baseline_copies provided: copy_number_count
            copies are baseline + 1 for dup, baseline - 1 for del
        else
            allele

        :param alt_type: The type of alteration
        :param location: Sequence Location object
        :param vrs_seq_loc_ac: Accession used in VRS Sequence Location
        :param baseline_copies: Baseline copies for Copy Number Count variation
        :param copy_change: copy change for Copy Number Change Variation
        :param alt: Alteration
        :return: VRS Variation object represented as a dict
        """
        variation = None
        # Ambiguous endpoints without a known baseline -> copy number change
        if not baseline_copies and alt_type in AMBIGUOUS_REGIONS:
            variation = self.copy_number_change_mode(alt_type, location, copy_change)
        # Known baseline -> copy number count
        elif baseline_copies:
            variation = self.copy_number_count_mode(alt_type, location, baseline_copies)
        # Otherwise represent as a plain allele
        else:
            variation = self.allele_mode(location, alt_type, vrs_seq_loc_ac, alt)
        return variation

    def copy_number_count_mode(
        self,
        alt_type: Union[
            AltType.DELETION,
            AltType.DELETION_AMBIGUOUS,
            AltType.DUPLICATION,
            AltType.DUPLICATION_AMBIGUOUS,
        ],
        location: Dict,
        baseline_copies: int,
    ) -> Dict:
        """Return a VRS Copy Number Variation.

        :param alt_type: The type of alteration
        :param location: VRS SequenceLocation
        :param baseline_copies: Baseline copies number
        :return: VRS Copy Number object represented as a dict
        """
        # Deletions lose one copy relative to baseline; duplications gain one
        copies = baseline_copies - 1 if alt_type in DELS else baseline_copies + 1
        seq_loc = models.SequenceLocation(**location)
        seq_loc.id = ga4gh_identify(seq_loc)
        cn = models.CopyNumberCount(copies=copies, location=seq_loc)
        cn.id = ga4gh_identify(cn)
        return cn.model_dump(exclude_none=True)

    def copy_number_change_mode(
        self,
        alt_type: Union[
            AltType.DELETION,
            AltType.DELETION_AMBIGUOUS,
            AltType.DUPLICATION,
            AltType.DUPLICATION_AMBIGUOUS,
        ],
        location: Dict,
        copy_change: Optional[models.CopyChange] = None,
    ) -> Dict:
        """Return copy number change variation

        :param alt_type: The type of alteration
        :param location: VRS SequenceLocation
        :param copy_change: The copy change. When not provided, defaults to
            `efo:0030067` (loss) for deletions and `efo:0030070` (gain) for
            duplications.
        :return: Copy Number Change variation as a dict
        """
        if not copy_change:
            copy_change = (
                models.CopyChange.EFO_0030067
                if alt_type in DELS
                else models.CopyChange.EFO_0030070
            )

        seq_loc = models.SequenceLocation(**location)
        seq_loc.id = ga4gh_identify(seq_loc)
        cx = models.CopyNumberChange(location=seq_loc, copyChange=copy_change)
        cx.id = ga4gh_identify(cx)
        return cx.model_dump(exclude_none=True)

    def allele_mode(
        self,
        location: Dict,
        alt_type: AltType,
        vrs_seq_loc_ac: str,
        alt: Optional[str],
    ) -> Optional[Dict]:
        """Return a VRS Allele with a normalized LiteralSequenceExpression or
        ReferenceLengthExpression.

        :param location: VRS Location
        :param alt_type: Alteration type
        :param vrs_seq_loc_ac: Accession used in VRS Sequence Location
        :param alt: Alteration. May be `None`; an empty state is used in that case.
        :return: VRS Allele object represented as a dict, or `None` if the
            alteration is ambiguous, the reference sequence cannot be retrieved,
            or normalization fails
        """
        # Ambiguous endpoints cannot be represented as a precise Allele
        if alt_type in AMBIGUOUS_REGIONS:
            return None

        if alt_type == AltType.DUPLICATION:
            # Fetch the duplicated reference span so the state is ref doubled
            ref, _ = self.seqrepo_access.get_reference_sequence(
                vrs_seq_loc_ac,
                start=location["start"],
                end=location["end"],
                residue_mode=ResidueMode.INTER_RESIDUE,
            )

            if ref:
                state = ref + ref
            else:
                return None
        else:
            state = alt or ""

        allele = models.Allele(
            location=models.SequenceLocation(**location),
            state=models.LiteralSequenceExpression(sequence=state),
        )

        try:
            # Fully-justified allele normalization via vrs-python
            allele = normalize(allele, self.seqrepo_access)
        except (KeyError, AttributeError):
            return None
        else:
            allele.location.id = ga4gh_identify(allele.location)
            allele.id = ga4gh_identify(allele)
            return allele.model_dump(exclude_none=True)

    def interpret_variation(
        self,
        alt_type: AltType,
        location: Dict,
        errors: List,
        hgvs_dup_del_mode: HGVSDupDelModeOption,
        vrs_seq_loc_ac: str,
        baseline_copies: Optional[int] = None,
        copy_change: Optional[models.CopyChange] = None,
        alt: Optional[str] = None,
    ) -> Dict:
        """Interpret variation using HGVSDupDelMode

        :param alt_type: Alteration type
        :param location: VRS Location object
        :param errors: List of errors. Mutated in place when interpretation fails.
        :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and
            deletions
        :param vrs_seq_loc_ac: Accession used in VRS Sequence Location
        :param baseline_copies: Baseline copies number
        :param copy_change: The copy change
        :param alt: The alteration
        :return: VRS Variation object
        """
        variation = None
        if hgvs_dup_del_mode == HGVSDupDelModeOption.DEFAULT:
            variation = self.default_mode(
                alt_type,
                location,
                vrs_seq_loc_ac,
                baseline_copies=baseline_copies,
                copy_change=copy_change,
                alt=alt,
            )
        elif hgvs_dup_del_mode == HGVSDupDelModeOption.ALLELE:
            variation = self.allele_mode(location, alt_type, vrs_seq_loc_ac, alt)
        elif hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT:
            # Copy number count requires a known baseline
            if baseline_copies:
                variation = self.copy_number_count_mode(
                    alt_type, location, baseline_copies
                )
            else:
                errors.append(
                    "`baseline_copies` must be provided for Copy Number Count Variation"
                )
        elif hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_CHANGE:
            variation = self.copy_number_change_mode(
                alt_type, location, copy_change=copy_change
            )

        if not variation:
            errors.append("Unable to get VRS Variation")

        return variation
TranslateToHGVSQuery, + TranslateToQuery, + TranslateToService, + VrsPythonMeta, +) +from variation.version import __version__ +from fastapi.middleware.cors import CORSMiddleware + + +class Tag(Enum): + """Define tag names for endpoints""" + + MAIN = "Main" + SEQREPO = "SeqRepo" + TO_PROTEIN_VARIATION = "To Protein Variation" + VRS_PYTHON = "VRS-Python" + TO_COPY_NUMBER_VARIATION = "To Copy Number Variation" + ALIGNMENT_MAPPER = "Alignment Mapper" + + +query_handler = QueryHandler() + + +app = FastAPI( + title="The VICC Variation Normalizer", + description="Services and guidelines for normalizing variations.", + version=__version__, + contact={ + "name": "Alex H. Wagner", + "email": "Alex.Wagner@nationwidechildrens.org", + "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab", # noqa: E501 + }, + license={ + "name": "MIT", + "url": "https://github.com/cancervariants/variation-normalization/blob/main/LICENSE", + }, + docs_url="/variation", + openapi_url="/variation/openapi.json", + swagger_ui_parameters={"tryItOutEnabled": True}, +) + +translate_summary = ( + "Translate a HGVS, gnomAD VCF and Free Text descriptions to VRS" " variation(s)." +) +translate_description = ( + "Translate a human readable variation description to " + "VRS variation(s)." + " Performs fully-justified allele normalization. " + " Does not do any liftover operations or make any inferences " + "about the query." +) +translate_response_description = "A response to a validly-formed query." 
+q_description = "HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly" + +''' +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +''' + +@app.get( + "/variation/to_vrs", + summary=translate_summary, + response_description=translate_response_description, + response_model=ToVRSService, + response_model_exclude_none=True, + description=translate_description, + tags=[Tag.MAIN], +) +async def to_vrs( + q: str = Query(..., description=q_description), +) -> ToVRSService: + """Translate a HGVS, gnomAD VCF and Free Text descriptions to VRS variation(s). + Performs fully-justified allele normalization. Does not do any liftover operations + or make any inferences about the query. + + :param q: HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly + :return: ToVRSService model for variation + """ + resp = await query_handler.to_vrs_handler.to_vrs(unquote(q)) + return resp + + +normalize_summary = ( + "Normalizes and translates a HGVS, gnomAD VCF or Free Text description on GRCh37 " + "or GRCh38 assembly to a single VRS Variation." +) +normalize_response_description = "A response to a validly-formed query." +normalize_description = ( + "Normalizes and translates a human readable variation description to a single VRS " + "Variation. Performs fully-justified allele normalization. Will liftover to GRCh38 " + "and aligns to a priority transcript. Will make inferences about the query." +) +q_description = "HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly" +hgvs_dup_del_mode_decsr = ( + "This parameter determines how to interpret HGVS dup/del expressions in VRS." 
+) + + +@app.get( + "/variation/normalize", + summary=normalize_summary, + response_description=normalize_response_description, + response_model=NormalizeService, + response_model_exclude_none=True, + description=normalize_description, + tags=[Tag.MAIN], +) +async def normalize( + q: str = Query(..., description=q_description), + hgvs_dup_del_mode: Optional[HGVSDupDelModeOption] = Query( + HGVSDupDelModeOption.DEFAULT, description=hgvs_dup_del_mode_decsr + ), + baseline_copies: Optional[int] = Query( + None, + description="Baseline copies for HGVS duplications and deletions represented as Copy Number Count Variation", # noqa: E501 + ), + copy_change: Optional[models.CopyChange] = Query( + None, + description="The copy change for HGVS duplications and deletions represented as Copy Number Change Variation.", # noqa: E501 + ), +) -> NormalizeService: + """Normalize and translate a HGVS, gnomAD VCF or Free Text description on GRCh37 + or GRCh38 assembly to a single VRS Variation. Performs fully-justified allele + normalization. Will liftover to GRCh38 and aligns to a priority transcript. Will + make inferences about the query. + + :param q: HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly + :param hgvs_dup_del_mode: This parameter determines how to interpret HGVS dup/del + expressions in VRS. + :param baseline_copies: Baseline copies for HGVS duplications and deletions. + Required when `hgvs_dup_del_mode` is set to `copy_number_count`. + :param copy_change: The copy change for HGVS duplications and deletions represented + as Copy Number Change Variation. If not set, will use default `copy_change` for + query. 
+ :return: NormalizeService for variation + """ + normalize_resp = await query_handler.normalize_handler.normalize( + unquote(q), + hgvs_dup_del_mode=hgvs_dup_del_mode, + baseline_copies=baseline_copies, + copy_change=copy_change, + ) + return normalize_resp + + +@app.get( + "/variation/translate_identifier", + summary="Given an identifier, use SeqRepo to return a list of aliases.", + response_description="A response to a validly-formed query.", + response_model=TranslateIdentifierService, + response_model_exclude_none=True, + description="Return list of aliases for an identifier", + tags=[Tag.SEQREPO], +) +def translate_identifier( + identifier: str = Query(..., description="The identifier to find aliases for"), + target_namespaces: Optional[str] = Query( + None, description="The namespaces of the aliases, separated by commas" + ), +) -> TranslateIdentifierService: + """Return data containing identifier aliases. + + :param str identifier: The identifier to find aliases for + :param Optional[str] target_namespaces: The namespaces of the aliases, + separated by commas + :return: TranslateIdentifierService data + """ + aliases = [] + warnings = [] + identifier = identifier.strip() + try: + aliases = query_handler.seqrepo_access.sr.translate_identifier( + identifier, target_namespaces=target_namespaces + ) + except KeyError: + warnings = [f"Identifier, {identifier}, does not exist in SeqRepo"] + except Exception as e: + warnings = [f"SeqRepo could not translate identifier, {identifier}:" f" {e}"] + + return TranslateIdentifierService( + identifier_query=identifier, + warnings=warnings, + aliases=aliases, + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + ) + + +from_fmt_descr = ( + "Format of input variation to translate. 
Must be one of `beacon`, " + "`gnomad`, `hgvs`, or `spdi`" +) + + +@app.get( + "/variation/translate_from", + summary="Given variation as beacon, gnomad, hgvs or spdi representation, " + "return VRS Allele object using vrs-python's translator class", + response_description="A response to a validly-formed query.", + response_model_exclude_none=True, + description="Return VRS Allele object", + response_model=TranslateFromService, + tags=[Tag.VRS_PYTHON], +) +def vrs_python_translate_from( + variation: str = Query( + ..., + description="Variation to translate to VRS object." + " Must be represented as either beacon, " + "gnomad, hgvs, or spdi.", + ), + fmt: Optional[TranslateFromFormat] = Query(None, description=from_fmt_descr), +) -> TranslateFromService: + """Given variation query, return VRS Allele object using vrs-python"s translator + class + + :param str variation: Variation to translate to VRS object. Must be represented + as either beacon, gnomad, hgvs, or spdi + :param Optional[TranslateFromFormat] fmt: Format of variation. If not supplied, + vrs-python will infer its format. 
+ :return: TranslateFromService containing VRS Allele object + """ + variation_query = unquote(variation.strip()) + warnings = list() + vrs_variation = None + try: + resp = query_handler.vrs_python_tlr.translate_from(variation_query, fmt) + except ( + KeyError, + ValueError, + ValidationError, + ) as e: + warnings.append(f"vrs-python translator raised {type(e).__name__}: {e}") + except HGVSError as e: + warnings.append(f"hgvs raised {type(e).__name__}: {e}") + except BioutilsError as e: + warnings.append(f"bioutils raised {type(e).__name__}: {e}") + else: + vrs_variation = resp.model_dump(exclude_none=True) + + return TranslateFromService( + query=TranslateFromQuery(variation=variation_query, fmt=fmt), + warnings=warnings, + variation=vrs_variation, + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + vrs_python_meta_=VrsPythonMeta( + version=pkg_resources.get_distribution("ga4gh.vrs").version + ), + ) + + +g_to_p_summary = "Given GRCh38 gnomAD VCF, return VRS Variation object on MANE protein coordinate." # noqa: E501 +g_to_p_response_description = "A response to a validly-formed query." +g_to_p_description = ( + "Return VRS Variation object on protein coordinate for variation provided." +) +q_description = "GRCh38 gnomAD VCF (chr-pos-ref-alt) to normalize to MANE protein variation." # noqa: E501 + + +hgvs_dup_del_mode_decsr = ( + "This parameter determines how to interpret HGVS dup/del expressions in VRS." +) + + +def _get_allele( + request_body: Union[TranslateToQuery, TranslateToHGVSQuery], warnings: List +) -> Optional[models.Allele]: + """Return VRS allele object from request body. 
`warnings` will get updated if + exceptions are raised + + :param Union[TranslateToQuery, TranslateToHGVSQuery] request_body: Request body + containing `variation` + :param List warnings: List of warnings + :return: VRS Allele object if valid + """ + allele = None + try: + allele = models.Allele(**request_body["variation"]) + except ValidationError as e: + warnings.append(f"`allele` is not a valid VRS Allele: {e}") + return allele + + +@app.post( + "/variation/translate_to", + summary="Given VRS Allele object as a dict, return variation expressed as " + "queried format using vrs-python's translator class", + response_description="A response to a validly-formed query.", + description="Return variation in queried format representation. " + "Request body must contain `variation` and `fmt`. `variation` is" + " a VRS Allele object represented as a dict. `fmt` must be either" + " `spdi` or `hgvs`", + response_model=TranslateToService, + response_model_exclude_none=True, + tags=[Tag.VRS_PYTHON], +) +async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateToService: + """Given VRS Allele object as a dict, return variation expressed as queried + format using vrs-python's translator class + + :param TranslateToQuery request_body: Request body. `variation` is a VRS Allele + object represented as a dict. 
`fmt` must be either `spdi` or `hgvs` + :return: TranslateToService containing variation represented as fmt representation + if valid VRS Allele, and warnings if found + """ + query = request_body + request_body = request_body.model_dump(by_alias=True) + warnings = list() + + allele = _get_allele(request_body, warnings) + + variations = list() + if allele: + try: + variations = query_handler.vrs_python_tlr.translate_to( + allele, request_body["fmt"] + ) + except ValueError as e: + warnings.append(f"vrs-python translator raised {type(e).__name__}: {e}") + + return TranslateToService( + query=query, + warnings=warnings, + variations=variations, + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + vrs_python_meta_=VrsPythonMeta( + version=pkg_resources.get_distribution("ga4gh.vrs").version + ), + ) + + +to_hgvs_descr = ( + "Return variation as HGVS expressions. Request body must" + " contain `variation`, a VRS Allele object represented as a dict. " + "Can include optional parameter `namespace`. If `namespace` is not" + " None, returns HGVS strings for the specified namespace. If " + "`namespace` is None, returns HGVS strings for all alias translations." +) + + +@app.post( + "/variation/vrs_allele_to_hgvs", + summary="Given VRS Allele object as a dict, return HGVS expression(s)", + response_description="A response to a validly-formed query.", + description=to_hgvs_descr, + response_model=TranslateToService, + response_model_exclude_none=True, + tags=[Tag.VRS_PYTHON], +) +async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToService: + """Given VRS Allele object as a dict, return variation expressed as HGVS + expression(s) + + :param TranslateToHGVSQuery request_body: Request body. `variation` is a VRS Allele + object represented as a dict. Can provide optional parameter `namespace`. + If `namespace` is not None, returns HGVS strings for the specified namespace. 
+ If `namespace` is None, returns HGVS strings for all alias translations. + :return: TranslateToService containing variation represented as HGVS representation + if valid VRS Allele, and warnings if found + """ + query = request_body + request_body = request_body.model_dump(by_alias=True) + warnings = list() + + allele = _get_allele(request_body, warnings) + + variations = list() + if allele: + try: + variations = query_handler.vrs_python_tlr._to_hgvs( + allele, namespace=request_body.get("namespace") or "refseq" + ) + except ValueError as e: + warnings.append(f"vrs-python translator raised {type(e).__name__}: {e}") + + return TranslateToService( + query=query, + warnings=warnings, + variations=variations, + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + vrs_python_meta_=VrsPythonMeta( + version=pkg_resources.get_distribution("ga4gh.vrs").version + ), + ) + + +@app.get( + "/variation/hgvs_to_copy_number_count", + summary="Given HGVS expression, return VRS Copy Number Count Variation", + response_description="A response to a validly-formed query.", + description="Return VRS Copy Number Count Variation", + response_model=HgvsToCopyNumberCountService, + response_model_exclude_none=True, + tags=[Tag.TO_COPY_NUMBER_VARIATION], +) +async def hgvs_to_copy_number_count( + hgvs_expr: str = Query(..., description="Variation query"), + baseline_copies: Optional[int] = Query( + ..., description="Baseline copies for duplication" + ), + do_liftover: bool = Query( + False, description="Whether or not to liftover " "to GRCh38 assembly." 
+ ), +) -> HgvsToCopyNumberCountService: + """Given hgvs expression, return copy number count variation + + :param hgvs_expr: HGVS expression + :param baseline_copies: Baseline copies number + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: HgvsToCopyNumberCountService + """ + resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( + unquote(hgvs_expr.strip()), + baseline_copies, + do_liftover, + ) + return resp + + +@app.get( + "/variation/hgvs_to_copy_number_change", + summary="Given HGVS expression, return VRS Copy Number Change Variation", + response_description="A response to a validly-formed query.", + description="Return VRS Copy Number Change Variation", + response_model=HgvsToCopyNumberChangeService, + response_model_exclude_none=True, + tags=[Tag.TO_COPY_NUMBER_VARIATION], +) +async def hgvs_to_copy_number_change( + hgvs_expr: str = Query(..., description="Variation query"), + copy_change: models.CopyChange = Query(..., description="The copy change"), + do_liftover: bool = Query( + False, description="Whether or not to liftover " "to GRCh38 assembly." 
+ ), +) -> HgvsToCopyNumberChangeService: + """Given hgvs expression, return copy number change variation + + :param hgvs_expr: HGVS expression + :param copy_change: copy change + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: HgvsToCopyNumberChangeService + """ + resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( + unquote(hgvs_expr.strip()), + copy_change, + do_liftover, + ) + return resp + + +@app.post( + "/variation/parsed_to_cn_var", + summary="Given parsed genomic components, return VRS Copy Number Count " + "Variation", + response_description="A response to a validly-formed query.", + description="Return VRS Copy Number Count Variation", + response_model=ParsedToCnVarService, + response_model_exclude_none=True, + tags=[Tag.TO_COPY_NUMBER_VARIATION], +) +def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> ParsedToCnVarService: + """Given parsed genomic components, return Copy Number Count Variation. + + :param request_body: Request body + :return: ParsedToCnVarService containing Copy Number Count variation and list of + warnings + """ + try: + resp = query_handler.to_copy_number_handler.parsed_to_copy_number(request_body) + except Exception: + traceback_resp = traceback.format_exc().splitlines() + logger.exception(traceback_resp) + + return ParsedToCnVarService( + copy_number_count=None, + warnings=["Unhandled exception. 
See logs for more details."], + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + ) + else: + return resp + + +@app.post( + "/variation/parsed_to_cx_var", + summary="Given parsed genomic components, return VRS Copy Number Change " + "Variation", + response_description="A response to a validly-formed query.", + description="Return VRS Copy Number Change Variation", + response_model=ParsedToCxVarService, + response_model_exclude_none=True, + tags=[Tag.TO_COPY_NUMBER_VARIATION], +) +def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> ParsedToCxVarService: + """Given parsed genomic components, return Copy Number Change Variation + + :param request_body: Request body + :return: ParsedToCxVarService containing Copy Number Change variation and list of + warnings + """ + try: + resp = query_handler.to_copy_number_handler.parsed_to_copy_number(request_body) + except Exception: + traceback_resp = traceback.format_exc().splitlines() + logger.exception(traceback_resp) + + return ParsedToCxVarService( + copy_number_count=None, + warnings=["Unhandled exception. See logs for more details."], + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + ) + else: + return resp + + +amplification_to_cx_var_descr = ( + "Translate amplification to VRS Copy Number Change " + "Variation. If `sequence`, `start`, and `end` are " + "all provided, will return a SequenceLocation with " + "those properties. Else, gene-normalizer will be " + "used to retrieve the SequenceLocation." 
+) + + +@app.get( + "/variation/amplification_to_cx_var", + summary="Given amplification query, return VRS Copy Number Change Variation", + response_description="A response to a validly-formed query.", + description=amplification_to_cx_var_descr, + response_model=AmplificationToCxVarService, + response_model_exclude_none=True, + tags=[Tag.TO_COPY_NUMBER_VARIATION], +) +def amplification_to_cx_var( + gene: str = Query(..., description="Gene query"), + sequence_id: Optional[str] = Query(None, description="Sequence identifier"), + start: Optional[int] = Query( + None, description="Start position as residue coordinate" + ), + end: Optional[int] = Query(None, description="End position as residue coordinate"), +) -> AmplificationToCxVarService: + """Given amplification query, return Copy Number Change Variation + Parameter priority: + 1. sequence, start, end (must provide ALL) + 2. use the gene-normalizer to get the SequenceLocation + + :param gene: Gene query + :param sequence_id: Sequence ID for the location. If set, must also provide `start` + and `end` + :param start: Start position as residue coordinate for the sequence location. + If set, must also provide `sequence` and `end` + :param end: End position as residue coordinate for the sequence location. 
If set, + must also provide `sequence` and `start` + :return: AmplificationToCxVarService containing Copy Number Change and + list of warnings + """ + resp = query_handler.to_copy_number_handler.amplification_to_cx_var( + gene=gene, + sequence_id=sequence_id, + start=start, + end=end, + ) + return resp + + +@app.get( + "/variation/alignment_mapper/p_to_c", + summary="Translate protein representation to cDNA representation", + response_description="A response to a validly-formed query.", + description="Given protein accession and positions, return associated cDNA " + "accession and positions to codon(s)", + response_model=ToCdnaService, + response_model_exclude_none=True, + tags=[Tag.ALIGNMENT_MAPPER], +) +async def p_to_c( + p_ac: str = Query(..., description="Protein RefSeq accession"), + p_start_pos: int = Query(..., description="Protein start position"), + p_end_pos: int = Query(..., description="Protein end position"), + residue_mode: ResidueMode = Query( + ResidueMode.RESIDUE, + description="Residue mode for `p_start_pos` and `p_end_pos`", + ), +) -> ToCdnaService: + """Translate protein representation to cDNA representation + + :param str p_ac: Protein RefSeq accession + :param int p_start_pos: Protein start position + :param int p_end_pos: Protein end position + :param ResidueMode residue_mode: Residue mode for `p_start_pos` and `p_end_pos`. + :return: ToCdnaService containing cDNA representation, warnings, and + service meta + """ + try: + c_data, w = await query_handler.alignment_mapper.p_to_c( + p_ac, p_start_pos, p_end_pos, residue_mode + ) + except Exception as e: + logger.error("Unhandled exception: %s", str(e)) + w = "Unhandled exception. See logs for more information." 
+ c_data = None + return ToCdnaService( + c_data=c_data, + warnings=[w] if w else [], + service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + ) + + +@app.get( + "/variation/alignment_mapper/c_to_g", + summary="Translate cDNA representation to genomic representation", + response_description="A response to a validly-formed query.", + description="Given cDNA accession and positions for codon(s), return associated genomic" # noqa: E501 + " accession and positions for a given target genome assembly", + response_model=ToGenomicService, + response_model_exclude_none=True, + tags=[Tag.ALIGNMENT_MAPPER], +) +async def c_to_g( + c_ac: str = Query(..., description="cDNA RefSeq accession"), + c_start_pos: int = Query(..., description="cDNA start position for codon"), + c_end_pos: int = Query(..., description="cDNA end position for codon"), + cds_start: Optional[int] = Query( + None, description="CDS start site. If not provided, this will be computed." + ), + residue_mode: ResidueMode = Query( + ResidueMode.RESIDUE, + description="Residue mode for `c_start_pos` and `c_end_pos`", + ), + target_genome_assembly: Assembly = Query( + Assembly.GRCH38, description="Genomic assembly to map to" + ), +) -> ToGenomicService: + """Translate cDNA representation to genomic representation + + :param str c_ac: cDNA RefSeq accession + :param int c_start_pos: cDNA start position for codon + :param int c_end_pos: cDNA end position for codon + :param Optional[int] cds_start: CDS start site. If not provided, this will be + computed. + :param ResidueMode residue_mode: Residue mode for `c_start_pos` and `c_end_pos`. 
+ :param Assembly target_genome_assembly: Genome assembly to get genomic data for + :return: ToGenomicService containing genomic representation, warnings, and + service meta + """ + try: + g_data, w = await query_handler.alignment_mapper.c_to_g( + c_ac, + c_start_pos, + c_end_pos, + cds_start=cds_start, + residue_mode=residue_mode, + target_genome_assembly=target_genome_assembly, + ) + except Exception as e: + logger.error("Unhandled exception: %s", str(e)) + w = "Unhandled exception. See logs for more information." + g_data = None + return ToGenomicService( + g_data=g_data, + warnings=[w] if w else [], + service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + ) + + +@app.get( + "/variation/alignment_mapper/p_to_g", + summary="Translate protein representation to genomic representation", + response_description="A response to a validly-formed query.", + description="Given protein accession and positions, return associated genomic " + "accession and positions for a given target genome assembly", + response_model=ToGenomicService, + response_model_exclude_none=True, + tags=[Tag.ALIGNMENT_MAPPER], +) +async def p_to_g( + p_ac: str = Query(..., description="Protein RefSeq accession"), + p_start_pos: int = Query(..., description="Protein start position"), + p_end_pos: int = Query(..., description="Protein end position"), + residue_mode: ResidueMode = Query( + ResidueMode.RESIDUE, + description="Residue mode for `p_start_pos` and `p_end_pos`", + ), + target_genome_assembly: Assembly = Query( + Assembly.GRCH38, description="Genomic assembly to map to" + ), +) -> ToGenomicService: + """Translate protein representation to genomic representation + + :param str p_ac: Protein RefSeq accession + :param int p_start_pos: Protein start position + :param int p_end_pos: Protein end position + :param ResidueMode residue_mode: Residue mode for `p_start_pos` and `p_end_pos`. 
+ :param Assembly target_genome_assembly: Genome assembly to get genomic data for + :return: ToGenomicService containing genomic representation, warnings, and + service meta + """ + try: + g_data, w = await query_handler.alignment_mapper.p_to_g( + p_ac, + p_start_pos, + p_end_pos, + residue_mode=residue_mode, + target_genome_assembly=target_genome_assembly, + ) + except Exception as e: + logger.error("Unhandled exception: %s", str(e)) + w = "Unhandled exception. See logs for more information." + g_data = None + return ToGenomicService( + g_data=g_data, + warnings=[w] if w else [], + service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + ) diff --git a/variation/normalize.py b/variation/normalize.py new file mode 100644 index 0000000..5e35089 --- /dev/null +++ b/variation/normalize.py @@ -0,0 +1,284 @@ +"""Module for Variation Normalization.""" +from datetime import datetime +from typing import List, Optional, Tuple +from urllib.parse import unquote + +from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.sources import UtaDatabase +from ga4gh.vrs import models + +from variation.classify import Classify +from variation.schemas.app_schemas import Endpoint +from variation.schemas.normalize_response_schema import ( + HGVSDupDelModeOption, + NormalizeService, + ServiceMeta, +) +from variation.schemas.token_response_schema import GnomadVcfToken, Token +from variation.schemas.translation_response_schema import ( + AC_PRIORITY_LABELS, + TranslationResult, + VrsSeqLocAcStatus, +) +from variation.to_vrs import ToVRS +from variation.tokenize import Tokenize +from variation.translate import Translate +from variation.utils import update_warnings_for_no_resp +from variation.validate import Validate +from variation.version import __version__ + + +class Normalize(ToVRS): + """The Normalize class used to normalize a given variation.""" + + def __init__( + self, + seqrepo_access: SeqRepoAccess, + tokenizer: Tokenize, + classifier: Classify, 
+ validator: Validate, + translator: Translate, + uta: UtaDatabase, + ) -> None: + """Initialize Normalize class. + + :param seqrepo_access: Access to SeqRepo + :param tokenizer: Tokenizer class for tokenizing + :param classifier: Classifier class for classifying tokens + :param validator: Validator class for validating valid inputs + :param translator: Translating valid inputs + :param UtaDatabase uta: Access to db containing alignment data + """ + super().__init__( + seqrepo_access, + tokenizer, + classifier, + validator, + translator, + ) + self.uta = uta + + @staticmethod + def _get_priority_translation_result( + translations: List[TranslationResult], ac_status: VrsSeqLocAcStatus + ) -> Optional[TranslationResult]: + """Get prioritized translation result. Tries to find translation results with + the same `vrs_seq_loc_ac_status` as `ac_status`. If more than one translation + result is found, will try to find translation result where `og_ac` (original + accession used to get the translation) is the same as `vrs_seq_loc_ac`. If not + found, will sort the translations and return the first translation result in + the list + + :param translations: List of translation results + :param ac_status: Accession status to filter by + :return: Prioritized translation result with `ac_status` if found. Else, `None` + """ + preferred_translations = [ + t for t in translations if t.vrs_seq_loc_ac_status == ac_status + ] + len_preferred_translations = len(preferred_translations) + + # Need to handle cases where there are multiple translations. + # Different `og_ac`'s can lead to different translation results. 
+ # We must be consistent in what we return in /normalize + if len_preferred_translations > 1: + preferred_translations.sort( + key=lambda t: (t.og_ac.split(".")[0], int(t.og_ac.split(".")[1])), + reverse=True, + ) + og_ac_preferred_match = ( + [t for t in preferred_translations if t.og_ac == t.vrs_seq_loc_ac] + or [None] + )[0] + + # We'll first see if `og_ac` (starting ac) matches the `ac_status` + # accession. If that doesn't match, we'll just sort the original + # acs and return the first element. Later on, we'll want to figure + # out a better way to do this. + if og_ac_preferred_match: + translation_result = og_ac_preferred_match + else: + translation_result = preferred_translations[0] + elif len_preferred_translations == 1: + translation_result = preferred_translations[0] + else: + translation_result = None + + return translation_result + + @staticmethod + def get_hgvs_dup_del_mode( + tokens: List[Token], + hgvs_dup_del_mode: Optional[HGVSDupDelModeOption] = None, + baseline_copies: Optional[int] = None, + ) -> Tuple[Optional[HGVSDupDelModeOption], Optional[str]]: + """Get option to use for hgvs dup del mode + + :param tokens: List of tokens found in an input query + :param hgvs_dup_del_mode: The hgvs dup del mode option provided in the input + query. Mode to use for interpreting HGVS duplications and deletions. + gnomad vcf token will always set to `HGVSDupDelModeOption.ALLELE`. + :param baseline_copies: The baseline copies provided in the input query. + Required when `hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT`. 
+ :return: Tuple containing the hgvs dup del mode option and warnings + """ + warning = None + if len(tokens) == 1 and isinstance(tokens[0], GnomadVcfToken): + hgvs_dup_del_mode = HGVSDupDelModeOption.ALLELE + else: + if not hgvs_dup_del_mode: + hgvs_dup_del_mode = HGVSDupDelModeOption.DEFAULT + + if hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT: + if not baseline_copies: + warning = f"{hgvs_dup_del_mode.value} mode requires `baseline_copies`" # noqa: E501 + return None, warning + + return hgvs_dup_del_mode, warning + + async def normalize( + self, + q: str, + hgvs_dup_del_mode: Optional[ + HGVSDupDelModeOption + ] = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + ) -> NormalizeService: + """Normalize a given variation. + + :param q: HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly + :param hgvs_dup_del_mode: This parameter determines how to interpret HGVS + dup/del expressions in VRS. + :param baseline_copies: Baseline copies for HGVS duplications and deletions + :param copy_change: The copy change for HGVS duplications and deletions + represented as Copy Number Change Variation. 
+ :return: NormalizeService with variation and warnings + """ + label = q.strip() + variation = None + warnings = [] + params = { + "variation_query": q, + "variation": variation, + "warnings": warnings, + "service_meta_": ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + } + + # Get tokens for input query + tokens = self.tokenizer.perform(unquote(q.strip()), warnings) + if warnings: + update_warnings_for_no_resp(label, warnings) + params["warnings"] = warnings + return NormalizeService(**params) + + # Get HGVS dup del mode option to use + hgvs_dup_del_mode, warning = self.get_hgvs_dup_del_mode( + tokens, hgvs_dup_del_mode=hgvs_dup_del_mode, baseline_copies=baseline_copies + ) + if warning: + warnings.append(warning) + update_warnings_for_no_resp(label, warnings) + params["warnings"] = warnings + return NormalizeService(**params) + + # Get classification for list of tokens + classification = self.classifier.perform(tokens) + if not classification: + warnings.append(f"Unable to find classification for: {q}") + params["warnings"] = warnings + return NormalizeService(**params) + + # Get validation summary for classification + validation_summary = await self.validator.perform(classification) + if not validation_summary: + update_warnings_for_no_resp(label, validation_summary.warnings) + params["warnings"] = warnings + return NormalizeService(**params) + + variation = None + if validation_summary.valid_results: + # Get translated VRS representations for valid results + translations, warnings = await self.get_translations( + validation_summary.valid_results, + warnings, + endpoint_name=Endpoint.NORMALIZE, + hgvs_dup_del_mode=hgvs_dup_del_mode, + baseline_copies=baseline_copies, + copy_change=copy_change, + do_liftover=True, + ) + if translations: + # Get prioritized translation result so that output is always the same + for ac_status in AC_PRIORITY_LABELS: + translation_result = self._get_priority_translation_result( + translations, ac_status + 
) + if translation_result: + if ( + translation_result.vrs_seq_loc_ac_status + == VrsSeqLocAcStatus.NA + ): + classification_type = ( + translation_result.validation_result.classification.classification_type.value + ) + if classification_type.startswith(("protein", "cdna")): + # Only supports protein/cDNA at the moment + warnings.append("Unable to find MANE representation") + break + + try: + variation = translation_result.vrs_variation + except AttributeError as e: + # vrs_ref_allele_seq = None + warnings.append(str(e)) + else: + pass + # valid_result = validation_summary.valid_results[0] + # classification_type = valid_result.classification.classification_type + # if classification_type not in { + # ClassificationType.GENOMIC_DELETION_AMBIGUOUS, + # ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS, + # ClassificationType.AMPLIFICATION, + # }: + # variation_type = variation["type"] + # if variation_type in { + # "Allele", "CopyNumberChange", "CopyNumberCount" + # }: + # vrs_ref_allele_seq = self.get_ref_allele_seq( + # variation["location"], translation_result.vrs_seq_loc_ac + # ) + # else: + # vrs_ref_allele_seq = None + + if not variation: + update_warnings_for_no_resp(label, warnings) + else: + update_warnings_for_no_resp(label, warnings) + else: + # No valid results were found for input query + update_warnings_for_no_resp(label, warnings) + + params["variation"] = variation + params["warnings"] = warnings + return NormalizeService(**params) + + # def get_ref_allele_seq(self, location: Dict, ac: str) -> Optional[str]: + # """Return ref allele seq for transcript. 
+ + # :param location: VRS Location object + # :param identifier: Identifier for allele + # :return: Ref seq allele + # """ + # ref = None + # start = location["start"] + # end = location["end"] + # if isinstance(start, int) and isinstance(end, int): + # if start != end: + # ref, _ = self.seqrepo_access.get_reference_sequence( + # ac, start, end, residue_mode=ResidueMode.INTER_RESIDUE + # ) + + # return ref diff --git a/variation/query.py b/variation/query.py new file mode 100644 index 0000000..87add09 --- /dev/null +++ b/variation/query.py @@ -0,0 +1,70 @@ +"""Module for providing methods for handling queries.""" +from typing import Optional + +from cool_seq_tool.app import CoolSeqTool +from ga4gh.vrs.extras.translator import AlleleTranslator as VrsPythonTranslator +from gene.database import create_db +from gene.query import QueryHandler as GeneQueryHandler + +from variation.classify import Classify +from variation.hgvs_dup_del_mode import HGVSDupDelMode +from variation.normalize import Normalize +from variation.to_copy_number_variation import ToCopyNumberVariation +from variation.to_vrs import ToVRS, VRSRepresentation +from variation.tokenize import Tokenize +from variation.tokenizers import GeneSymbol +from variation.translate import Translate +from variation.validate import Validate + + +class QueryHandler: + """Class for initializing handlers that make app queries.""" + + def __init__( + self, + gene_query_handler: Optional[GeneQueryHandler] = None, + ) -> None: + """Initialize QueryHandler instance. + :param gene_query_handler: Gene normalizer query handler instance. If this is + provided, will use a current instance. If this is not provided, will create + a new instance. 
+ """ + cool_seq_tool = CoolSeqTool() + self.seqrepo_access = cool_seq_tool.seqrepo_access + + if not gene_query_handler: + gene_query_handler = GeneQueryHandler(create_db()) + + vrs_representation = VRSRepresentation(self.seqrepo_access) + gene_symbol = GeneSymbol(gene_query_handler) + tokenizer = Tokenize(gene_symbol) + classifier = Classify() + uta_db = cool_seq_tool.uta_db + self.alignment_mapper = cool_seq_tool.alignment_mapper + mane_transcript = cool_seq_tool.mane_transcript + transcript_mappings = cool_seq_tool.transcript_mappings + self.vrs_python_tlr = VrsPythonTranslator(data_proxy=self.seqrepo_access) + validator = Validate( + self.seqrepo_access, transcript_mappings, uta_db, gene_query_handler + ) + hgvs_dup_del_mode = HGVSDupDelMode(self.seqrepo_access) + translator = Translate( + self.seqrepo_access, + mane_transcript, + uta_db, + vrs_representation, + hgvs_dup_del_mode, + ) + to_vrs_params = [ + self.seqrepo_access, + tokenizer, + classifier, + validator, + translator, + ] + self.to_vrs_handler = ToVRS(*to_vrs_params) + normalize_params = to_vrs_params + [uta_db] + self.normalize_handler = Normalize(*normalize_params) + self.to_copy_number_handler = ToCopyNumberVariation( + *to_vrs_params + [gene_query_handler, uta_db] + ) diff --git a/variation/regex.py b/variation/regex.py new file mode 100644 index 0000000..7a4528c --- /dev/null +++ b/variation/regex.py @@ -0,0 +1,212 @@ +"""Module containing regex patterns""" +import re +from typing import Any, List, Tuple + +from variation.schemas.app_schemas import AmbiguousRegexType +from variation.schemas.classification_response_schema import ClassificationType +from variation.schemas.token_response_schema import TokenType + +CDNA_GENOMIC_SUBSTITUTION = re.compile( + r"^(?P\d+)(?P[ACTGN])>(?P[ACTGN])$" +) + +CDNA_GENOMIC_REFERENCE_AGREE = re.compile(r"^(?P\d+)=$") + +CNDA_GENOMIC_DELETION = re.compile( + r"^(?P\d+)(_(?P\d+))?del(?P[ACTGN]+)?$" +) + +GENOMIC_DELETION_AMBIGUOUS_1 = re.compile( + 
r"^\((?P\?|\d+)_(?P\?|\d+)\)_\((?P\?|\d+)_(?P\?|\d+)\)del$" +) + +GENOMIC_DELETION_AMBIGUOUS_2 = re.compile( + r"^\((?P\?|\d+)_(?P\?|\d+)\)_(?P\d+)del$" +) + +GENOMIC_DELETION_AMBIGUOUS_3 = re.compile( + r"^(?P\d+)_\((?P\?|\d+)_(?P\?|\d+)\)del$" +) + +CDNA_GENOMIC_DELINS = re.compile( + r"^(?P\d+)(_(?P\d+))?delins(?P[ACTGN]+)$" +) + +CDNA_GENOMIC_INSERTION = re.compile( + r"^(?P\d+)_(?P\d+)ins(?P[ACTGN]+)$" +) + +PROTEIN_SUBSTITUTION = re.compile( + r"^(?P[a-zA-z]+)(?P\d+)(?P([a-zA-Z]|Ter|\*)+)$" +) + +PROTEIN_INSERTION = re.compile( + r"^(?P[a-zA-z]+)(?P\d+)_(?P[a-zA-z]+)(?P\d+)ins(?P[a-zA-z]+)$" # noqa: E501 +) + +PROTEIN_DELINS = re.compile( + r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?delins(?P[a-zA-z]+)$" # noqa: E501 +) + +PROTEIN_DELETION = re.compile( + r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?del(?P[a-zA-z]+)?$" # noqa: E501 +) + +PROTEIN_REFERENCE_AGREE = re.compile(r"^(?P[a-zA-z]+)(?P\d+)=$") + +GENOMIC_DUPLICATION = re.compile(r"^(?P\d+)(_(?P\d+))?dup$") + +# (#_#)_(#_#) OR (?_#)_(#_?) +GENOMIC_DUPLICATION_AMBIGUOUS_1 = re.compile( + r"^\((?P\?|\d+)_(?P\?|\d+)\)_\((?P\?|\d+)_(?P\?|\d+)\)dup$" +) + +# (?_#)_#, (#_?)_# OR (#_#)_# +GENOMIC_DUPLICATION_AMBIGUOUS_2 = re.compile( + r"^\((?P\?|\d+)_(?P\?|\d+)\)_(?P\d+)dup$" +) + +# #_(#_?) OR #_(#_#) +GENOMIC_DUPLICATION_AMBIGUOUS_3 = re.compile( + r"^(?P\d+)_\((?P\?|\d+)_(?P\?|\d+)\)dup$" +) + +# (#_#)_(#_#) OR (?_#)_(#_?) +GENOMIC_DELETION_AMBIGUOUS_1 = re.compile( + r"^\((?P\?|\d+)_(?P\?|\d+)\)_\((?P\?|\d+)_(?P\?|\d+)\)del$" +) + +# (?_#)_#, (#_?)_# OR (#_#)_# +GENOMIC_DELETION_AMBIGUOUS_2 = re.compile( + r"^\((?P\?|\d+)_(?P\?|\d+)\)_(?P\d+)del$" +) + +# #_(#_?) 
OR #_(#_#) +GENOMIC_DELETION_AMBIGUOUS_3 = re.compile( + r"^(?P\d+)_\((?P\?|\d+)_(?P\?|\d+)\)del$" +) + +# _REGEXPRS are used to help group the regex pattern and associated token type and +# classification type + +# Note: Order matters for regexprs +PROTEIN_REGEXPRS: List[Tuple[Any, TokenType, ClassificationType]] = [ + (PROTEIN_DELINS, TokenType.PROTEIN_DELINS, ClassificationType.PROTEIN_DELINS), + (PROTEIN_DELETION, TokenType.PROTEIN_DELETION, ClassificationType.PROTEIN_DELETION), + ( + PROTEIN_SUBSTITUTION, + TokenType.PROTEIN_SUBSTITUTION, + ClassificationType.PROTEIN_SUBSTITUTION, + ), + ( + PROTEIN_REFERENCE_AGREE, + TokenType.PROTEIN_REFERENCE_AGREE, + ClassificationType.PROTEIN_REFERENCE_AGREE, + ), + ( + PROTEIN_INSERTION, + TokenType.PROTEIN_INSERTION, + ClassificationType.PROTEIN_INSERTION, + ), +] + +# Note: Order matters for regexprs +CDNA_REGEXPRS: List[Tuple[Any, TokenType, ClassificationType]] = [ + (CDNA_GENOMIC_DELINS, TokenType.CDNA_DELINS, ClassificationType.CDNA_DELINS), + (CNDA_GENOMIC_DELETION, TokenType.CDNA_DELETION, ClassificationType.CDNA_DELETION), + ( + CDNA_GENOMIC_SUBSTITUTION, + TokenType.CDNA_SUBSTITUTION, + ClassificationType.CDNA_SUBSTITUTION, + ), + ( + CDNA_GENOMIC_REFERENCE_AGREE, + TokenType.CDNA_REFERENCE_AGREE, + ClassificationType.CDNA_REFERENCE_AGREE, + ), + ( + CDNA_GENOMIC_INSERTION, + TokenType.CDNA_INSERTION, + ClassificationType.CDNA_INSERTION, + ), +] + +# Note: Order matters for regexprs +GENOMIC_REGEXPRS: List[Tuple[Any, TokenType, ClassificationType]] = [ + (CDNA_GENOMIC_DELINS, TokenType.GENOMIC_DELINS, ClassificationType.GENOMIC_DELINS), + ( + CNDA_GENOMIC_DELETION, + TokenType.GENOMIC_DELETION, + ClassificationType.GENOMIC_DELETION, + ), + ( + CDNA_GENOMIC_SUBSTITUTION, + TokenType.GENOMIC_SUBSTITUTION, + ClassificationType.GENOMIC_SUBSTITUTION, + ), + ( + CDNA_GENOMIC_REFERENCE_AGREE, + TokenType.GENOMIC_REFERENCE_AGREE, + ClassificationType.GENOMIC_REFERENCE_AGREE, + ), + ( + CDNA_GENOMIC_INSERTION, + 
TokenType.GENOMIC_INSERTION, + ClassificationType.GENOMIC_INSERTION, + ), + ( + GENOMIC_DUPLICATION, + TokenType.GENOMIC_DUPLICATION, + ClassificationType.GENOMIC_DUPLICATION, + ), +] + + +# Note: Order matters for regexprs +GENOMIC_DUP_AMBIGUOUS_REGEXPRS: List[ + Tuple[Any, TokenType, ClassificationType, AmbiguousRegexType] +] = [ + ( + GENOMIC_DUPLICATION_AMBIGUOUS_1, + TokenType.GENOMIC_DUPLICATION_AMBIGUOUS, + ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS, + AmbiguousRegexType.REGEX_1, + ), + ( + GENOMIC_DUPLICATION_AMBIGUOUS_2, + TokenType.GENOMIC_DUPLICATION_AMBIGUOUS, + ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS, + AmbiguousRegexType.REGEX_2, + ), + ( + GENOMIC_DUPLICATION_AMBIGUOUS_3, + TokenType.GENOMIC_DUPLICATION_AMBIGUOUS, + ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS, + AmbiguousRegexType.REGEX_3, + ), +] + + +# Note: Order matters for regexprs +GENOMIC_DEL_AMBIGUOUS_REGEXPRS: List[ + Tuple[Any, TokenType, ClassificationType, AmbiguousRegexType] +] = [ + ( + GENOMIC_DELETION_AMBIGUOUS_1, + TokenType.GENOMIC_DELETION_AMBIGUOUS, + ClassificationType.GENOMIC_DELETION_AMBIGUOUS, + AmbiguousRegexType.REGEX_1, + ), + ( + GENOMIC_DELETION_AMBIGUOUS_2, + TokenType.GENOMIC_DELETION_AMBIGUOUS, + ClassificationType.GENOMIC_DELETION_AMBIGUOUS, + AmbiguousRegexType.REGEX_2, + ), + ( + GENOMIC_DELETION_AMBIGUOUS_3, + TokenType.GENOMIC_DELETION_AMBIGUOUS, + ClassificationType.GENOMIC_DELETION_AMBIGUOUS, + AmbiguousRegexType.REGEX_3, + ), +] diff --git a/variation/schemas/__init__.py b/variation/schemas/__init__.py new file mode 100644 index 0000000..04fec28 --- /dev/null +++ b/variation/schemas/__init__.py @@ -0,0 +1,3 @@ +"""Package level import.""" +from .normalize_response_schema import NormalizeService, ServiceMeta +from .to_vrs_response_schema import ToVRSService diff --git a/variation/schemas/app_schemas.py b/variation/schemas/app_schemas.py new file mode 100644 index 0000000..17f9d09 --- /dev/null +++ b/variation/schemas/app_schemas.py @@ -0,0 
+1,21 @@ +"""Module for schemas used throughout the app""" +from enum import Enum, IntEnum + + +class Endpoint(str, Enum): + """Define endpoint names in app that lead to decisions such as hgvs_dup_del_mode + option. + """ + + TO_VRS = "to_vrs" + NORMALIZE = "normalize" + HGVS_TO_COPY_NUMBER_COUNT = "hgvs_to_copy_number_count" + HGVS_TO_COPY_NUMBER_CHANGE = "hgvs_to_copy_number_change" + + +class AmbiguousRegexType(IntEnum): + """Helps determine the regex that was used in ambiguous expressions""" + + REGEX_1 = 1 + REGEX_2 = 2 + REGEX_3 = 3 diff --git a/variation/schemas/classification_response_schema.py b/variation/schemas/classification_response_schema.py new file mode 100644 index 0000000..9b66fd9 --- /dev/null +++ b/variation/schemas/classification_response_schema.py @@ -0,0 +1,238 @@ +"""Module for Classification schema.""" +from enum import Enum +from typing import List, Literal, Optional + +from pydantic import BaseModel, StrictStr + +from variation.schemas.token_response_schema import GeneToken, Token +from variation.schemas.variation_schema import ( + Deletion, + DelIns, + DupDelAmbiguous, + Duplication, + Insertion, + ProteinDeletion, + ProteinDelIns, + ProteinInsertion, + ProteinReferenceAgree, + ReferenceAgree, + StopGain, + Substitution, +) + + +class Nomenclature(str, Enum): + """Define nomenclatures that are supported""" + + FREE_TEXT = "free_text" + HGVS = "hgvs" + GNOMAD_VCF = "gnomad_vcf" + + +class ClassificationType(str, Enum): + """Enums for Classification Types.""" + + PROTEIN_SUBSTITUTION = "protein_substitution" + PROTEIN_STOP_GAIN = "protein_stop_gain" + PROTEIN_REFERENCE_AGREE = "protein_reference_agree" + PROTEIN_DELINS = "protein_delins" + CDNA_SUBSTITUTION = "cdna_substitution" + GENOMIC_SUBSTITUTION = "genomic_substitution" + CDNA_REFERENCE_AGREE = "cdna_reference_agree" + GENOMIC_REFERENCE_AGREE = "genomic_reference_agree" + CDNA_DELINS = "cdna_delins" + GENOMIC_DELINS = "genomic_delins" + PROTEIN_DELETION = "protein_deletion" + 
CDNA_DELETION = "cdna_deletion" + GENOMIC_DELETION = "genomic_deletion" + GENOMIC_DELETION_AMBIGUOUS = "genomic_deletion_ambiguous" + PROTEIN_INSERTION = "protein_insertion" + CDNA_INSERTION = "cdna_insertion" + GENOMIC_INSERTION = "genomic_insertion" + GENOMIC_DUPLICATION = "genomic_duplication" + GENOMIC_DUPLICATION_AMBIGUOUS = "genomic_duplication_ambiguous" + AMPLIFICATION = "amplification" + + +class Classification(BaseModel): + """Classification for a list of tokens.""" + + classification_type: ClassificationType + matching_tokens: List[Token] = [] + nomenclature: Nomenclature + gene_token: Optional[GeneToken] = None + ac: Optional[StrictStr] = None + + +class ProteinSubstitutionClassification(Classification, Substitution): + """Define protein substitution classification""" + + classification_type: Literal[ + ClassificationType.PROTEIN_SUBSTITUTION + ] = ClassificationType.PROTEIN_SUBSTITUTION + + +class GenomicSubstitutionClassification(Classification, Substitution): + """Define genomic substitution classification""" + + classification_type: Literal[ + ClassificationType.GENOMIC_SUBSTITUTION + ] = ClassificationType.GENOMIC_SUBSTITUTION + + +class CdnaSubstitutionClassification(Classification, Substitution): + """Define cdna substitution classification""" + + classification_type: Literal[ + ClassificationType.CDNA_SUBSTITUTION + ] = ClassificationType.CDNA_SUBSTITUTION + + +class ProteinStopGainClassification(Classification, StopGain): + """Define protein stop gain classification""" + + classification_type: Literal[ + ClassificationType.PROTEIN_STOP_GAIN + ] = ClassificationType.PROTEIN_STOP_GAIN + + +class ProteinReferenceAgreeClassification(Classification, ProteinReferenceAgree): + """Define protein reference agree classification""" + + classification_type: Literal[ + ClassificationType.PROTEIN_REFERENCE_AGREE + ] = ClassificationType.PROTEIN_REFERENCE_AGREE + + +class CdnaReferenceAgreeClassification(Classification, ReferenceAgree): + """Define cdna 
reference agree classification""" + + classification_type: Literal[ + ClassificationType.CDNA_REFERENCE_AGREE + ] = ClassificationType.CDNA_REFERENCE_AGREE + + +class GenomicReferenceAgreeClassification(Classification, ReferenceAgree): + """Define genomic reference agree classification""" + + classification_type: Literal[ + ClassificationType.GENOMIC_REFERENCE_AGREE + ] = ClassificationType.GENOMIC_REFERENCE_AGREE + + +class ProteinInsertionClassification(Classification, ProteinInsertion): + """Define protein insertion classification""" + + classification_type: Literal[ + ClassificationType.PROTEIN_INSERTION + ] = ClassificationType.PROTEIN_INSERTION + + +class CdnaInsertionClassification(Classification, Insertion): + """Define cdna insertion classification""" + + classification_type: Literal[ + ClassificationType.CDNA_INSERTION + ] = ClassificationType.CDNA_INSERTION + + +class GenomicInsertionClassification(Classification, Insertion): + """Define genomic insertion classification""" + + classification_type: Literal[ + ClassificationType.GENOMIC_INSERTION + ] = ClassificationType.GENOMIC_INSERTION + + +class ProteinDeletionClassification(Classification, ProteinDeletion): + """Define protein deletion classification""" + + classification_type: Literal[ + ClassificationType.PROTEIN_DELETION + ] = ClassificationType.PROTEIN_DELETION + + +class GenomicDeletionClassification(Classification, Deletion): + """Define genomic deletion classification""" + + classification_type: Literal[ + ClassificationType.GENOMIC_DELETION + ] = ClassificationType.GENOMIC_DELETION + + +class CdnaDeletionClassification(Classification, Deletion): + """Define cdna classification""" + + classification_type: Literal[ + ClassificationType.CDNA_DELETION + ] = ClassificationType.CDNA_DELETION + + +class ProteinDelInsClassification(Classification, ProteinDelIns): + """Define protein delins classification""" + + classification_type: Literal[ + ClassificationType.PROTEIN_DELINS + ] = 
ClassificationType.PROTEIN_DELINS + + +class CdnaDelInsClassification(Classification, DelIns): + """Define cdna delins classification""" + + classification_type: Literal[ + ClassificationType.CDNA_DELINS + ] = ClassificationType.CDNA_DELINS + + +class GenomicDelInsClassification(Classification, DelIns): + """Define genomic delins classification""" + + classification_type: Literal[ + ClassificationType.GENOMIC_DELINS + ] = ClassificationType.GENOMIC_DELINS + + +class GenomicDuplicationClassification(Classification, Duplication): + """Define genomic duplication classification""" + + classification_type: Literal[ + ClassificationType.GENOMIC_DUPLICATION + ] = ClassificationType.GENOMIC_DUPLICATION + + +class AmbiguousType(str, Enum): + """Define ambiguous type which helps determine the ambiguous expression format""" + + AMBIGUOUS_1 = "(#_#)_(#_#)" + AMBIGUOUS_2 = "(?_#)_(#_?)" + AMBIGUOUS_3 = "(#_?)_(?_#)" # Not yet supported + AMBIGUOUS_4 = "(#_#)_#" # Not yet supported + AMBIGUOUS_5 = "(?_#)_#" + AMBIGUOUS_6 = "#_(#_#)" # Not yet supported + AMBIGUOUS_7 = "#_(#_?)" + + +class GenomicDuplicationAmbiguousClassification(Classification, DupDelAmbiguous): + """Define genomic duplication ambiguous classification""" + + classification_type: Literal[ + ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS + ] = ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS + ambiguous_type: AmbiguousType + + +class GenomicDeletionAmbiguousClassification(Classification, DupDelAmbiguous): + """Define genomic deletion ambiguous classification""" + + classification_type: Literal[ + ClassificationType.GENOMIC_DELETION_AMBIGUOUS + ] = ClassificationType.GENOMIC_DELETION_AMBIGUOUS + ambiguous_type: AmbiguousType + + +class AmplificationClassification(Classification): + """Define amplification classification""" + + classification_type: Literal[ + ClassificationType.AMPLIFICATION + ] = ClassificationType.AMPLIFICATION diff --git a/variation/schemas/copy_number_schema.py 
b/variation/schemas/copy_number_schema.py new file mode 100644 index 0000000..991a271 --- /dev/null +++ b/variation/schemas/copy_number_schema.py @@ -0,0 +1,402 @@ +"""Module containing schemas for services""" +import re +from enum import Enum +from typing import Dict, Optional + +from ga4gh.vrs import models +from pydantic import ( + BaseModel, + ConfigDict, + Field, + StrictBool, + StrictInt, + StrictStr, + model_validator, +) + +from variation.schemas.normalize_response_schema import ServiceResponse +from variation.version import __version__ + + +class ParsedPosType(str, Enum): + """Define position type for parsed to cnv endpoints""" + + NUMBER = "number" + DEFINITE_RANGE = "definite_range" + INDEFINITE_RANGE = "indefinite_range" + + +class Comparator(str, Enum): + """A range comparator.""" + + LT_OR_EQUAL = "<=" + GT_OR_EQUAL = ">=" + + +class ClinVarAssembly(str, Enum): + """Define assemblies in ClinVar""" + + GRCH38 = "GRCh38" + GRCH37 = "GRCh37" + NCBI36 = "NCBI36" + HG38 = "hg38" + HG19 = "hg19" + HG18 = "hg18" + + +def validate_parsed_fields(cls, v: Dict) -> Dict: + """Validate base copy number query fields + - `accession` or both `assembly` and `chromosome` must be provided + - `start1` is required when `start_pos_type` is a definite + range. + - `end1` is required when `end_pos_type` is a Definite Range. 
+ - `start_pos_comparator` is required when `start_pos_type` is an Indefinite + Range + - `end_pos_comparator` is required when `end_pos_type` is an Indefinite Range + - End positions must be greater than start positions + """ + ac_assembly_chr_msg = "Must provide either `accession` or both `assembly` and `chromosome`" # noqa: E501 + assembly = v.assembly + chromosome = v.chromosome + assembly_chr_set = assembly and chromosome + assert v.accession or assembly_chr_set, ac_assembly_chr_msg # noqa: E501 + + if assembly_chr_set: + pattern = r"^chr(X|Y|([1-9]|1[0-9]|2[0-2]))$" + assert re.match( + pattern, chromosome + ), f"`chromosome`, {chromosome}, does not match r'{pattern}'" # noqa: E501 + + start0 = v.start0 + start1 = v.start1 + if v.start_pos_type == ParsedPosType.DEFINITE_RANGE: + assert start1 is not None, "`start1` is required for definite ranges" + assert start1 > start0, "`start0` must be less than `start1`" + elif v.start_pos_type == ParsedPosType.INDEFINITE_RANGE: + assert ( + v.start_pos_comparator + ), "`start_pos_comparator` is required for indefinite ranges" # noqa: E501 + + end0 = v.end0 + end1 = v.end1 + if v.end_pos_type == ParsedPosType.DEFINITE_RANGE: + assert end1 is not None, "`end1` is required for definite ranges" + assert end1 > end0, "`end0` must be less than `end1`" + elif v.end_pos_type == ParsedPosType.INDEFINITE_RANGE: + assert ( + v.end_pos_comparator + ), "`end_pos_comparator` is required for indefinite ranges" # noqa: E501 + + err_msg = "end positions must be greater than start" + if start1 is None: + assert end0 > start0, err_msg + else: + assert end0 > start1, err_msg + + +class ParsedToCopyNumberQuery(BaseModel): + """Define base model for parsed to copy number queries""" + + assembly: Optional[ClinVarAssembly] = Field( + default=None, + description=( + "Assembly. Ignored, along with `chromosome`, if `accession` is " "provided." + ), + ) + chromosome: Optional[StrictStr] = Field( + default=None, + description=( + "Chromosome. 
Must contain `chr` prefix, i.e. 'chr7'. Must provide " + "when `assembly` is provided." + ), + ) + accession: Optional[StrictStr] = Field( + default=None, + description=( + "Genomic RefSeq accession. If `accession` is provided, will " + "ignore `assembly` and `chromosome`. If `accession` is not " + "provided, must provide both `assembly` and `chromosome`." + ), + ) + start0: StrictInt = Field( + description=( + "Start position (residue coords). If `start_pos_type` is a " + "Definite Range, this will be the min start position." + ), + ) + end0: StrictInt = Field( + description=( + "End position (residue coords). If `end_pos_type` is a definite " + "range, this will be the min end position." + ), + ) + start_pos_comparator: Optional[Comparator] = Field( + default=None, + description=( + "Must provide when `start_pos_type` is an Indefinite Range. " + "Indicates which direction the range is indefinite. To represent " + "(#_?), set to '<='. To represent (?_#), set to '>='." + ), + ) + end_pos_comparator: Optional[Comparator] = Field( + default=None, + description=( + "Must provide when `end_pos_type` is an Indefinite Range. " + "Indicates which direction the range is indefinite. To represent " + "(#_?), set to '<='. To represent (?_#), set to '>='." + ), + ) + start_pos_type: ParsedPosType = Field( + default=ParsedPosType.NUMBER, + description="The type of the start value in the VRS SequenceLocation", + ) + end_pos_type: ParsedPosType = Field( + default=ParsedPosType.NUMBER, + description="Type of the end value in the VRS SequenceLocation", + ) + start1: Optional[StrictInt] = Field( + default=None, + description=( + "Only provided when `start_pos_type` is a Definite Range, this " + "will be the max start position." + ), + ) + end1: Optional[StrictInt] = Field( + default=None, + description=( + "Only provided when `end_pos_type` is a Definite Range, this " + "will be the max end position." 
+ ), + ) + do_liftover: StrictBool = Field( + default=False, description="Whether or not to liftover to GRCh38 assembly" + ) + + +class ParsedToCnVarQuery(ParsedToCopyNumberQuery): + """Define query for parsed to copy number count variation endpoint""" + + copies0: StrictInt = Field( + description=( + "Number of copies. When `copies_type` is a Number or Indefinite " + "Range, this will be the `value` for copies. When `copies_type` " + "is an Definite Range, this will be the `min` copies." + ), + ) + copies1: Optional[StrictInt] = Field( + default=None, + description=( + "Must provide when `copies_type` is a Definite Range. This will " + "be the `max` copies." + ), + ) + copies_type: ParsedPosType = Field( + default=ParsedPosType.NUMBER, + description="Type for the `copies` in the `location`", + ) + copies_comparator: Optional[Comparator] = Field( + default=None, + description=( + "Must provide when `copies_type` is an Indefinite Range. " + "Indicates which direction the range is indefinite." + ), + ) + + @model_validator(mode="after") + def validate_fields(cls, v: Dict) -> Dict: + """Validate fields. 
+ + - `copies1` should exist when `copies_type == ParsedPosType.DEFINITE_RANGE` + - `copies_comparator` should exist when + `copies_type == ParsedPosType.INDEFINITE_RANGE` + """ + validate_parsed_fields(cls, v) + copies1 = v.copies1 + copies_type = v.copies_type + copies_comparator = v.copies_comparator + + if copies_type == ParsedPosType.DEFINITE_RANGE: + assert ( + copies1 + ), "`copies1` must be provided for `copies_type == ParsedPosType.DEFINITE_RANGE`" # noqa: E501 + elif copies_type == ParsedPosType.INDEFINITE_RANGE: + assert ( + copies_comparator + ), "`copies_comparator` must be provided for `copies_type == ParsedPosType.INDEFINITE_RANGE`" # noqa: E501 + + return v + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "assembly": "GRCh37", + "chromosome": "chr1", + "accession": None, + "start0": 143134063, + "end0": 143284670, + "copies0": 3, + "copies1": None, + "copies_comparator": None, + "copies_type": "number", + "start_pos_comparator": "<=", + "end_pos_comparator": ">=", + "start_pos_type": "indefinite_range", + "end_pos_type": "indefinite_range", + "start1": None, + "end1": None, + "do_liftover": False, + } + } + ) + + +class ParsedToCnVarService(ServiceResponse): + """A response for translating parsed components to Copy Number Count""" + + copy_number_count: Optional[models.CopyNumberCount] = None + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "copy_number_count": { + "id": "ga4gh:CN.Qrs0TaGCcJiibMvhcML6BTSCVtX95FBl", + "type": "CopyNumberCount", + "location": { + "id": "ga4gh:SL.g6xj5oKF99OysSxcfHyGYbh8NFNn2r61", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.S_KjnFVz-FE7M0W6yoaUDgYxLPc1jyWU", + }, + "start": [None, 143134062], + "end": [143284670, None], + }, + "copies": 3, + }, + "service_meta_": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2022-01-26T22:23:41.821673", + "url": 
"https://github.com/cancervariants/variation-normalization", + }, + } + } + ) + + +class ParsedToCxVarQuery(ParsedToCopyNumberQuery): + """Define query for parsed to copy number change variation endpoint""" + + copy_change: models.CopyChange + + @model_validator(mode="after") + def validate_fields(cls, v: Dict) -> Dict: + """Validate fields""" + validate_parsed_fields(cls, v) + return v + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "assembly": "GRCh38", + "chromosome": "chrY", + "accession": None, + "start0": 10001, + "end0": 1223133, + "copy_change": "efo:0030069", + "start_pos_type": "number", + "end_pos_type": "number", + "start1": None, + "end1": None, + "do_liftover": False, + } + } + ) + + +class ParsedToCxVarService(ServiceResponse): + """A response for translating parsed components to Copy Number Change""" + + copy_number_change: Optional[models.CopyNumberChange] = None + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "copy_number_change": { + "type": "CopyNumberChange", + "id": "ga4gh:CX.BTNwndSs3RylLhtL9Y45GePsVX35eeTT", + "location": { + "type": "SequenceLocation", + "id": "ga4gh:SL.Pu3oAKHColJSZ3zY_Xu5MeezINaTFlNq", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", + }, + "start": 10000, + "end": 1223133, + }, + "copyChange": "efo:0030069", + }, + "service_meta_": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2022-01-26T22:23:41.821673", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) + + +class AmplificationToCxVarQuery(BaseModel): + """Define query for amplification to copy number change variation endpoint""" + + gene: str + sequence_id: Optional[str] = None + start: Optional[int] = None + end: Optional[int] = None + sequence_location: Optional[models.SequenceLocation] = None + + +class AmplificationToCxVarService(ServiceResponse): + """A response for translating 
Amplification queries to Copy Number Change""" + + query: Optional[AmplificationToCxVarQuery] = None + amplification_label: Optional[str] + copy_number_change: Optional[models.CopyNumberChange] + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "query": { + "gene": "braf", + "sequence_id": None, + "start": None, + "end": None, + "sequence_location": None, + }, + "amplification_label": "BRAF Amplification", + "copy_number_change": { + "id": "ga4gh:CX.89PECTeQjhhXnNW9yg24DheWOQMgmKk2", + "type": "CopyNumberChange", + "location": { + "id": "ga4gh:SL.uNBZoxhjhohl24VlIut-JxPJAGfJ7EQE", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + }, + "start": 140713327, + "end": 140924929, + }, + "copyChange": "efo:0030072", + }, + "service_meta_": { + "version": __version__, + "response_datetime": "2022-09-29T15:08:18.696882", + "name": "variation-normalizer", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) diff --git a/variation/schemas/hgvs_to_copy_number_schema.py b/variation/schemas/hgvs_to_copy_number_schema.py new file mode 100644 index 0000000..4ddbd19 --- /dev/null +++ b/variation/schemas/hgvs_to_copy_number_schema.py @@ -0,0 +1,80 @@ +"""Module containing schemas used in HGVS To Copy Number endpoints""" +from typing import Optional + +from ga4gh.vrs import models +from pydantic import ConfigDict, StrictStr + +from variation.schemas.normalize_response_schema import ServiceResponse +from variation.version import __version__ + + +class HgvsToCopyNumberCountService(ServiceResponse): + """A response for translating HGVS to copy number count.""" + + hgvs_expr: StrictStr + copy_number_count: Optional[models.CopyNumberCount] = None + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "hgvs_expr": "NC_000003.12:g.49531262dup", + "copy_number_count": { + "id": "ga4gh:CN.07iM14yvZ80N_AiaM7G_V4f1pCkmFYz4", + 
"type": "CopyNumberCount", + "location": { + "id": "ga4gh:SL.y4-cVA2VxMCDxb9gV2oFrzC386yrEVqh", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "start": 49531261, + "end": 49531262, + }, + "copies": 3, + }, + "service_meta_": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2022-01-26T22:23:41.821673", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) + + +class HgvsToCopyNumberChangeService(ServiceResponse): + """A response for translating HGVS to copy number change.""" + + hgvs_expr: StrictStr + copy_number_change: Optional[models.CopyNumberChange] = None + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "hgvs_expr": "NC_000003.12:g.49531262dup", + "copy_number_change": { + "id": "ga4gh:CX.d8BWSLNKN0K4n8ySG0jWPCr4cJIqEf5g", + "type": "CopyNumberChange", + "location": { + "id": "ga4gh:SL.y4-cVA2VxMCDxb9gV2oFrzC386yrEVqh", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX", + }, + "start": 49531261, + "end": 49531262, + }, + "copyChange": "efo:0030069", + }, + "service_meta_": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2022-01-26T22:23:41.821673", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) diff --git a/variation/schemas/normalize_response_schema.py b/variation/schemas/normalize_response_schema.py new file mode 100644 index 0000000..fa10ade --- /dev/null +++ b/variation/schemas/normalize_response_schema.py @@ -0,0 +1,143 @@ +"""Module for normalize endpoint response schema.""" +from datetime import datetime +from enum import Enum +from typing import List, Literal, Optional, Union + +from ga4gh.vrs import models +from pydantic import BaseModel, ConfigDict, StrictStr, model_validator + +from 
variation.version import __version__ + + +class HGVSDupDelModeOption(str, Enum): + """Define options for HGVSDupDelMode. + This mode determines how to interpret HGVS dup/del. + """ + + DEFAULT = "default" + COPY_NUMBER_COUNT = "copy_number_count" + COPY_NUMBER_CHANGE = "copy_number_change" + ALLELE = "allele" + + +class ServiceMeta(BaseModel): + """Metadata regarding the variation-normalization service.""" + + name: Literal["variation-normalizer"] = "variation-normalizer" + version: StrictStr + response_datetime: datetime + url: Literal[ + "https://github.com/cancervariants/variation-normalization" + ] = "https://github.com/cancervariants/variation-normalization" + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2021-04-05T16:44:15.367831", + "url": "https://github.com/cancervariants/variation-normalization", + } + } + ) + + +class ServiceResponse(BaseModel): + """Base response model for services""" + + warnings: List[StrictStr] = [] + service_meta_: ServiceMeta + + @model_validator(mode="after") + def unique_warnings(cls, v): + """Ensure unique warnings""" + v.warnings = list(set(v.warnings)) + return v + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "warnings": [], + "service_meta_": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2021-04-05T16:44:15.367831", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) + + +class NormalizeService(ServiceResponse): + """A response to normalizing a variation to a single GA4GH VRS Variation""" + + variation_query: StrictStr + variation: Optional[ + Union[models.Allele, models.CopyNumberCount, models.CopyNumberChange] + ] = None + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "variation_query": "BRAF V600E", + "variation": { + "id": "ga4gh:VA.4XBXAxSAk-WyAu5H0S1-plrk_SCTW1PO", + "location": { + "id": 
"ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "end": 600, + "start": 599, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, + "type": "Allele", + }, + "service_meta_": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2022-01-26T22:23:41.821673", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) + + +class TranslateIdentifierService(ServiceResponse): + """A response to translating identifiers.""" + + identifier_query: StrictStr + aliases: List[StrictStr] = [] + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "identifier_query": "NP_004324.2", + "warnings": [], + "aliases": [ + "Ensembl:ENSP00000288602.6", + "ensembl:ENSP00000288602.6", + "Ensembl:ENSP00000493543.1", + "ensembl:ENSP00000493543.1", + "MD5:74c9b69323bd112084c1b5b385e7e6c5", + "NCBI:NP_004324.2", + "refseq:NP_004324.2", + "SEGUID:sfzILpNpX8UFB/vgH9LOKLpl/+g", + "SHA1:b1fcc82e93695fc50507fbe01fd2ce28ba65ffe8", + "VMC:GS_cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + "sha512t24u:cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + "ga4gh:SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + ], + "service_meta_": { + "name": "variation-normalizer", + "version": __version__, + "response_datetime": "2021-11-18T14:10:53.909158", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) diff --git a/variation/schemas/service_schema.py b/variation/schemas/service_schema.py new file mode 100644 index 0000000..acc1466 --- /dev/null +++ b/variation/schemas/service_schema.py @@ -0,0 +1,73 @@ +"""Module containing schemas for services""" +from enum import Enum + +from cool_seq_tool.schemas import ToCdnaService as ToCdna +from cool_seq_tool.schemas import ToGenomicService as ToGenomic +from pydantic import ConfigDict + +from 
variation.schemas.normalize_response_schema import ServiceMeta +from variation.version import __version__ + + +class ClinVarAssembly(str, Enum): + """Define assemblies in ClinVar""" + + GRCH38 = "GRCh38" + GRCH37 = "GRCh37" + NCBI36 = "NCBI36" + HG38 = "hg38" + HG19 = "hg19" + HG18 = "hg18" + + +class ToCdnaService(ToCdna): + """Service model response for protein -> cDNA""" + + service_meta: ServiceMeta + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "c_data": { + "c_ac": "NM_004333.6", + "c_start_pos": 1797, + "c_end_pos": 1800, + "cds_start": 226, + "residue_mode": "inter-residue", + }, + "warnings": [], + "service_meta": { + "version": __version__, + "response_datetime": "2022-09-29T15:08:18.696882", + "name": "variation-normalizer", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) + + +class ToGenomicService(ToGenomic): + """Model response for genomic representation""" + + service_meta: ServiceMeta + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "g_data": { + "g_ac": "NC_000007.13", + "g_start_pos": 140453134, + "g_end_pos": 140453137, + "residue_mode": "inter-residue", + }, + "warnings": [], + "service_meta": { + "version": __version__, + "response_datetime": "2022-09-29T15:08:18.696882", + "name": "variation-normalizer", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) diff --git a/variation/schemas/to_vrs_response_schema.py b/variation/schemas/to_vrs_response_schema.py new file mode 100644 index 0000000..ed89932 --- /dev/null +++ b/variation/schemas/to_vrs_response_schema.py @@ -0,0 +1,98 @@ +"""Module for to_vrs endpoint response schema.""" +from typing import List, Union + +from ga4gh.vrs import models +from pydantic import BaseModel, ConfigDict, StrictStr + +from variation.schemas.normalize_response_schema import ServiceMeta +from variation.version import __version__ + + +class ToVRSService(BaseModel): + """Define model for translation 
response.""" + + search_term: StrictStr + warnings: List[StrictStr] = [] + variations: Union[ + List[models.Allele], + List[models.CopyNumberCount], + List[models.CopyNumberChange], + ] = [] + service_meta_: ServiceMeta + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "search_term": "BRAF V600E", + "warnings": [], + "variations": [ + { + "id": "ga4gh:VA.PJu8CCaVzEyqXMAEcMNegyDWyvT_jzNn", + "location": { + "id": "ga4gh:SL.EpHaD2ygDuPMvyURI9L4yetEwF3W0G7G", + "end": 600, + "start": 599, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.ZJwurRo2HLY018wghYjDKSfIlEH0Y8At", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, + "type": "Allele", + }, + { + "id": "ga4gh:VA.4XBXAxSAk-WyAu5H0S1-plrk_SCTW1PO", + "location": { + "id": "ga4gh:SL.ZA1XNKhCT_7m2UtmnYb8ZYOVS4eplMEK", + "end": 600, + "start": 599, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, + "type": "Allele", + }, + { + "id": "ga4gh:VA.c-oRhbu7nDrBrSW2fPbFlDM15V6jiaho", + "location": { + "id": "ga4gh:SL.gkevJbLNOScKXhxhzOZXiG3hW8zeyo-q", + "start": 599, + "end": 600, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.lKdPZpuT-VNvRuKDjsUItNgutfWYgWQd", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, + "type": "Allele", + }, + { + "id": "ga4gh:VA.3ex0cvKXjHbq8NLuitOAfVwSPzqZUFrR", + "location": { + "id": "ga4gh:SL.Q4MXez2kHFPQqGJKLP8quVHAskuCrOAA", + "start": 599, + "end": 600, + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.0Q-SgJX1V3seUUIu3qVUtEa55CQsGmEU", + }, + "type": "SequenceLocation", + }, + "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, + "type": "Allele", + }, + ], + "service_meta_": { + 
"name": "variation-normalizer", + "version": __version__, + "response_datetime": "2023-08-24T09:05:03.622667", + "url": "https://github.com/cancervariants/variation-normalization", + }, + } + } + ) diff --git a/variation/schemas/token_response_schema.py b/variation/schemas/token_response_schema.py new file mode 100644 index 0000000..e7b84db --- /dev/null +++ b/variation/schemas/token_response_schema.py @@ -0,0 +1,257 @@ +"""Module for schemas related to tokenization.""" +from enum import Enum +from typing import Literal, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.core import core_models +from pydantic import BaseModel, StrictInt, StrictStr + +from variation.schemas.app_schemas import AmbiguousRegexType +from variation.schemas.variation_schema import ( + Deletion, + DelIns, + DupDelAmbiguous, + Duplication, + Insertion, + ProteinDeletion, + ProteinDelIns, + ProteinInsertion, + ProteinReferenceAgree, + ReferenceAgree, + StopGain, + Substitution, +) + + +class TokenType(str, Enum): + """Define token types.""" + + AMPLIFICATION = "amplification" + CDNA_DELETION = "cdna_deletion" + CDNA_DELINS = "cdna_delins" + CDNA_INSERTION = "cdna_insertion" + CDNA_REFERENCE_AGREE = "cdna_reference_agree" + CDNA_SUBSTITUTION = "cdna_substitution" + GENE = "gene" + GENOMIC_DELETION = "genomic_deletion" + GENOMIC_DELETION_AMBIGUOUS = "genomic_deletion_ambiguous" + GENOMIC_DELINS = "genomic_delins" + GENOMIC_DUPLICATION = "genomic_duplication" + GENOMIC_DUPLICATION_AMBIGUOUS = "genomic_duplication_ambiguous" + GENOMIC_INSERTION = "genomic_insertion" + GENOMIC_REFERENCE_AGREE = "genomic_reference_agree" + GENOMIC_SUBSTITUTION = "genomic_substitution" + GNOMAD_VCF = "gnomad_vcf" + HGVS = "hgvs" + PROTEIN_STOP_GAIN = "protein_stop_gain" + PROTEIN_DELETION = "protein_deletion" + PROTEIN_DELINS = "protein_delins" + PROTEIN_INSERTION = "protein_insertion" + PROTEIN_SUBSTITUTION = "protein_substitution" + PROTEIN_REFERENCE_AGREE = "protein_reference_agree" + 
UNKNOWN = "unknown" + + +class AltType(str, Enum): + """Define alteration types.""" + + AMPLIFICATION = "amplification" + DELETION = "deletion" + DELETION_AMBIGUOUS = "deletion_ambiguous" + DELINS = "delins" + DUPLICATION = "duplication" + DUPLICATION_AMBIGUOUS = "duplication_ambiguous" + INSERTION = "insertion" + NONSENSE = "nonsense" + REFERENCE_AGREE = "reference_agree" + SUBSTITUTION = "substitution" + STOP_GAIN = "stop_gain" + + +# Ambiguous region alt types +AMBIGUOUS_REGIONS = {AltType.DELETION_AMBIGUOUS, AltType.DUPLICATION_AMBIGUOUS} + + +class Token(BaseModel): + """A string from a given query.""" + + token: StrictStr + token_type: TokenType + input_string: StrictStr + + +class HgvsToken(Token): + """HGVS Token""" + + token_type: Literal[TokenType.HGVS] = TokenType.HGVS + accession: StrictStr + coordinate_type: AnnotationLayer + change: StrictStr + + +class GnomadVcfToken(Token): + """Gnomad VCF Token""" + + token_type: Literal[TokenType.GNOMAD_VCF] = TokenType.GNOMAD_VCF + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + chromosome: StrictStr + pos: StrictInt + ref: StrictStr + alt: StrictStr + + +class GenomicSubstitutionToken(Token, Substitution): + """Genomic substitution token""" + + token_type: Literal[TokenType.GENOMIC_SUBSTITUTION] = TokenType.GENOMIC_SUBSTITUTION + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + + +class CdnaSubstitutionToken(Token, Substitution): + """Token for substitution on cDNA reference sequence""" + + token_type: Literal[TokenType.CDNA_SUBSTITUTION] = TokenType.CDNA_SUBSTITUTION + coordinate_type: Literal[AnnotationLayer.CDNA] = AnnotationLayer.CDNA + + +class ProteinSubstitutionToken(Token, Substitution): + """Token for substitution on protein reference sequence""" + + token_type: Literal[TokenType.PROTEIN_SUBSTITUTION] = TokenType.PROTEIN_SUBSTITUTION + coordinate_type: Literal[AnnotationLayer.PROTEIN] = AnnotationLayer.PROTEIN + + +class 
ProteinStopGainToken(Token, StopGain): + """Token for stop gain on protein reference sequence""" + + token_type: Literal[TokenType.PROTEIN_STOP_GAIN] = TokenType.PROTEIN_STOP_GAIN + coordinate_type: Literal[AnnotationLayer.PROTEIN] = AnnotationLayer.PROTEIN + + +class ProteinReferenceAgreeToken(Token, ProteinReferenceAgree): + """Token for reference agree on protein reference sequence""" + + token_type: Literal[ + TokenType.PROTEIN_REFERENCE_AGREE + ] = TokenType.PROTEIN_REFERENCE_AGREE + coordinate_type: Literal[AnnotationLayer.PROTEIN] = AnnotationLayer.PROTEIN + + +class CdnaReferenceAgreeToken(Token, ReferenceAgree): + """Token for reference agree on cDNA reference sequence""" + + coordinate_type: Literal[AnnotationLayer.CDNA] = AnnotationLayer.CDNA + token_type: Literal[TokenType.CDNA_REFERENCE_AGREE] = TokenType.CDNA_REFERENCE_AGREE + + +class GenomicReferenceAgreeToken(Token, ReferenceAgree): + """Token for reference agree on genomic reference sequence""" + + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + token_type: Literal[ + TokenType.GENOMIC_REFERENCE_AGREE + ] = TokenType.GENOMIC_REFERENCE_AGREE + + +class ProteinDeletionToken(Token, ProteinDeletion): + """Token for deletion on protein reference sequence""" + + token_type: Literal[TokenType.PROTEIN_DELETION] = TokenType.PROTEIN_DELETION + coordinate_type: Literal[AnnotationLayer.PROTEIN] = AnnotationLayer.PROTEIN + + +class CdnaDeletionToken(Token, Deletion): + """Token for deletion on cdna reference sequence""" + + token_type: Literal[TokenType.CDNA_DELETION] = TokenType.CDNA_DELETION + coordinate_type: Literal[AnnotationLayer.CDNA] = AnnotationLayer.CDNA + + +class GenomicDeletionToken(Token, Deletion): + """Token for deletion on genomic reference sequence""" + + token_type: Literal[TokenType.GENOMIC_DELETION] = TokenType.GENOMIC_DELETION + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + + +class GenomicDeletionAmbiguousToken(Token, 
DupDelAmbiguous): + """Token for ambiguous deletion on genomic reference sequence""" + + token_type: Literal[ + TokenType.GENOMIC_DELETION_AMBIGUOUS + ] = TokenType.GENOMIC_DELETION_AMBIGUOUS + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + ambiguous_regex_type: AmbiguousRegexType + + +class ProteinDelInsToken(Token, ProteinDelIns): + """Token for delins on protein reference sequence""" + + token_type: Literal[TokenType.PROTEIN_DELINS] = TokenType.PROTEIN_DELINS + coordinate_type: Literal[AnnotationLayer.PROTEIN] = AnnotationLayer.PROTEIN + + +class CdnaDelInsToken(Token, DelIns): + """Token for delins on cdna reference sequence""" + + token_type: Literal[TokenType.CDNA_DELINS] = TokenType.CDNA_DELINS + coordinate_type: Literal[AnnotationLayer.CDNA] = AnnotationLayer.CDNA + + +class GenomicDelInsToken(Token, DelIns): + """Token for delins on genomic reference sequence""" + + token_type: Literal[TokenType.GENOMIC_DELINS] = TokenType.GENOMIC_DELINS + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + + +class CdnaInsertionToken(Token, Insertion): + """Token for insertion on cdna reference sequence""" + + token_type: Literal[TokenType.CDNA_INSERTION] = TokenType.CDNA_INSERTION + coordinate_type: Literal[AnnotationLayer.CDNA] = AnnotationLayer.CDNA + + +class GenomicInsertionToken(Token, Insertion): + """Token for insertion on genomic reference sequence""" + + token_type: Literal[TokenType.GENOMIC_INSERTION] = TokenType.GENOMIC_INSERTION + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + + +class ProteinInsertionToken(Token, ProteinInsertion): + """Token for insertion on protein reference sequence""" + + token_type: Literal[TokenType.PROTEIN_INSERTION] = TokenType.PROTEIN_INSERTION + coordinate_type: Literal[AnnotationLayer.PROTEIN] = AnnotationLayer.PROTEIN + + +class GenomicDuplicationToken(Token, Duplication): + """Duplication on genomic reference sequence""" + + token_type: 
Literal[TokenType.GENOMIC_DUPLICATION] = TokenType.GENOMIC_DUPLICATION + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + + +class GenomicDuplicationAmbiguousToken(Token, DupDelAmbiguous): + """Ambiguous duplication on genomic reference sequence""" + + token_type: Literal[ + TokenType.GENOMIC_DUPLICATION_AMBIGUOUS + ] = TokenType.GENOMIC_DUPLICATION_AMBIGUOUS + coordinate_type: Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.GENOMIC + ambiguous_regex_type: AmbiguousRegexType + + +class AmplificationToken(Token): + """Token for amplification""" + + token_type: Literal[TokenType.AMPLIFICATION] = TokenType.AMPLIFICATION + + +class GeneToken(Token): + """Token for genes""" + + matched_value: StrictStr + token_type: Literal[TokenType.GENE] = TokenType.GENE + gene: Optional[core_models.Gene] = None diff --git a/variation/schemas/translation_response_schema.py b/variation/schemas/translation_response_schema.py new file mode 100644 index 0000000..2652ae8 --- /dev/null +++ b/variation/schemas/translation_response_schema.py @@ -0,0 +1,34 @@ +"""Module for Translation Response Schema.""" +from enum import Enum +from typing import Dict, Optional + +from cool_seq_tool.schemas import TranscriptPriority +from pydantic import BaseModel, StrictStr + +from variation.schemas.validation_response_schema import ValidationResult + + +class VrsSeqLocAcStatus(str, Enum): + """Create enum for VRS SequenceLocation accession status. + Order when defining matters. 
+ First has highest priority, last has lowest priority + """ + + MANE_SELECT = TranscriptPriority.MANE_SELECT.value + MANE_PLUS_CLINICAL = TranscriptPriority.MANE_PLUS_CLINICAL.value + LONGEST_COMPATIBLE_REMAINING = TranscriptPriority.LONGEST_COMPATIBLE_REMAINING.value + GRCH38 = TranscriptPriority.GRCH38.value + NA = "na" + + +AC_PRIORITY_LABELS = [m for m in VrsSeqLocAcStatus.__members__.values()] + + +class TranslationResult(BaseModel): + """Translation Result""" + + vrs_variation: Optional[Dict] = {} + vrs_seq_loc_ac: Optional[StrictStr] = None + vrs_seq_loc_ac_status: VrsSeqLocAcStatus = VrsSeqLocAcStatus.NA + og_ac: Optional[StrictStr] = None + validation_result: ValidationResult diff --git a/variation/schemas/validation_response_schema.py b/variation/schemas/validation_response_schema.py new file mode 100644 index 0000000..00e8eb2 --- /dev/null +++ b/variation/schemas/validation_response_schema.py @@ -0,0 +1,24 @@ +"""Module for Validation Response Schema.""" +from typing import List, Optional + +from pydantic import BaseModel, StrictBool, StrictInt, StrictStr + +from variation.schemas.classification_response_schema import Classification + + +class ValidationResult(BaseModel): + """Validation Results for a given input""" + + accession: Optional[StrictStr] = None + cds_start: Optional[StrictInt] = None # This is only for cDNA + classification: Classification + is_valid: StrictBool + errors: List[StrictStr] = [] + + +class ValidationSummary(BaseModel): + """Give Valid and Invalid Results for a given input.""" + + valid_results: List[ValidationResult] = [] + invalid_results: List[ValidationResult] = [] + warnings: List[StrictStr] = [] diff --git a/variation/schemas/variation_schema.py b/variation/schemas/variation_schema.py new file mode 100644 index 0000000..b384df7 --- /dev/null +++ b/variation/schemas/variation_schema.py @@ -0,0 +1,91 @@ +"""Define supported variation types""" +from typing import Literal, Optional, Union + +from pydantic import BaseModel, 
StrictInt, StrictStr + + +class Substitution(BaseModel): + """Define model for substitution variation""" + + pos: StrictInt + ref: StrictStr + alt: StrictStr + + +class StopGain(Substitution): + """Define model for stop gain variation""" + + alt: Literal["*"] = "*" + + +class Deletion(BaseModel): + """Define model for deletion variation""" + + pos0: StrictInt + pos1: Optional[StrictInt] = None + deleted_sequence: Optional[StrictStr] = None + + +class ProteinDeletion(Deletion): + """Define model for protein deletion""" + + aa0: StrictStr + aa1: Optional[StrictStr] = None + + +class Insertion(BaseModel): + """Define model for insertion variation""" + + pos0: StrictInt + pos1: StrictInt + inserted_sequence: StrictStr + + +class ProteinInsertion(Insertion): + """Define model for protein insertion variation""" + + aa0: StrictStr + aa1: StrictStr + + +class ReferenceAgree(BaseModel): + """Define model for reference agree variation""" + + pos: StrictInt + + +class ProteinReferenceAgree(ReferenceAgree): + """Define model for protein reference agree variation""" + + ref: StrictStr + + +class DelIns(BaseModel): + """Define model for delins variation""" + + pos0: StrictInt + pos1: Optional[StrictInt] = None + inserted_sequence: StrictStr + + +class ProteinDelIns(DelIns): + """Define model for protein delins variation""" + + aa0: StrictStr + aa1: Optional[StrictStr] = None + + +class Duplication(BaseModel): + """Define model for duplication variation""" + + pos0: StrictInt + pos1: Optional[StrictInt] = None + + +class DupDelAmbiguous(BaseModel): + """Define model for duplication/deletion ambiguous variation""" + + pos0: Union[StrictInt, Literal["?"]] + pos1: Optional[Union[StrictInt, Literal["?"]]] = None + pos2: Union[StrictInt, Literal["?"]] + pos3: Optional[Union[StrictInt, Literal["?"]]] = None diff --git a/variation/schemas/vrs_python_translator_schema.py b/variation/schemas/vrs_python_translator_schema.py new file mode 100644 index 0000000..6e36b00 --- /dev/null +++ 
b/variation/schemas/vrs_python_translator_schema.py @@ -0,0 +1,122 @@ +"""Module for vrs-python translator endpoint response schema""" +from enum import Enum +from typing import List, Literal, Optional, Union + +from ga4gh.vrs import models +from pydantic import BaseModel, ConfigDict, StrictStr + +from variation.schemas.normalize_response_schema import ServiceMeta + + +class VrsPythonMeta(BaseModel): + """Metadata regarding vrs-python dependency""" + + name: Literal["vrs-python"] = "vrs-python" + version: StrictStr + url: Literal[ + "https://github.com/ga4gh/vrs-python" + ] = "https://github.com/ga4gh/vrs-python" + + +class TranslateFromFormat(str, Enum): + """Enums for formats that vrs-python can translate from""" + + HGVS = "hgvs" + BEACON = "beacon" + GNOMAD = "gnomad" + SPDI = "spdi" + + +class TranslateToFormat(str, Enum): + """Enums for formats that vrs-python can translate to""" + + HGVS = "hgvs" + SPDI = "spdi" + + +class TranslateToQuery(BaseModel): + """Query fields for Translate To Service""" + + variation: models.Allele + fmt: TranslateToFormat + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "variation": { + "id": "ga4gh:VA.ztz4yxckrW1j7YFSprOz_T9gwLdMc6LB", + "type": "Allele", + "location": { + "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "type": "SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "start": 140453135, + "end": 140453136, + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + }, + "fmt": "hgvs", + } + } + ) + + +class TranslateToHGVSQuery(BaseModel): + """Query fields for Translate To HGVS Service""" + + variation: models.Allele + namespace: Optional[str] = None + + model_config = ConfigDict( + json_schema_extra={ + "example": { + "variation": { + "id": "ga4gh:VA.ztz4yxckrW1j7YFSprOz_T9gwLdMc6LB", + "type": "Allele", + "location": { + "id": "ga4gh:SL.txr-jqnTLuz_3RVrPamx9cYniAFJg977", + "type": 
"SequenceLocation", + "sequenceReference": { + "type": "SequenceReference", + "refgetAccession": "SQ.IW78mgV5Cqf6M24hy52hPjyyo5tCCd86", + }, + "start": 140453135, + "end": 140453136, + }, + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, + }, + "namespace": "refseq", + } + } + ) + + +class TranslateFromQuery(BaseModel): + """Query fields for Translate From Service""" + + variation: StrictStr + fmt: Optional[TranslateFromFormat] = None + + +class TranslateService(BaseModel): + """Response schema for vrs-python translator endpoints""" + + query: Union[TranslateFromQuery, TranslateToQuery, TranslateToHGVSQuery] + warnings: List[StrictStr] = [] + service_meta_: ServiceMeta + vrs_python_meta_: VrsPythonMeta + + +class TranslateFromService(TranslateService): + """Response schema for vrs-python translate from endpoint""" + + variation: Optional[models.Allele] = None + + +class TranslateToService(TranslateService): + """Response schema for vrs-python translate to endpoint""" + + variations: List[StrictStr] diff --git a/variation/to_copy_number_variation.py b/variation/to_copy_number_variation.py new file mode 100644 index 0000000..3c71f39 --- /dev/null +++ b/variation/to_copy_number_variation.py @@ -0,0 +1,720 @@ +"""Module for to copy number variation translation""" +from datetime import datetime +from typing import Dict, List, NamedTuple, Optional, Tuple, Union +from urllib.parse import unquote + +from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.sources import UtaDatabase +from ga4gh.core import ga4gh_identify +from ga4gh.vrs import models +from gene.query import QueryHandler as GeneQueryHandler +from gene.schemas import MatchType as GeneMatchType +from pydantic import ValidationError + +from variation.classify import Classify +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ClassificationType +from variation.schemas.copy_number_schema import ( + 
AmplificationToCxVarQuery, + AmplificationToCxVarService, + Comparator, + ParsedPosType, + ParsedToCnVarQuery, + ParsedToCnVarService, + ParsedToCxVarQuery, + ParsedToCxVarService, +) +from variation.schemas.hgvs_to_copy_number_schema import ( + HgvsToCopyNumberChangeService, + HgvsToCopyNumberCountService, +) +from variation.schemas.normalize_response_schema import ( + HGVSDupDelModeOption, + ServiceMeta, +) +from variation.schemas.service_schema import ClinVarAssembly +from variation.schemas.token_response_schema import TokenType +from variation.schemas.validation_response_schema import ValidationResult +from variation.to_vrs import ToVRS +from variation.tokenize import Tokenize +from variation.translate import Translate +from variation.utils import get_priority_sequence_location +from variation.validate import Validate +from variation.version import __version__ + +VALID_CLASSIFICATION_TYPES = [ + ClassificationType.GENOMIC_DUPLICATION, + ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS, + ClassificationType.GENOMIC_DELETION, + ClassificationType.GENOMIC_DELETION_AMBIGUOUS, +] + + +class ToCopyNumberError(Exception): + """Custom exceptions when representing copy number""" + + +class ParsedAccessionSummary(NamedTuple): + """Represents accession for parsed endpoints""" + + accession: str + lifted_over: bool + + +class ParsedChromosomeSummary(NamedTuple): + """Represents chromosome and assembly for parsed endpoints""" + + accession: str + chromosome: str + lifted_over: bool + + +class ToCopyNumberVariation(ToVRS): + """Class for representing copy number variation""" + + def __init__( + self, + seqrepo_access: SeqRepoAccess, + tokenizer: Tokenize, + classifier: Classify, + validator: Validate, + translator: Translate, + gene_normalizer: GeneQueryHandler, + uta: UtaDatabase, + ) -> None: + """Initialize theToCopyNumberVariation class + + :param seqrepo_access: Access to SeqRepo + :param tokenizer: Instance for tokenizing input strings + :param classifier: Instance for 
classifying list of ordered tokens + :param validator: Instance for validating classification + :param translator: Instance for translating valid results to VRS representations + :param gene_normalizer: Client for normalizing gene concepts + :param uta: Access to UTA queries + """ + super().__init__(seqrepo_access, tokenizer, classifier, validator, translator) + self.gene_normalizer = gene_normalizer + self.uta = uta + + async def _get_valid_results(self, q: str) -> Tuple[List[ValidationResult], List]: + """Get valid results for to copy number variation endpoint + + :param q: Input query string + :return: Valid results and list of warnings + """ + valid_results = [] + warnings = [] + + # Get tokens for input query + tokens = self.tokenizer.perform(unquote(q.strip()), warnings) + if not tokens: + return valid_results, warnings + + # Get classification for list of tokens + classification = self.classifier.perform(tokens) + if not classification: + warnings.append(f"Unable to find classification for: {q}") + return valid_results, warnings + + # Ensure that classification is HGVS duplication or deletion + tmp_classification = None + if all( + ( + classification.classification_type in VALID_CLASSIFICATION_TYPES, + TokenType.HGVS + in {t.token_type for t in classification.matching_tokens}, + ) + ): + tmp_classification = classification + + classification = tmp_classification + if not classification: + warnings = [f"{q} is not a supported HGVS genomic duplication or deletion"] + return valid_results, warnings + + # Get validation summary for classification + validation_summary = await self.validator.perform(classification) + if validation_summary.valid_results: + valid_results = validation_summary.valid_results + else: + warnings = validation_summary.warnings + valid_results = [] + + return valid_results, warnings + + async def _hgvs_to_cnv_resp( + self, + copy_number_type: HGVSDupDelModeOption, + do_liftover: bool, + valid_results: Tuple[List[ValidationResult], 
List[str]], + warnings: List[str], + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + ) -> Tuple[ + Optional[Union[models.CopyNumberCount, models.CopyNumberChange]], List[str] + ]: + """Return copy number variation and warnings response + + :param copy_number_type: The type of copy number variation. Must be either + `copy_number_count` or `copy_number_change` + :param hgvs_expr: HGVS expression + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :param Valid results and warnings for hgvs_expr + :param warnings: List of warnings + :return: CopyNumberVariation and warnings + """ + variation = None + if valid_results: + if copy_number_type == HGVSDupDelModeOption.COPY_NUMBER_CHANGE: + endpoint_name = Endpoint.HGVS_TO_COPY_NUMBER_CHANGE + else: + endpoint_name = Endpoint.HGVS_TO_COPY_NUMBER_COUNT + + translations, warnings = await self.get_translations( + valid_results, + warnings, + hgvs_dup_del_mode=copy_number_type, + endpoint_name=endpoint_name, + copy_change=copy_change, + baseline_copies=baseline_copies, + do_liftover=do_liftover, + ) + if translations: + variation = translations[0].vrs_variation + + if variation: + if copy_number_type == HGVSDupDelModeOption.COPY_NUMBER_COUNT: + variation = models.CopyNumberCount(**variation) + else: + variation = models.CopyNumberChange(**variation) + return variation, warnings + + async def hgvs_to_copy_number_count( + self, + hgvs_expr: str, + baseline_copies: int, + do_liftover: bool = False, + ) -> HgvsToCopyNumberCountService: + """Given hgvs, return abolute copy number variation + + :param hgvs_expr: HGVS expression + :param baseline_copies: Baseline copies number + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: HgvsToCopyNumberCountService containing Copy Number Count + Variation and warnings + """ + valid_results, warnings = await self._get_valid_results(hgvs_expr) + cn_var, warnings = await self._hgvs_to_cnv_resp( + 
HGVSDupDelModeOption.COPY_NUMBER_COUNT, + do_liftover, + valid_results, + warnings, + baseline_copies=baseline_copies, + ) + + return HgvsToCopyNumberCountService( + hgvs_expr=hgvs_expr, + warnings=warnings, + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + copy_number_count=cn_var, + ) + + async def hgvs_to_copy_number_change( + self, + hgvs_expr: str, + copy_change: Optional[models.CopyChange], + do_liftover: bool = False, + ) -> HgvsToCopyNumberChangeService: + """Given hgvs, return copy number change variation + + :param hgvs_expr: HGVS expression + :param copy_change: The copy change + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: HgvsToCopyNumberChangeService containing Copy Number Change + Variation and warnings + """ + valid_results, warnings = await self._get_valid_results(hgvs_expr) + cx_var, warnings = await self._hgvs_to_cnv_resp( + HGVSDupDelModeOption.COPY_NUMBER_CHANGE, + do_liftover, + valid_results, + warnings, + copy_change=copy_change, + ) + + return HgvsToCopyNumberChangeService( + hgvs_expr=hgvs_expr, + warnings=warnings, + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + copy_number_change=cx_var, + ) + + def _get_parsed_ac( + self, assembly: ClinVarAssembly, chromosome: str, use_grch38: bool = False + ) -> ParsedAccessionSummary: + """Get accession for parsed components + + :param assembly: Assembly + :param chromosome: Chromosome + :param use_grch38: Whether or not to use GRCh38 assembly + :raises ToCopyNumberError: If unable to translate assembly and chromosome + to an accession + :return: ParsedAccessionSummary containing accession and whether or not it was + lifted over + """ + accession = None + lifted_over = False + og_assembly = assembly + + if assembly == ClinVarAssembly.HG38: + assembly = ClinVarAssembly.GRCH38 + elif assembly == ClinVarAssembly.HG19: + assembly = ClinVarAssembly.GRCH37 + elif assembly == 
ClinVarAssembly.HG18: + assembly = ClinVarAssembly.NCBI36 + + if use_grch38 and assembly != ClinVarAssembly.GRCH38: + lifted_over = True + assembly = ClinVarAssembly.GRCH38 + + if assembly != ClinVarAssembly.NCBI36: + # Variation Normalizer does not support NCBI36 yet + query = f"{assembly.value}:{chromosome}" + aliases, error = self.seqrepo_access.translate_identifier(query, "ga4gh") + if aliases: + accession = aliases[0] + else: + raise ToCopyNumberError(str(error)) + else: + raise ToCopyNumberError( + f"{og_assembly.value} assembly is not currently supported" + ) + + return ParsedAccessionSummary(lifted_over=lifted_over, accession=accession) + + def _get_parsed_ac_chr( + self, accession: str, do_liftover: bool + ) -> ParsedChromosomeSummary: + """Get accession and chromosome for parsed components + + :param accession: Genomic accession + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :raises ToCopyNumberError: If unable to translate accession + :return: ParsedChromosomeSummary containing chromosome, accession, and whether + or not it was lifted over + """ + chromosome = None + new_ac = None + lifted_over = False + + aliases, error = self.seqrepo_access.translate_identifier(accession) + if error: + raise ToCopyNumberError(error) + + grch_aliases = [ + a for a in aliases if a.startswith(("GRCh38:chr", "GRCh37:chr")) + ] + + if grch_aliases: + grch_record = grch_aliases[0] + chromosome = grch_record.split(":")[-1] + + if grch_record.startswith("GRCh38") or not do_liftover: + new_ac = [a for a in aliases if a.startswith("ga4gh")][0] + else: + grch38_query = grch_record.replace("GRCh37", "GRCh38") + aliases, error = self.seqrepo_access.translate_identifier( + grch38_query, "ga4gh" + ) + + if error: + raise ToCopyNumberError(error) + + lifted_over = True + new_ac = aliases[0] + else: + raise ToCopyNumberError(f"Not a supported genomic accession: {accession}") + + return ParsedChromosomeSummary( + accession=new_ac, chromosome=chromosome, 
lifted_over=lifted_over + ) + + def _validate_ac_pos(self, accession: str, pos: int) -> None: + """Validate position for parsed components + + :param accession: Genomic accession + :param pos: Position on accession + :raises ToCopyNumberError: If position is not valid on accession or + if accession is not found in seqrepo + """ + try: + ref = self.seqrepo_access.sr[accession][pos - 1] + except ValueError as e: + raise ToCopyNumberError( + f"SeqRepo ValueError: {str(e).replace('start', 'Position')}" + ) + except KeyError: + raise ToCopyNumberError(f"Accession not found in SeqRepo: {accession}") + else: + if ref == "": + raise ToCopyNumberError(f"Position ({pos}) is not valid on {accession}") + + def _get_vrs_loc_start_or_end( + self, + accession: str, + pos0: int, + pos_type: ParsedPosType, + is_start: bool = True, + pos1: Optional[int] = None, + comparator: Optional[Comparator] = None, + ) -> Union[int, models.Range]: + """Get VRS Sequence Location start and end values + + :param accession: Genomic accession for sequence + :param pos0: Position (residue coords). If `pos_type` is a definite range, + this will be the min start position + :param pos_type: Type of the pos value in VRS Sequence Location + :param is_start: `True` if position(s) describing VRS start value. `False` if + position(s) describing VRS end value + :param pos1: Only set when end is a definite range, this will be the max end + position + :param comparator: Must provide when `pos_type` is an Indefinite Range. + Indicates which direction the range is indefinite. To represent (#_?), set + to '<='. To represent (?_#), set to '>='. 
+ :raises ToCopyNumberError: If position is not valid on accession when + using definite range + :return: VRS start or end value for sequence location + """ + if pos_type == ParsedPosType.NUMBER: + vrs_val = pos0 - 1 if is_start else pos0 + elif pos_type == ParsedPosType.DEFINITE_RANGE: + self._validate_ac_pos(accession, pos1) + vrs_val = models.Range( + [pos0 - 1 if is_start else pos0, pos1 - 1 if is_start else pos1] + ) + else: + if comparator == Comparator.LT_OR_EQUAL: + vrs_val = models.Range([None, pos0 - 1 if is_start else pos0]) + else: + vrs_val = models.Range([pos0 - 1 if is_start else pos0, None]) + + return vrs_val + + def _get_parsed_seq_loc( + self, + accession: str, + chromosome: str, + start0: int, + start_pos_type: ParsedPosType, + end0: int, + end_pos_type: ParsedPosType, + start1: Optional[int] = None, + end1: Optional[int] = None, + liftover_pos: bool = False, + start_pos_comparator: Optional[Comparator] = None, + end_pos_comparator: Optional[Comparator] = None, + ) -> Tuple[Optional[Dict], Optional[str]]: + """Get sequence location for parsed components. Accession will be validated. + + :param accession: Genomic accession for sequence + :param chromosome: Chromosome + :param start0: Start position (residue coords). If start is a definite range, + this will be the min start position + :param start_pos_type: Type of the start value in VRS Sequence Location + :param end0: End position (residue coords). If end is a definite range, this + will be the min end position + :param end_pos_type: Type of the end value in VRS Sequence Location + :param start1: Only set when start is a definite range, this will be the max + start position + :param end1: Only set when end is a definite range, this will be the max end + position + :param liftover_pos: Whether or not to liftover positions + :param start_pos_comparator: Must provide when `start_pos_type` is an Indefinite + Range. Indicates which direction the range is indefinite. 
To represent + (#_?), set to '<='. To represent (?_#), set to '>='. + :param end_pos_comparator: Must provide when `end_pos_type` is an Indefinite + Range. Indicates which direction the range is indefinite. To represent + (#_?), set to '<='. To represent (?_#), set to '>='. + :raises ToCopyNumberError: If error lifting over positions, translating + accession, positions not valid on accession, + :return: Tuple containing VRS sequence location represented as dict (if valid) + and warning (if invalid) + """ + seq_loc = None + + # Liftover pos if needed + if liftover_pos: + liftover_pos = self._liftover_pos(chromosome, start0, end0, start1, end1) + start0 = liftover_pos["start0"] + end0 = liftover_pos["end0"] + start1 = liftover_pos["start1"] + end1 = liftover_pos["end1"] + + sequences, error = self.seqrepo_access.translate_identifier(accession, "ga4gh") + if error: + raise ToCopyNumberError(error) + + sequence = sequences[0].split("ga4gh:")[-1] + + for pos in [start0, end0]: + # validate start0 and end0 since they're always required + self._validate_ac_pos(accession, pos) + + start_vrs = self._get_vrs_loc_start_or_end( + accession, + start0, + start_pos_type, + is_start=True, + pos1=start1, + comparator=start_pos_comparator, + ) + + end_vrs = self._get_vrs_loc_start_or_end( + accession, + end0, + end_pos_type, + is_start=False, + pos1=end1, + comparator=end_pos_comparator, + ) + + seq_loc = models.SequenceLocation( + sequenceReference=models.SequenceReference(refgetAccession=sequence), + start=start_vrs, + end=end_vrs, + ) + seq_loc.id = ga4gh_identify(seq_loc) + + return seq_loc.model_dump(exclude_none=True) if seq_loc else seq_loc + + def _liftover_pos( + self, + chromosome: str, + start0: int, + end0: int, + start1: Optional[int], + end1: Optional[int], + ) -> Dict: + """Liftover GRCh37 positions to GRCh38 positions + + :param chromosome: Chromosome. Must be contain 'chr' prefix, i.e 'chr7'. + :param start0: Start position (residue coords) GRCh37 assembly. 
If start is a + definite range, this will be the min start position + :param end0: End position (residue coords) GRCh37 assembly. If end is a definite + range, this will be the min end position + :param start1: Only set when start is a definite range, this will be the max + start position. GRCh37 assembly + :param end1: Only set when end is a definite range, this will be the max end + position. GRCh37 assembly + :raises ToCopyNumberError: If unable to liftover position + :return: Dictionary containing lifted over positions + ('start0', 'end0', 'start1', 'end1') + """ + liftover_pos = {"start0": None, "end0": None, "start1": None, "end1": None} + + for k, pos in [ + ("start0", start0), + ("end0", end0), + ("start1", start1), + ("end1", end1), + ]: + if pos is not None: + liftover = self.uta.liftover_37_to_38.convert_coordinate( + chromosome, pos + ) + if not liftover: + raise ToCopyNumberError( + f"Unable to liftover: {chromosome} with pos {pos}" + ) + else: + liftover_pos[k] = liftover[0][1] + + return liftover_pos + + def parsed_to_copy_number( + self, request_body: Union[ParsedToCnVarQuery, ParsedToCxVarQuery] + ) -> Union[ParsedToCnVarService, ParsedToCxVarService]: + """Given parsed genomic components, return Copy Number Count or Copy Number + Change Variation + + :param request_body: request body + :return: If `copy_number_type` is Copy Number Count, return ParsedToCnVarService + containing Copy Number Count variation and list of warnings. 
Else, return + ParsedToCxVarService containing Copy Number Change variation and list of + warnings + """ + variation = None + warnings = [] + + is_cx = isinstance(request_body, ParsedToCxVarQuery) + lifted_over = False + + try: + if not request_body.accession: + accession_summary = self._get_parsed_ac( + request_body.assembly, + request_body.chromosome, + use_grch38=request_body.do_liftover, + ) + chromosome = request_body.chromosome + accession = accession_summary.accession + lifted_over = accession_summary.lifted_over + else: + chr_summary = self._get_parsed_ac_chr( + request_body.accession, request_body.do_liftover + ) + accession = chr_summary.accession + chromosome = chr_summary.chromosome + lifted_over = chr_summary.lifted_over + + seq_loc = self._get_parsed_seq_loc( + accession, + chromosome, + request_body.start0, + request_body.start_pos_type, + request_body.end0, + request_body.end_pos_type, + start1=request_body.start1, + end1=request_body.end1, + start_pos_comparator=request_body.start_pos_comparator, + end_pos_comparator=request_body.end_pos_comparator, + liftover_pos=request_body.do_liftover and lifted_over, + ) + except ToCopyNumberError as e: + warnings.append(str(e)) + else: + if is_cx: + variation = models.CopyNumberChange( + location=seq_loc, copyChange=request_body.copy_change + ) + variation.id = ga4gh_identify(variation) + else: + if request_body.copies_type == ParsedPosType.NUMBER: + copies = request_body.copies0 + elif request_body.copies_type == ParsedPosType.DEFINITE_RANGE: + copies = models.Range([request_body.copies0, request_body.copies1]) + else: + if request_body.copies_comparator == Comparator.LT_OR_EQUAL: + copies = models.Range([None, request_body.copies0]) + else: + copies = models.Range([request_body.copies0, None]) + variation = models.CopyNumberCount(location=seq_loc, copies=copies) + variation.id = ga4gh_identify(variation) + + service_params = { + "warnings": warnings, + "service_meta_": ServiceMeta( + version=__version__, 
response_datetime=datetime.now() + ), + } + + if is_cx: + service_params["copy_number_change"] = variation + else: + service_params["copy_number_count"] = variation + + return ( + ParsedToCxVarService(**service_params) + if is_cx + else ParsedToCnVarService(**service_params) + ) + + def amplification_to_cx_var( + self, + gene: str, + sequence_id: Optional[str] = None, + start: Optional[int] = None, + end: Optional[int] = None, + ) -> AmplificationToCxVarService: + """Return Copy Number Change Variation for Amplification query + Parameter priority: + 1. sequence_id, start, end (must provide ALL) + 2. use the gene-normalizer to get the SequenceLocation + + :param gene: Gene query + :param sequence_id: Sequence ID for the location. If set, must also provide + `start` and `end` + :param start: Start position as residue coordinate for the sequence location. If + set, must also provide `sequence` and `end` + :param end: End position as residue coordinate for the sequence location. If + set, must also provide `sequence` and `start` + :return: AmplificationToCxVarService containing Copy Number Change and + list of warnings + """ + warnings = list() + amplification_label = None + variation = None + try: + og_query = AmplificationToCxVarQuery( + gene=gene, sequence_id=sequence_id, start=start, end=end + ) + except ValidationError as e: + warnings.append(str(e)) + og_query = None + else: + # Need to validate the input gene + gene_norm_resp = self.gene_normalizer.normalize(gene) + if gene_norm_resp.match_type != GeneMatchType.NO_MATCH: + vrs_location = None + gene = gene_norm_resp.gene + gene_norm_label = gene.label + amplification_label = f"{gene_norm_label} Amplification" + if all((sequence_id, start, end)): + # User provided input to make sequence location + seq_id, w = self.seqrepo_access.translate_identifier( + sequence_id, "ga4gh" + ) + if w: + warnings.append(w) + else: + # Validate start/end are actually on the sequence + _, w = 
self.seqrepo_access.get_reference_sequence( + sequence_id, start=start, end=end + ) + if w: + warnings.append(w) + else: + vrs_location = models.SequenceLocation( + sequenceReference=models.SequenceReference( + refgetAccession=seq_id[0].split("ga4gh:")[-1] + ), + start=start - 1, + end=end, + ) + else: + # Use gene normalizer to get sequence location + seq_loc = get_priority_sequence_location(gene, self.seqrepo_access) + if seq_loc: + vrs_location = models.SequenceLocation(**seq_loc) + else: + warnings.append( + f"gene-normalizer could not find a priority sequence " + f"location for gene: {gene_norm_label}" + ) + + if vrs_location: + vrs_location.id = ga4gh_identify(vrs_location) + vrs_cx = models.CopyNumberChange( + location=vrs_location, + copyChange=models.CopyChange.EFO_0030072.value, + ) + vrs_cx.id = ga4gh_identify(vrs_cx) + variation = models.CopyNumberChange( + **vrs_cx.model_dump(exclude_none=True) + ) + else: + warnings.append(f"gene-normalizer returned no match for gene: {gene}") + + return AmplificationToCxVarService( + query=og_query, + amplification_label=amplification_label, + copy_number_change=variation, + warnings=warnings, + service_meta_=ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + ) diff --git a/variation/to_vrs.py b/variation/to_vrs.py new file mode 100644 index 0000000..9a1bd2c --- /dev/null +++ b/variation/to_vrs.py @@ -0,0 +1,147 @@ +"""Module for to_vrs endpoint.""" +from datetime import datetime +from typing import List, Optional, Tuple +from urllib.parse import unquote + +from cool_seq_tool.handlers import SeqRepoAccess +from ga4gh.vrs import models + +from variation.classify import Classify +from variation.schemas.app_schemas import Endpoint +from variation.schemas.normalize_response_schema import ( + HGVSDupDelModeOption, + ServiceMeta, +) +from variation.schemas.to_vrs_response_schema import ToVRSService +from variation.schemas.translation_response_schema import TranslationResult +from 
variation.schemas.validation_response_schema import ValidationResult +from variation.tokenize import Tokenize +from variation.translate import Translate +from variation.validate import Validate +from variation.version import __version__ +from variation.vrs_representation import VRSRepresentation + + +class ToVRS(VRSRepresentation): + """The class for translating variation strings to VRS representations.""" + + def __init__( + self, + seqrepo_access: SeqRepoAccess, + tokenizer: Tokenize, + classifier: Classify, + validator: Validate, + translator: Translate, + ) -> None: + """Initialize the ToVRS class. + + :param SeqRepoAccess seqrepo_access: Access to SeqRepo + :param Tokenize tokenizer: Tokenizer class for tokenizing + :param Classify classifier: Classifier class for classifying tokens + :param Validate validator: Validator class for validating valid inputs + :param Translate translator: Translating valid inputs + """ + super().__init__(seqrepo_access) + self.tokenizer = tokenizer + self.classifier = classifier + self.validator = validator + self.translator = translator + + async def get_translations( + self, + valid_results: List[ValidationResult], + warnings: List, + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Tuple[List[TranslationResult], List[str]]: + """Get translation results + + :param valid_results: List of valid results for a given input + :param warnings: List of warnings + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The copy change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRC3h8 assembly + :return: Tuple 
containing list of translations and list of warnings + """ + translations = [] + for valid_result in valid_results: + tr = await self.translator.perform( + valid_result, + warnings, + endpoint_name=endpoint_name, + hgvs_dup_del_mode=hgvs_dup_del_mode, + baseline_copies=baseline_copies, + copy_change=copy_change, + do_liftover=do_liftover, + ) + if tr and tr not in translations: + translations.append(tr) + + if not translations and not warnings: + warnings.append("Unable to translate variation") + + return translations, warnings + + async def to_vrs(self, q: str) -> ToVRSService: + """Return a VRS-like representation of all validated variations for a query. + + :param str q: The variation to translate (HGVS, gnomAD VCF, or free text) on + GRCh37 or GRCh38 assembly + :return: ToVRSService containing VRS variations and warnings + """ + warnings = [] + variations = [] + params = { + "search_term": q, + "variations": variations, + "service_meta_": ServiceMeta( + version=__version__, response_datetime=datetime.now() + ), + "warnings": warnings, + } + + # Get tokens for input query + tokens = self.tokenizer.perform(unquote(q.strip()), warnings) + if warnings: + params["warnings"] = warnings + return ToVRSService(**params) + + # Get classification for list of tokens + classification = self.classifier.perform(tokens) + if not classification: + params["warnings"] = [f"Unable to find classification for: {q}"] + return ToVRSService(**params) + + # Get validation summary for classification + validation_summary = await self.validator.perform(classification) + if validation_summary.valid_results: + # Get translated VRS representation for valid results + translations, warnings = await self.get_translations( + validation_summary.valid_results, + warnings, + endpoint_name=Endpoint.TO_VRS, + hgvs_dup_del_mode=HGVSDupDelModeOption.DEFAULT, + do_liftover=False, + ) + else: + translations = [] + warnings = validation_summary.warnings + + if not translations: + variations = [] + else: + 
variations = [] + # Ensure only unique VRS variations are in the list of variations returned + for tr in translations: + if tr.vrs_variation not in variations: + variations.append(tr.vrs_variation) + + params["warnings"] = warnings + params["variations"] = variations + return ToVRSService(**params) diff --git a/variation/tokenize.py b/variation/tokenize.py new file mode 100644 index 0000000..26d2f49 --- /dev/null +++ b/variation/tokenize.py @@ -0,0 +1,99 @@ +"""A module for tokenization.""" +from typing import List + +from variation.schemas.token_response_schema import Token, TokenType +from variation.tokenizers import ( + HGVS, + CdnaDeletion, + CdnaDelIns, + CdnaGenomicReferenceAgree, + CdnaInsertion, + CdnaSubstitution, + FreeTextCategorical, + GeneSymbol, + GenomicDeletion, + GenomicDelIns, + GenomicDuplication, + GenomicInsertion, + GenomicSubstitution, + GnomadVCF, + ProteinDeletion, + ProteinDelIns, + ProteinInsertion, + ProteinReferenceAgree, + ProteinSubstitution, +) +from variation.tokenizers.tokenizer import Tokenizer + +r"(\((\?|d+)_(\?|\d+)\))_(\((\?|\d+)_(\?|\d+)\))dup" + + +class Tokenize: + """The tokenize class.""" + + def __init__(self, gene_symbol: GeneSymbol) -> None: + """Initialize the tokenize class.""" + self.gene_symbol = gene_symbol + self.tokenizers: List[Tokenizer] = [ + HGVS(), + GnomadVCF(), + self.gene_symbol, + FreeTextCategorical(), + # Substitution + ProteinSubstitution(), + GenomicSubstitution(), + CdnaSubstitution(), + # Reference Agree + ProteinReferenceAgree(), + CdnaGenomicReferenceAgree(), + # Delins + ProteinDelIns(), + CdnaDelIns(), + GenomicDelIns(), + # Deletion + ProteinDeletion(), + CdnaDeletion(), + GenomicDeletion(), + # Insertion + ProteinInsertion(), + CdnaInsertion(), + GenomicInsertion(), + # Duplication + GenomicDuplication(), + ] + + def perform(self, search_string: str, warnings: List[str]) -> List[Token]: + """Return a list of tokens for a given search string + + :param search_string: The input string to 
search on + :param warnings: List of warnings + :return: A list of tokens found + """ + terms = search_string.split() + + tokens: List[Token] = [] + for term in terms: + if not term: + continue + + matched = False + for tokenizer in self.tokenizers: + res = tokenizer.match(term) + if res: + if isinstance(res, List): + for r in res: + tokens.append(r) + if not matched: + matched = True + else: + tokens.append(res) + matched = True + break + + if not matched: + warnings.append(f"Unable to tokenize: {term}") + tokens.append( + Token(token=term, token_type=TokenType.UNKNOWN, input_string=term) + ) + + return tokens diff --git a/variation/tokenizers/__init__.py b/variation/tokenizers/__init__.py new file mode 100644 index 0000000..0da1986 --- /dev/null +++ b/variation/tokenizers/__init__.py @@ -0,0 +1,20 @@ +"""Module to load and init namespace at package level.""" +from .cdna_and_genomic_reference_agree import CdnaGenomicReferenceAgree +from .cdna_deletion import CdnaDeletion +from .cdna_delins import CdnaDelIns +from .cdna_insertion import CdnaInsertion +from .cdna_substitution import CdnaSubstitution +from .free_text_categorical import FreeTextCategorical +from .gene_symbol import GeneSymbol +from .genomic_deletion import GenomicDeletion +from .genomic_delins import GenomicDelIns +from .genomic_duplication import GenomicDuplication +from .genomic_insertion import GenomicInsertion +from .genomic_substitution import GenomicSubstitution +from .gnomad_vcf import GnomadVCF +from .hgvs import HGVS +from .protein_deletion import ProteinDeletion +from .protein_delins import ProteinDelIns +from .protein_insertion import ProteinInsertion +from .protein_reference_agree import ProteinReferenceAgree +from .protein_substitution import ProteinSubstitution diff --git a/variation/tokenizers/cdna_and_genomic_reference_agree.py b/variation/tokenizers/cdna_and_genomic_reference_agree.py new file mode 100644 index 0000000..f47ca53 --- /dev/null +++ 
b/variation/tokenizers/cdna_and_genomic_reference_agree.py @@ -0,0 +1,45 @@ +"""A module for Reference Agree Tokenization on cDNA and genomic reference sequence.""" +from typing import Optional, Union + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import CDNA_GENOMIC_REFERENCE_AGREE +from variation.schemas.token_response_schema import ( + CdnaReferenceAgreeToken, + GenomicReferenceAgreeToken, +) +from variation.tokenizers.tokenizer import Tokenizer + + +class CdnaGenomicReferenceAgree(Tokenizer): + """Class for tokenizing Reference Agree on cDNA and genomic reference sequence.""" + + def match( + self, input_string: str + ) -> Optional[Union[CdnaReferenceAgreeToken, GenomicReferenceAgreeToken]]: + """Return a CdnaReferenceAgreeToken or GenomicReferenceAgreeToken match if + one exists. + + :param input_string: The input string to match + :return: A CdnaReferenceAgreeToken or GenomicReferenceAgreeToken if a match + exists. Otherwise, None. + """ + og_input_string = input_string + coordinate_type, input_string = self.strip_coord_prefix(input_string) + if not any((coordinate_type, input_string)): + return None + + match = CDNA_GENOMIC_REFERENCE_AGREE.match(input_string) + if match: + match_dict = match.groupdict() + params = { + "input_string": og_input_string, + "token": input_string, + "coordinate_type": coordinate_type, + "pos": int(match_dict["pos"]), + } + + if coordinate_type == AnnotationLayer.GENOMIC: + return GenomicReferenceAgreeToken(**params) + elif coordinate_type == AnnotationLayer.CDNA: + return CdnaReferenceAgreeToken(**params) diff --git a/variation/tokenizers/cdna_deletion.py b/variation/tokenizers/cdna_deletion.py new file mode 100644 index 0000000..01b6eb9 --- /dev/null +++ b/variation/tokenizers/cdna_deletion.py @@ -0,0 +1,40 @@ +"""A module for Cdna Deletion Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import CNDA_GENOMIC_DELETION +from 
variation.schemas.token_response_schema import CdnaDeletionToken +from variation.tokenizers.tokenizer import Tokenizer + + +class CdnaDeletion(Tokenizer): + """Class for tokenizing Deletion at the cdna reference sequence.""" + + def match(self, input_string: str) -> Optional[CdnaDeletionToken]: + """Return a CdnaDeletionToken match if one exists. + + :param input_string: The input string to match + :return: A CdnaDeletionToken if a match exists. + Otherwise, None. + """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.CDNA + ) + + if not input_string: + return None + + match = CNDA_GENOMIC_DELETION.match(input_string) + + if match: + match_dict = match.groupdict() + + return CdnaDeletionToken( + input_string=og_input_string, + token=input_string, + pos0=int(match_dict["pos0"]), + pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, + deleted_sequence=match_dict["deleted_sequence"], + ) diff --git a/variation/tokenizers/cdna_delins.py b/variation/tokenizers/cdna_delins.py new file mode 100644 index 0000000..7169e01 --- /dev/null +++ b/variation/tokenizers/cdna_delins.py @@ -0,0 +1,39 @@ +"""A module for Cdna Deletion Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import CDNA_GENOMIC_DELINS +from variation.schemas.token_response_schema import CdnaDelInsToken +from variation.tokenizers.tokenizer import Tokenizer + + +class CdnaDelIns(Tokenizer): + """Class for tokenizing delins at the cdna reference sequence.""" + + def match(self, input_string: str) -> Optional[CdnaDelInsToken]: + """Return a CdnaDelInsToken match if one exists. + + :param input_string: The input string to match + :return: A CdnaDelInsToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.CDNA + ) + if not input_string: + return None + + match = CDNA_GENOMIC_DELINS.match(input_string) + + if match: + match_dict = match.groupdict() + + return CdnaDelInsToken( + input_string=og_input_string, + token=input_string, + pos0=int(match_dict["pos0"]), + pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, + inserted_sequence=match_dict["inserted_sequence"], + ) diff --git a/variation/tokenizers/cdna_insertion.py b/variation/tokenizers/cdna_insertion.py new file mode 100644 index 0000000..87ae61c --- /dev/null +++ b/variation/tokenizers/cdna_insertion.py @@ -0,0 +1,42 @@ +"""A module for Cdna Insertion Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import CDNA_GENOMIC_INSERTION +from variation.schemas.token_response_schema import CdnaInsertionToken +from variation.tokenizers.tokenizer import Tokenizer + + +class CdnaInsertion(Tokenizer): + """Class for tokenizing Insertion at the cdna reference sequence.""" + + def match(self, input_string: str) -> Optional[CdnaInsertionToken]: + """Return a CdnaInsertionToken match if one exists. + + :param input_string: The input string to match + :return: A CdnaInsertionToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.CDNA + ) + if not input_string: + return None + + match = CDNA_GENOMIC_INSERTION.match(input_string) + + if match: + match_dict = match.groupdict() + pos0 = int(match_dict["pos0"]) + pos1 = int(match_dict["pos1"]) + inserted_sequence = match_dict["inserted_sequence"] + + return CdnaInsertionToken( + input_string=og_input_string, + token=f"{pos0}_{pos1}{inserted_sequence}", + pos0=pos0, + pos1=pos1, + inserted_sequence=inserted_sequence, + ) diff --git a/variation/tokenizers/cdna_substitution.py b/variation/tokenizers/cdna_substitution.py new file mode 100644 index 0000000..7c166d8 --- /dev/null +++ b/variation/tokenizers/cdna_substitution.py @@ -0,0 +1,39 @@ +"""A module for Cdna Substitution Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import CDNA_GENOMIC_SUBSTITUTION +from variation.schemas.token_response_schema import CdnaSubstitutionToken +from variation.tokenizers.tokenizer import Tokenizer + + +class CdnaSubstitution(Tokenizer): + """Class for tokenizing Substitution at the cdna reference sequence.""" + + def match(self, input_string: str) -> Optional[CdnaSubstitutionToken]: + """Return a CdnaSubstitutionToken match if one exists. + + :param input_string: The input string to match + :return: A CdnaSubstitutionToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.CDNA + ) + if not input_string: + return None + + match = CDNA_GENOMIC_SUBSTITUTION.match(input_string) + + if match: + match_dict = match.groupdict() + + return CdnaSubstitutionToken( + input_string=og_input_string, + token=input_string, + pos=int(match_dict["pos"]), + ref=match_dict["ref"], + alt=match_dict["alt"], + ) diff --git a/variation/tokenizers/free_text_categorical.py b/variation/tokenizers/free_text_categorical.py new file mode 100644 index 0000000..7edec06 --- /dev/null +++ b/variation/tokenizers/free_text_categorical.py @@ -0,0 +1,21 @@ +"""A module for free text categorical variation tokenization""" +from typing import Optional + +from variation.schemas.token_response_schema import AmplificationToken +from variation.tokenizers.tokenizer import Tokenizer + + +class FreeTextCategorical(Tokenizer): + """The Free Text Categorical tokenizer class""" + + def match(self, input_string: str) -> Optional[AmplificationToken]: + """Return tokens that match the input string. 
+ Only supports amplification for now + + :param input_string: Input string + :return: AmplificationToken token if a match is found + """ + if input_string.lower() == "amplification": + return AmplificationToken(token=input_string, input_string=input_string) + + return None diff --git a/variation/tokenizers/gene_symbol.py b/variation/tokenizers/gene_symbol.py new file mode 100644 index 0000000..fde7cbe --- /dev/null +++ b/variation/tokenizers/gene_symbol.py @@ -0,0 +1,40 @@ +"""Module for Gene Symbol tokenization.""" +from typing import Optional + +from gene.query import QueryHandler as GeneQueryHandler + +from variation.schemas.token_response_schema import GeneToken +from variation.tokenizers.tokenizer import Tokenizer + + +class GeneSymbol(Tokenizer): + """Class for gene symbol tokenization.""" + + def __init__(self, gene_normalizer: GeneQueryHandler) -> None: + """Initialize the gene symbol tokenizer class. + + :param gene_normalizer: Instance to gene normalizer QueryHandler + """ + self.gene_normalizer = gene_normalizer + + def match(self, input_string: str) -> Optional[GeneToken]: + """Return tokens that are genes + + :param input_string: Input string + :return: GeneToken if match was found + """ + norm_resp = self.gene_normalizer.normalize(input_string) + norm_match_type = norm_resp.match_type + + if norm_match_type != 0: + gene = norm_resp.gene + label = gene.label + gene_match_token = GeneToken( + token=label, + input_string=input_string, + matched_value=label, + gene=gene, + ) + return gene_match_token + + return None diff --git a/variation/tokenizers/genomic_deletion.py b/variation/tokenizers/genomic_deletion.py new file mode 100644 index 0000000..0d2f93b --- /dev/null +++ b/variation/tokenizers/genomic_deletion.py @@ -0,0 +1,100 @@ +"""A module for Genomic Deletion Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import ( + CNDA_GENOMIC_DELETION, + GENOMIC_DELETION_AMBIGUOUS_1, + 
GENOMIC_DELETION_AMBIGUOUS_2, + GENOMIC_DELETION_AMBIGUOUS_3, +) +from variation.schemas.app_schemas import AmbiguousRegexType +from variation.schemas.token_response_schema import ( + GenomicDeletionAmbiguousToken, + GenomicDeletionToken, +) +from variation.tokenizers.tokenizer import Tokenizer + + +class GenomicDeletion(Tokenizer): + """Class for tokenizing Deletion at the genomic reference sequence.""" + + def match(self, input_string: str) -> Optional[GenomicDeletionToken]: + """Return a GenomicDeletionToken match if one exists. + + :param input_string: The input string to match + :return: A GenomicDeletionToken if a match exists. + Otherwise, None. + """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.GENOMIC + ) + + if not input_string: + return None + + # First try matching on simple genomic deletions + match = CNDA_GENOMIC_DELETION.match(input_string) + + if match: + match_dict = match.groupdict() + + return GenomicDeletionToken( + input_string=og_input_string, + token=input_string, + pos0=int(match_dict["pos0"]), + pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, + deleted_sequence=match_dict["deleted_sequence"], + ) + else: + # Going to try ambiguous genomic duplications + match = GENOMIC_DELETION_AMBIGUOUS_1.match(input_string) + if match: + match_dict = match.groupdict() + pos0 = match_dict["pos0"] + pos1 = match_dict["pos1"] + pos2 = match_dict["pos2"] + pos3 = match_dict["pos3"] + + # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported + if not any( + ((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?")) + ): + return GenomicDeletionAmbiguousToken( + input_string=og_input_string, + token=input_string, + pos0=int(pos0) if pos0 != "?" else pos0, + pos1=int(pos1) if pos1 != "?" else pos1, + pos2=int(pos2) if pos2 != "?" else pos2, + pos3=int(pos3) if pos3 != "?" 
else pos3, + ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + ) + + else: + for pattern_re, regex_type in [ + (GENOMIC_DELETION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), + (GENOMIC_DELETION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), + ]: + match = pattern_re.match(input_string) + + if match: + matched_pos = dict() + match_dict = match.groupdict() + for k in match_dict: + v = match_dict[k] + if v: + v = int(v) if v != "?" else v + + matched_pos[k] = v + + return GenomicDeletionAmbiguousToken( + input_string=og_input_string, + token=input_string, + pos0=matched_pos["pos0"], + pos1=matched_pos.get("pos1"), + pos2=matched_pos["pos2"], + pos3=matched_pos.get("pos3"), + ambiguous_regex_type=regex_type, + ) diff --git a/variation/tokenizers/genomic_delins.py b/variation/tokenizers/genomic_delins.py new file mode 100644 index 0000000..9c7603d --- /dev/null +++ b/variation/tokenizers/genomic_delins.py @@ -0,0 +1,41 @@ +"""A module for Genomic DelIns Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import CDNA_GENOMIC_DELINS +from variation.schemas.token_response_schema import GenomicDelInsToken +from variation.tokenizers.tokenizer import Tokenizer + + +class GenomicDelIns(Tokenizer): + """Class for tokenizing DelIns at the linear + genomic reference sequence. + """ + + def match(self, input_string: str) -> Optional[GenomicDelInsToken]: + """Return a GenomicDelInsToken match if one exists. + + :param input_string: The input string to match + :return: A GenomicDelInsToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.GENOMIC + ) + if not input_string: + return None + + match = CDNA_GENOMIC_DELINS.match(input_string) + + if match: + match_dict = match.groupdict() + + return GenomicDelInsToken( + input_string=og_input_string, + token=input_string, + pos0=int(match_dict["pos0"]), + pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, + inserted_sequence=match_dict["inserted_sequence"], + ) diff --git a/variation/tokenizers/genomic_duplication.py b/variation/tokenizers/genomic_duplication.py new file mode 100644 index 0000000..f03246a --- /dev/null +++ b/variation/tokenizers/genomic_duplication.py @@ -0,0 +1,98 @@ +"""A module for Genomic Duplication Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import ( + GENOMIC_DUPLICATION, + GENOMIC_DUPLICATION_AMBIGUOUS_1, + GENOMIC_DUPLICATION_AMBIGUOUS_2, + GENOMIC_DUPLICATION_AMBIGUOUS_3, +) +from variation.schemas.app_schemas import AmbiguousRegexType +from variation.schemas.token_response_schema import ( + GenomicDuplicationAmbiguousToken, + GenomicDuplicationToken, +) +from variation.tokenizers.tokenizer import Tokenizer + + +class GenomicDuplication(Tokenizer): + """Class for tokenizing duplications on the genomic coordinate.""" + + def match(self, input_string: str) -> Optional[GenomicDuplicationToken]: + """Return a GenomicDelInsToken match if one exists. + + :param input_string: The input string to match + :return: A GenomicDelInsToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.GENOMIC + ) + if not input_string: + return None + + # First try matching on simple genomic duplications + match = GENOMIC_DUPLICATION.match(input_string) + + if match: + match_dict = match.groupdict() + + return GenomicDuplicationToken( + input_string=og_input_string, + token=input_string, + pos0=int(match_dict["pos0"]), + pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, + ) + else: + # Going to try ambiguous genomic duplications + match = GENOMIC_DUPLICATION_AMBIGUOUS_1.match(input_string) + if match: + match_dict = match.groupdict() + pos0 = match_dict["pos0"] + pos1 = match_dict["pos1"] + pos2 = match_dict["pos2"] + pos3 = match_dict["pos3"] + + # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported + if not any( + ((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?")) + ): + return GenomicDuplicationAmbiguousToken( + input_string=og_input_string, + token=input_string, + pos0=int(pos0) if pos0 != "?" else pos0, + pos1=int(pos1) if pos1 != "?" else pos1, + pos2=int(pos2) if pos2 != "?" else pos2, + pos3=int(pos3) if pos3 != "?" else pos3, + ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + ) + + else: + for pattern_re, regex_type in [ + (GENOMIC_DUPLICATION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), + (GENOMIC_DUPLICATION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), + ]: + match = pattern_re.match(input_string) + + if match: + matched_pos = dict() + match_dict = match.groupdict() + for k in match_dict: + v = match_dict[k] + if v: + v = int(v) if v != "?" 
else v + + matched_pos[k] = v + + return GenomicDuplicationAmbiguousToken( + input_string=og_input_string, + token=input_string, + pos0=matched_pos["pos0"], + pos1=matched_pos.get("pos1"), + pos2=matched_pos["pos2"], + pos3=matched_pos.get("pos3"), + ambiguous_regex_type=regex_type, + ) diff --git a/variation/tokenizers/genomic_insertion.py b/variation/tokenizers/genomic_insertion.py new file mode 100644 index 0000000..43576fe --- /dev/null +++ b/variation/tokenizers/genomic_insertion.py @@ -0,0 +1,44 @@ +"""A module for Genomic Insertion Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import CDNA_GENOMIC_INSERTION +from variation.schemas.token_response_schema import ( + GenomicInsertionToken, +) +from variation.tokenizers.tokenizer import Tokenizer + + +class GenomicInsertion(Tokenizer): + """Class for tokenizing Insertion at the genomic reference sequence.""" + + def match(self, input_string: str) -> Optional[GenomicInsertionToken]: + """Return a GenomicInsertionToken match if one exists. + + :param input_string: The input string to match + :return: A GenomicInsertionToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.GENOMIC + ) + if not input_string: + return None + + match = CDNA_GENOMIC_INSERTION.match(input_string) + + if match: + match_dict = match.groupdict() + pos0 = int(match_dict["pos0"]) + pos1 = int(match_dict["pos1"]) + inserted_sequence = match_dict["inserted_sequence"] + + return GenomicInsertionToken( + input_string=og_input_string, + token=f"{pos0}_{pos1}{inserted_sequence}", + pos0=pos0, + pos1=pos1, + inserted_sequence=inserted_sequence, + ) diff --git a/variation/tokenizers/genomic_substitution.py b/variation/tokenizers/genomic_substitution.py new file mode 100644 index 0000000..8024238 --- /dev/null +++ b/variation/tokenizers/genomic_substitution.py @@ -0,0 +1,43 @@ +"""A module for Genomic Substitution Tokenization.""" +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.regex import CDNA_GENOMIC_SUBSTITUTION +from variation.schemas.token_response_schema import ( + GenomicSubstitutionToken, +) +from variation.tokenizers.tokenizer import Tokenizer + + +class GenomicSubstitution(Tokenizer): + """Class for tokenizing SNV Substitution at the linear genomic + reference sequence. + """ + + def match(self, input_string: str) -> Optional[GenomicSubstitutionToken]: + """Return a GenomicSubstitutionToken match if one exists. + + :param input_string: The input string to match + :return: A GenomicSubstitutionToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + _, input_string = self.strip_coord_prefix( + input_string, match_coord_type=AnnotationLayer.GENOMIC + ) + if not input_string: + return None + + match = CDNA_GENOMIC_SUBSTITUTION.match(input_string) + + if match: + match_dict = match.groupdict() + + return GenomicSubstitutionToken( + input_string=og_input_string, + token=input_string, + pos=int(match_dict["pos"]), + ref=match_dict["ref"], + alt=match_dict["alt"], + ) diff --git a/variation/tokenizers/gnomad_vcf.py b/variation/tokenizers/gnomad_vcf.py new file mode 100644 index 0000000..6cf309b --- /dev/null +++ b/variation/tokenizers/gnomad_vcf.py @@ -0,0 +1,41 @@ +"""A module for gnomad VCF tokenization""" +import re +from typing import Optional + +from variation.schemas.token_response_schema import GnomadVcfToken +from variation.tokenizers.tokenizer import Tokenizer + + +class GnomadVCF(Tokenizer): + """The gnomad VCF tokenizer class""" + + splitter = re.compile( + r"^(chr|chromosome)?(?P([1-9]|[1][0-9]|[2][0-2]|X|Y))-" + r"(?P[1-9]\d*)-(?P[actg]+)-(?P[actg]+)$", + re.IGNORECASE, + ) + + def match(self, input_string: str) -> Optional[GnomadVcfToken]: + """Return a GnomadVCFToken if a match exists. 
+ + :param input_string: The input string to match + :return: `Token` if gnomAD VCF match was found, else `None` + """ + match = self.splitter.match(input_string) + if match: + match_dict = match.groupdict() + chromosome = match_dict["chromosome"].upper() + pos = int(match_dict["pos"]) + ref = match_dict["ref"].upper() + alt = match_dict["alt"].upper() + + return GnomadVcfToken( + token=f"{chromosome}-{pos}-{ref}-{alt}", + input_string=input_string, + chromosome=chromosome, + pos=pos, + ref=ref, + alt=alt, + ) + + return None diff --git a/variation/tokenizers/hgvs.py b/variation/tokenizers/hgvs.py new file mode 100644 index 0000000..507d589 --- /dev/null +++ b/variation/tokenizers/hgvs.py @@ -0,0 +1,36 @@ +"""Module for HGVS tokenization.""" +import re +from typing import Optional + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.schemas.token_response_schema import HgvsToken +from variation.tokenizers.tokenizer import Tokenizer + + +class HGVS(Tokenizer): + """The HGVS tokenizer class.""" + + splitter = re.compile( + r"^(?P(NC_|NM_|NP_|ENSP|ENST)[^:\s]+):(?P[cgnpr])\.(?P\S+)$" # noqa: E501 + ) + + def match(self, input_string: str) -> Optional[HgvsToken]: + """Return HGVS token matches from input string. 
+ + :param input_string: The input string to match + :return: `HgvsToken` if HGVS match was found, else `None` + """ + match = self.splitter.match(input_string) + if match: + match_dict = match.groupdict() + + return HgvsToken( + token=input_string, + input_string=input_string, + accession=match_dict["accession"], + coordinate_type=AnnotationLayer(match_dict["coordinate"]), + change=match_dict["change"], + ) + else: + return None diff --git a/variation/tokenizers/protein_deletion.py b/variation/tokenizers/protein_deletion.py new file mode 100644 index 0000000..903611a --- /dev/null +++ b/variation/tokenizers/protein_deletion.py @@ -0,0 +1,94 @@ +"""A module for tokenizing Protein Deletions.""" +import re +from typing import Optional + +from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 + +from variation.regex import PROTEIN_DELETION +from variation.schemas.token_response_schema import ProteinDeletionToken +from variation.tokenizers.tokenizer import Tokenizer + + +class ProteinDeletion(Tokenizer): + """Class for tokenizing Deletions on the protein reference sequence.""" + + pattern = ( + r"^(?P[a-z]+)(?P\d+)" + r"(_(?P[a-z]+)(?P\d+))?del(?P[a-z]+)?$" + ) + splitter = re.compile(pattern) + + def match(self, input_string: str) -> Optional[ProteinDeletionToken]: + """Return a ProteinDeletionToken match if one exists. + + :param input_string: The input string to match + :return: A ProteinDeletionToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + + if input_string.startswith(("(p.", "p.(")) and input_string.endswith(")"): + input_string = input_string[3:-1] + elif input_string.startswith("p."): + input_string = input_string[2:] + elif input_string[0] == "(" and input_string[-1] == ")": + input_string = input_string[1:-1] + + match = PROTEIN_DELETION.match(input_string) + if match: + match_dict = match.groupdict() + + aa0 = match_dict["aa0"] + pos0 = int(match_dict["pos0"]) + aa1 = match_dict["aa1"] + pos1 = int(match_dict["pos1"]) if match_dict["pos1"] else None + deleted_sequence = match_dict["deleted_sequence"] + + # One letter codes for aa0, aa1, and inserted sequence + one_letter_aa0 = None + one_letter_aa1 = None + one_letter_del_seq = None + + # Should use the same 1 or 3 letter AA codes + try: + # see if it's 1 AA already + aa1_to_aa3(aa0) + + if aa1: + aa1_to_aa3(aa1) + + if deleted_sequence: + aa1_to_aa3(deleted_sequence) + except KeyError: + # maybe 3 letter AA code was used + try: + one_letter_aa0 = aa3_to_aa1(aa0) + + if aa1: + one_letter_aa1 = aa3_to_aa1(aa1) + + if deleted_sequence: + one_letter_del_seq = aa3_to_aa1(deleted_sequence) + except KeyError: + pass + else: + one_letter_aa0 = aa0 + one_letter_aa1 = aa1 + one_letter_del_seq = deleted_sequence + + if all( + ( + type(aa0) == type(one_letter_aa0), + type(aa1) == type(one_letter_aa1), + type(deleted_sequence) == type(one_letter_del_seq), + ) + ): + return ProteinDeletionToken( + input_string=og_input_string, + token=og_input_string, + aa0=one_letter_aa0, + pos0=pos0, + aa1=one_letter_aa1, + pos1=pos1, + deleted_sequence=one_letter_del_seq, + ) diff --git a/variation/tokenizers/protein_delins.py b/variation/tokenizers/protein_delins.py new file mode 100644 index 0000000..f80af49 --- /dev/null +++ b/variation/tokenizers/protein_delins.py @@ -0,0 +1,87 @@ +"""A module for Protein DelIns Tokenization Class.""" +from typing import Optional + +from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 + +from 
variation.regex import PROTEIN_DELINS +from variation.schemas.token_response_schema import ProteinDelInsToken +from variation.tokenizers.tokenizer import Tokenizer + + +class ProteinDelIns(Tokenizer): + """Class for tokenizing DelIns on the protein reference sequence.""" + + def match(self, input_string: str) -> Optional[ProteinDelInsToken]: + """Return a ProteinDelInsToken match if one exists. + + :param input_string: The input string to match + :return: A ProteinDelInsToken if a match exists. + Otherwise, None. + """ + og_input_string = input_string + + if input_string.startswith(("(p.", "p.(")) and input_string.endswith(")"): + input_string = input_string[3:-1] + elif input_string.startswith("p."): + input_string = input_string[2:] + elif input_string[0] == "(" and input_string[-1] == ")": + input_string = input_string[1:-1] + + match = PROTEIN_DELINS.match(input_string) + if match: + match_dict = match.groupdict() + + aa0 = match_dict["aa0"] + pos0 = int(match_dict["pos0"]) + aa1 = match_dict["aa1"] + pos1 = int(match_dict["pos1"]) if match_dict["pos1"] else None + inserted_sequence = match_dict["inserted_sequence"] + + # One letter codes for aa0, aa1, and inserted sequence + one_letter_aa0 = None + one_letter_aa1 = None + one_letter_ins_seq = None + + # Should use the same 1 or 3 letter AA codes + try: + # see if it's 1 AA already + aa1_to_aa3(aa0) + + if aa1: + aa1_to_aa3(aa1) + + if inserted_sequence: + aa1_to_aa3(inserted_sequence) + except KeyError: + # maybe 3 letter AA code was used + try: + one_letter_aa0 = aa3_to_aa1(aa0) + + if aa1: + one_letter_aa1 = aa3_to_aa1(aa1) + + if inserted_sequence: + one_letter_ins_seq = aa3_to_aa1(inserted_sequence) + except KeyError: + pass + else: + one_letter_aa0 = aa0 + one_letter_aa1 = aa1 + one_letter_ins_seq = inserted_sequence + + if all( + ( + type(aa0) == type(one_letter_aa0), + type(aa1) == type(one_letter_aa1), + type(inserted_sequence) == type(one_letter_ins_seq), + ) + ): + return ProteinDelInsToken( + 
input_string=og_input_string, + token=og_input_string, + aa0=one_letter_aa0, + pos0=pos0, + aa1=one_letter_aa1, + pos1=pos1, + inserted_sequence=one_letter_ins_seq, + ) diff --git a/variation/tokenizers/protein_insertion.py b/variation/tokenizers/protein_insertion.py new file mode 100644 index 0000000..0f7c886 --- /dev/null +++ b/variation/tokenizers/protein_insertion.py @@ -0,0 +1,69 @@ +"""A module for Protein Insertion Tokenization Class.""" +from typing import Optional + +from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 + +from variation.regex import PROTEIN_INSERTION +from variation.schemas.token_response_schema import ProteinInsertionToken +from variation.tokenizers.tokenizer import Tokenizer + + +class ProteinInsertion(Tokenizer): + """Class for tokenizing Insertions on the protein reference sequence.""" + + def match(self, input_string: str) -> Optional[ProteinInsertionToken]: + """Return token that match the input string.""" + og_input_string = input_string + + if input_string.startswith(("(p.", "p.(")) and input_string.endswith(")"): + input_string = input_string[3:-1] + elif input_string.startswith("p."): + input_string = input_string[2:] + elif input_string[0] == "(" and input_string[-1] == ")": + input_string = input_string[1:-1] + + match = PROTEIN_INSERTION.match(input_string) + + if match: + match_dict = match.groupdict() + + aa0 = match_dict["aa0"] + pos0 = int(match_dict["pos0"]) + aa1 = match_dict["aa1"] + pos1 = int(match_dict["pos1"]) + inserted_sequence = match_dict["inserted_sequence"] + + # One letter codes for aa0, aa1, and inserted sequence + one_letter_aa0 = None + one_letter_aa1 = None + one_letter_ins_seq = None + + # Should use the same 1 or 3 letter AA codes + try: + # see if it's 1 AA already + aa1_to_aa3(aa0) + aa1_to_aa3(aa1) + aa1_to_aa3(inserted_sequence) + except KeyError: + # maybe 3 letter AA code was used + try: + one_letter_aa0 = aa3_to_aa1(aa0) + one_letter_aa1 = aa3_to_aa1(aa1) + one_letter_ins_seq = 
aa3_to_aa1(inserted_sequence) + except KeyError: + pass + else: + one_letter_aa0 = aa0 + one_letter_aa1 = aa1 + one_letter_ins_seq = inserted_sequence + + if all((one_letter_aa0, one_letter_aa0, one_letter_ins_seq)): + return ProteinInsertionToken( + input_string=og_input_string, + token=og_input_string, + aa0=one_letter_aa0, + pos0=pos0, + aa1=one_letter_aa1, + pos1=pos1, + inserted_sequence=one_letter_ins_seq, + ) diff --git a/variation/tokenizers/protein_reference_agree.py b/variation/tokenizers/protein_reference_agree.py new file mode 100644 index 0000000..a14acd8 --- /dev/null +++ b/variation/tokenizers/protein_reference_agree.py @@ -0,0 +1,58 @@ +"""A module for Reference Agree Tokenization.""" +from typing import Optional + +from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 + +from variation.regex import PROTEIN_REFERENCE_AGREE +from variation.schemas.token_response_schema import ProteinReferenceAgreeToken +from variation.tokenizers.tokenizer import Tokenizer + + +class ProteinReferenceAgree(Tokenizer): + """Class for tokenizing Reference Agree on protein reference sequence.""" + + def match(self, input_string: str) -> Optional[ProteinReferenceAgreeToken]: + """Return a ProteinReferenceAgreeToken match if one exists. + + :param str input_string: The input string to match + :return: A ProteinReferenceAgreeToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + + if input_string.startswith(("(p.", "p.(")) and input_string.endswith(")"): + input_string = input_string[3:-1] + elif input_string.startswith("p."): + input_string = input_string[2:] + elif input_string[0] == "(" and input_string[-1] == ")": + input_string = input_string[1:-1] + + match = PROTEIN_REFERENCE_AGREE.match(input_string) + if match: + match_dict = match.groupdict() + + ref = match_dict["ref"] + pos = int(match_dict["pos"]) + + aa1_ref = None + + # Ref and Alt should use the same 1 or 3 letter AA codes + try: + # see if it's 1 AA already + aa1_to_aa3(ref) + except KeyError: + # maybe 3 letter AA code was used + try: + aa1_ref = aa3_to_aa1(ref) + except KeyError: + pass + else: + aa1_ref = ref + + if aa1_ref: + return ProteinReferenceAgreeToken( + input_string=og_input_string, + token=f"{aa1_ref}{pos}=", + pos=pos, + ref=aa1_ref, + ) diff --git a/variation/tokenizers/protein_substitution.py b/variation/tokenizers/protein_substitution.py new file mode 100644 index 0000000..f66e28f --- /dev/null +++ b/variation/tokenizers/protein_substitution.py @@ -0,0 +1,78 @@ +"""A module for Protein Substitution Tokenization.""" +from typing import Optional, Union + +from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 + +from variation.regex import PROTEIN_SUBSTITUTION +from variation.schemas.token_response_schema import ( + ProteinStopGainToken, + ProteinSubstitutionToken, +) +from variation.tokenizers.tokenizer import Tokenizer + + +class ProteinSubstitution(Tokenizer): + """Class for tokenizing Protein Substitution.""" + + def match( + self, input_string: str + ) -> Optional[Union[ProteinSubstitutionToken, ProteinStopGainToken]]: + """Return a ProteinSubstitutionToken or ProteinStopGainToken match if one + exists. + + :param input_string: The input string to match + :return: A ProteinSubstitutionToken or ProteinStopGainToken if a match exists. + Otherwise, None. 
+ """ + og_input_string = input_string + + if input_string.startswith(("(p.", "p.(")) and input_string.endswith(")"): + input_string = input_string[3:-1] + elif input_string.startswith("p."): + input_string = input_string[2:] + elif input_string[0] == "(" and input_string[-1] == ")": + input_string = input_string[1:-1] + + match = PROTEIN_SUBSTITUTION.match(input_string) + if match: + match_dict = match.groupdict() + + ref = match_dict["ref"] + pos = int(match_dict["pos"]) + alt = match_dict["alt"] + + # One letter codes for ref and alt + aa1_ref = None + aa1_alt = None + + # Ref and Alt should use the same 1 or 3 letter AA codes + ref_upper = ref + alt_upper = alt + try: + # see if it's 1 AA already + aa1_to_aa3(ref_upper) + aa1_to_aa3(alt_upper) + except KeyError: + # maybe 3 letter AA code was used + try: + aa1_ref = aa3_to_aa1(ref) + aa1_alt = "*" if alt == "*" else aa3_to_aa1(alt) + except KeyError: + pass + else: + aa1_ref = ref + aa1_alt = alt + + if aa1_alt and aa1_ref: + params = { + "input_string": og_input_string, + "token": f"{aa1_ref}{pos}{aa1_alt}", + "pos": pos, + "ref": aa1_ref, + "alt": aa1_alt, + } + + if aa1_alt == "*": + return ProteinStopGainToken(**params) + else: + return ProteinSubstitutionToken(**params) diff --git a/variation/tokenizers/tokenizer.py b/variation/tokenizers/tokenizer.py new file mode 100644 index 0000000..5f31091 --- /dev/null +++ b/variation/tokenizers/tokenizer.py @@ -0,0 +1,75 @@ +"""Module for Tokenization.""" +from abc import ABC, abstractmethod +from typing import Optional, Tuple + +from cool_seq_tool.schemas import AnnotationLayer + +from variation.schemas.token_response_schema import Token + + +class Tokenizer(ABC): + """The tokenizer class.""" + + coord_types = {k: v.value for k, v in AnnotationLayer.__members__.items()} + + @abstractmethod + def match(self, input_string: str) -> Optional[Token]: + """Return tokens that match the input string. 
+ + :param input_string: Input string + :return: Token if match was found + """ + raise NotImplementedError + + def strip_coord_prefix( + self, input_string: str, match_coord_type: Optional[AnnotationLayer] = None + ) -> Tuple[Optional[AnnotationLayer], Optional[str]]: + """Strip parentheses and coordinate type from string + + :param input_string: Input string + :param match_coord_type: If set, the input string must have the prefix + corresponding to this value to succeed. If this is not set, will attempt + to find the first match of a prefix and use that as the coordinate type. + :return: Tuple containing coordinate type for input string and stripped string, + if successful. + """ + coord_type = None + stripped_str = None + + def _strip( + coord_type: str, + string: str, + match_coord_type: Optional[AnnotationLayer] = None, + ) -> str: + """Strip parentheses and coordinate type from string + + :param input_string: Input string + :param match_coord_type: If set, the input string must have the prefix + corresponding to this value to succeed + :return: Stripped string + """ + if string.startswith( + (f"({coord_type}.", f"{coord_type}.(") + ) and string.endswith(")"): + string = string[3:-1] + elif string.startswith(f"{coord_type}."): + string = string[2:] + elif string[0] == "(" and string[-1] == ")": + string = string[1:-1] + else: + if match_coord_type: + string = None + + return string + + if match_coord_type: + coord_type = match_coord_type + stripped_str = _strip(coord_type.value, input_string, match_coord_type) + else: + for k, v in self.coord_types.items(): + if f"{v}." 
in input_string: + coord_type = AnnotationLayer[k] + stripped_str = _strip(v, input_string) + break + + return coord_type, stripped_str diff --git a/variation/translate.py b/variation/translate.py new file mode 100644 index 0000000..789d6f3 --- /dev/null +++ b/variation/translate.py @@ -0,0 +1,125 @@ +"""Module for translation.""" +from typing import List, Optional + +from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.mappers import ManeTranscript +from cool_seq_tool.sources import UtaDatabase +from ga4gh.vrs import models + +from variation.hgvs_dup_del_mode import HGVSDupDelMode +from variation.schemas.app_schemas import Endpoint +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators import ( + Amplification, + CdnaDeletion, + CdnaDelIns, + CdnaInsertion, + CdnaReferenceAgree, + CdnaSubstitution, + GenomicDeletion, + GenomicDeletionAmbiguous, + GenomicDelIns, + GenomicDuplication, + GenomicDuplicationAmbiguous, + GenomicInsertion, + GenomicReferenceAgree, + GenomicSubstitution, + ProteinDeletion, + ProteinDelIns, + ProteinInsertion, + ProteinReferenceAgree, + ProteinStopGain, + ProteinSubstitution, +) +from variation.translators.translator import Translator +from variation.vrs_representation import VRSRepresentation + + +class Translate: + """Class for translating to VRS representations""" + + def __init__( + self, + seqrepo_access: SeqRepoAccess, + mane_transcript: ManeTranscript, + uta: UtaDatabase, + vrs: VRSRepresentation, + hgvs_dup_del_mode: HGVSDupDelMode, + ) -> None: + """Initialize the Translate class. Will create an instance variable, + `translators`, which is a list of Translator for supported variation types. 
+ + + :param seqrepo_access: Access to SeqRepo data + :param mane_transcript: Access MANE Transcript information + :param uta: Access to UTA queries + :param vrs: Class for creating VRS objects + :param hgvs_dup_del_mode: Class for interpreting HGVS duplications and deletions + """ + params = [seqrepo_access, mane_transcript, uta, vrs, hgvs_dup_del_mode] + + self.translators: List[Translator] = [ + ProteinSubstitution(*params), + CdnaSubstitution(*params), + GenomicSubstitution(*params), + ProteinStopGain(*params), + ProteinReferenceAgree(*params), + CdnaReferenceAgree(*params), + GenomicReferenceAgree(*params), + ProteinDelIns(*params), + CdnaDelIns(*params), + GenomicDelIns(*params), + ProteinDeletion(*params), + CdnaDeletion(*params), + GenomicDeletion(*params), + GenomicDeletionAmbiguous(*params), + ProteinInsertion(*params), + CdnaInsertion(*params), + GenomicInsertion(*params), + GenomicDuplication(*params), + GenomicDuplicationAmbiguous(*params), + Amplification(*params), + ] + + async def perform( + self, + validation_result: ValidationResult, # this is always valid + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param warnings: List of warnings; may be mutated with warnings encountered + during translation + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful.
If translation was + not successful, `None` + """ + translation_result = None + for translator in self.translators: + if translator.can_translate( + validation_result.classification.classification_type + ): + result = await translator.translate( + validation_result, + warnings, + endpoint_name=endpoint_name, + hgvs_dup_del_mode=hgvs_dup_del_mode, + baseline_copies=baseline_copies, + copy_change=copy_change, + do_liftover=do_liftover, + ) + translation_result = result + break + + return translation_result diff --git a/variation/translators/__init__.py b/variation/translators/__init__.py new file mode 100644 index 0000000..de52c9b --- /dev/null +++ b/variation/translators/__init__.py @@ -0,0 +1,21 @@ +"""Translator package import.""" +from .amplification import Amplification +from .cdna_deletion import CdnaDeletion +from .cdna_delins import CdnaDelIns +from .cdna_insertion import CdnaInsertion +from .cdna_reference_agree import CdnaReferenceAgree +from .cdna_substitution import CdnaSubstitution +from .genomic_deletion import GenomicDeletion +from .genomic_deletion_ambiguous import GenomicDeletionAmbiguous +from .genomic_delins import GenomicDelIns +from .genomic_duplication import GenomicDuplication +from .genomic_duplication_ambiguous import GenomicDuplicationAmbiguous +from .genomic_insertion import GenomicInsertion +from .genomic_reference_agree import GenomicReferenceAgree +from .genomic_substitution import GenomicSubstitution +from .protein_deletion import ProteinDeletion +from .protein_delins import ProteinDelIns +from .protein_insertion import ProteinInsertion +from .protein_reference_agree import ProteinReferenceAgree +from .protein_stop_gain import ProteinStopGain +from .protein_substitution import ProteinSubstitution diff --git a/variation/translators/ambiguous_translator_base.py b/variation/translators/ambiguous_translator_base.py new file mode 100644 index 0000000..a796b66 --- /dev/null +++ b/variation/translators/ambiguous_translator_base.py @@ -0,0 
+1,310 @@ +"""Module for translating genomic ambiguous deletions and duplications""" +from typing import Dict, List, Literal, NamedTuple, Optional, Union + +from ga4gh.vrs import models +from pydantic import StrictInt, StrictStr, ValidationError + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + AmbiguousType, + GenomicDeletionAmbiguousClassification, + GenomicDuplicationAmbiguousClassification, + Nomenclature, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.service_schema import ClinVarAssembly +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator +from variation.utils import get_assembly, get_refget_accession + + +class AmbiguousData(NamedTuple): + """Represents Ambiguous data""" + + ac: StrictStr + pos0: Union[StrictInt, Literal["?"]] + pos1: Optional[Union[StrictInt, Literal["?"]]] + pos2: Union[StrictInt, Literal["?"]] + pos3: Optional[Union[StrictInt, Literal["?"]]] + + +class AmbiguousTranslator(Translator): + """Class for translating genomic ambiguous deletions and duplications to VRS + representations + """ + + async def get_grch38_data_ambiguous( + self, + classification: Union[ + GenomicDeletionAmbiguousClassification, + GenomicDuplicationAmbiguousClassification, + ], + errors: List[str], + ac: str, + ) -> Optional[AmbiguousData]: + """Get GRCh38 data for genomic ambiguous duplication or deletion classification + + :param classification: Classification to get translation for + :param errors: List of errors. Will be mutated if errors are found + :param ac: Genomic RefSeq accession + :return: Ambiguous data on GRCh38 assembly if successful liftover. 
Else, `None` + """ + pos0, pos1, pos2, pos3, new_ac = None, None, None, None, None + if classification.ambiguous_type == AmbiguousType.AMBIGUOUS_1: + grch38_pos0_pos1 = await self.mane_transcript.g_to_grch38( + ac, classification.pos0, classification.pos1 + ) + if grch38_pos0_pos1: + pos0, pos1 = grch38_pos0_pos1["pos"] + ac_pos0_pos1 = grch38_pos0_pos1["ac"] + + grch38_pos2_pos3 = await self.mane_transcript.g_to_grch38( + ac, classification.pos2, classification.pos3 + ) + + if grch38_pos2_pos3: + pos2, pos3 = grch38_pos2_pos3["pos"] + ac_pos2_pos3 = grch38_pos2_pos3["ac"] + + if ac_pos0_pos1 != ac_pos2_pos3: + errors.append( + f"{ac_pos0_pos1} does not equal {ac_pos2_pos3} when lifting" + " over to GRCh38" + ) + else: + new_ac = ac_pos0_pos1 + elif classification.ambiguous_type in { + AmbiguousType.AMBIGUOUS_2, + AmbiguousType.AMBIGUOUS_5, + }: + grch38 = await self.mane_transcript.g_to_grch38( + ac, classification.pos1, classification.pos2 + ) + if grch38: + pos1, pos2 = grch38["pos"] + new_ac = grch38["ac"] + elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_7: + grch38 = await self.mane_transcript.g_to_grch38( + ac, classification.pos0, classification.pos2 + ) + if grch38: + ( + pos0, + pos2, + ) = grch38["pos"] + new_ac = grch38["ac"] + + if not new_ac: + errors.append(f"Unable to find a GRCh38 accession for: {ac}") + + try: + ambiguous_data = AmbiguousData( + ac=new_ac, pos0=pos0, pos1=pos1, pos2=pos2, pos3=pos3 + ) + except ValidationError: + ambiguous_data = None + return ambiguous_data + + def get_dup_del_ambiguous_seq_loc( + self, + ambiguous_type: AmbiguousType, + ac: str, + pos0: Union[int, Literal["?"]], + pos1: Optional[Union[int, Literal["?"]]], + pos2: Union[int, Literal["?"]], + pos3: Optional[Union[int, Literal["?"]]], + warnings: List[str], + ) -> Dict: + """Get VRS Sequence Location + + :param ambiguous_type: Type of ambiguous expression used + :param ac: Genomic RefSeq accession + :param pos0: Position 0 (residue) + :param pos1: 
Position 1 (residue) + :param pos2: Position 2 (residue) + :param pos3: Position 3 (residue) + :param warnings: List of warnings + :return: VRS Sequence Location as a dictionary + """ + if ambiguous_type == AmbiguousType.AMBIGUOUS_1: + start = models.Range([pos0 - 1, pos1 - 1]) + end = models.Range([pos2, pos3]) + elif ambiguous_type == AmbiguousType.AMBIGUOUS_2: + start = self.vrs.get_start_indef_range(pos1) + end = self.vrs.get_end_indef_range(pos2) + elif ambiguous_type == AmbiguousType.AMBIGUOUS_5: + start = self.vrs.get_start_indef_range(pos1) + end = pos2 + elif ambiguous_type == AmbiguousType.AMBIGUOUS_7: + start = pos0 - 1 + end = self.vrs.get_end_indef_range(pos2) + # No else since validator should catch if the ambiguous type is supported or not + + refget_accession = get_refget_accession(self.seqrepo_access, ac, warnings) + if refget_accession: + seq_loc = self.vrs.get_sequence_loc( + refget_accession, start, end + ).model_dump(exclude_none=True) + else: + seq_loc = {} + return seq_loc + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification = validation_result.classification + if isinstance(classification, GenomicDeletionAmbiguousClassification): + alt_type = AltType.DELETION_AMBIGUOUS + else: + alt_type = AltType.DUPLICATION_AMBIGUOUS + + grch38_data = None + + if do_liftover or endpoint_name == Endpoint.NORMALIZE: + errors = [] + assembly, w = get_assembly(self.seqrepo_access, validation_result.accession) + if w: + warnings.append(w) + return None + else: + # assembly is either 37 or 38 + if assembly == ClinVarAssembly.GRCH37: + grch38_data = await self.get_grch38_data_ambiguous( + classification, errors, validation_result.accession + ) + if errors: + warnings += errors + return None + + ac = grch38_data.ac + pos0 = grch38_data.pos0 + pos1 = grch38_data.pos1 + pos2 = grch38_data.pos2 + pos3 = grch38_data.pos3 + else: + ac = validation_result.accession + pos0 = classification.pos0 + pos1 = classification.pos1 + pos2 = classification.pos2 + pos3 = classification.pos3 + + assembly = ClinVarAssembly.GRCH38 + else: + ac = validation_result.accession + pos0 = classification.pos0 + pos1 = classification.pos1 + pos2 = classification.pos2 + pos3 = classification.pos3 + assembly = None + + if all( + ( + endpoint_name == Endpoint.NORMALIZE, + classification.nomenclature == Nomenclature.FREE_TEXT, + classification.gene_token, + ) + ): + errors = [] + if not assembly and not grch38_data: + grch38_data = await self.get_grch38_data_ambiguous( + classification, errors, ac + ) + + if errors: + warnings += errors + return None + + ac = grch38_data.ac + pos0 = grch38_data.pos0 + pos1 = grch38_data.pos1 + pos2 = grch38_data.pos2 + pos3 = grch38_data.pos3 + + self.is_valid( + classification.gene_token, + ac, + pos0, + pos1, + errors, + pos2=pos2, + pos3=pos3, + ) + else: + self.is_valid( + classification.gene_token, + ac, + pos0, + pos1, + errors, + pos2=pos2, + pos3=pos3, + ) + + if errors: + warnings 
+= errors + return None + + seq_loc = self.get_dup_del_ambiguous_seq_loc( + classification.ambiguous_type, ac, pos0, pos1, pos2, pos3, warnings + ) + if not seq_loc: + return None + + if endpoint_name == Endpoint.NORMALIZE: + vrs_variation = self.hgvs_dup_del_mode.interpret_variation( + alt_type, + seq_loc, + warnings, + hgvs_dup_del_mode, + ac, + baseline_copies=baseline_copies, + copy_change=copy_change, + ) + elif endpoint_name == Endpoint.HGVS_TO_COPY_NUMBER_COUNT: + vrs_variation = self.hgvs_dup_del_mode.copy_number_count_mode( + alt_type, seq_loc, baseline_copies + ) + elif endpoint_name == Endpoint.HGVS_TO_COPY_NUMBER_CHANGE: + vrs_variation = self.hgvs_dup_del_mode.copy_number_change_mode( + alt_type, seq_loc, copy_change + ) + else: + vrs_variation = self.hgvs_dup_del_mode.default_mode( + alt_type, + seq_loc, + ac, + baseline_copies=baseline_copies, + copy_change=copy_change, + ) + + if vrs_variation: + return TranslationResult( + vrs_variation=vrs_variation, + vrs_seq_loc_ac=ac, + og_ac=validation_result.accession, + validation_result=validation_result, + ) + else: + return None diff --git a/variation/translators/amplification.py b/variation/translators/amplification.py new file mode 100644 index 0000000..ef2b641 --- /dev/null +++ b/variation/translators/amplification.py @@ -0,0 +1,69 @@ +"""Module for Amplification Translation.""" +from typing import List, Optional + +from ga4gh.core import ga4gh_identify +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ClassificationType +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator +from variation.utils import get_priority_sequence_location + + +class Amplification(Translator): + """The 
Amplification Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. Otherwise, `False` + """ + return classification_type == ClassificationType.AMPLIFICATION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + gene = validation_result.classification.gene_token.gene + priority_seq_loc = get_priority_sequence_location(gene, self.seqrepo_access) + + if priority_seq_loc: + vrs_cx = models.CopyNumberChange( + location=models.SequenceLocation(**priority_seq_loc), + copyChange=models.CopyChange.EFO_0030072, + ) + vrs_cx.id = ga4gh_identify(vrs_cx) + vrs_cx = vrs_cx.model_dump(exclude_none=True) + else: + vrs_cx = None + warnings.append(f"No VRS SequenceLocation found for gene: {gene.label}") + + if vrs_cx: + return TranslationResult( + vrs_variation=vrs_cx, validation_result=validation_result + ) + else: + return None diff --git a/variation/translators/cdna_deletion.py b/variation/translators/cdna_deletion.py new file mode 100644 index 0000000..9eb9919 --- /dev/null +++ b/variation/translators/cdna_deletion.py @@ -0,0 +1,66 @@ +"""Module for cDNA Deletion Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaDeletionClassification, + ClassificationType, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class CdnaDeletion(Translator): + """The cDNA Deletion Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.CDNA_DELETION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + cds_start = validation_result.cds_start + classification: CdnaDeletionClassification = validation_result.classification + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos0, + classification.pos1, + AltType.DELETION, + AnnotationLayer.CDNA, + warnings, + cds_start=cds_start, + ) + return translation_result diff --git a/variation/translators/cdna_delins.py b/variation/translators/cdna_delins.py new file mode 100644 index 0000000..815c30f --- /dev/null +++ b/variation/translators/cdna_delins.py @@ -0,0 +1,67 @@ +"""Module for Cdna DelIns Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaDelInsClassification, + ClassificationType, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class CdnaDelIns(Translator): + """The Cdna DelIns Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.CDNA_DELINS + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + cds_start = validation_result.cds_start + classification: CdnaDelInsClassification = validation_result.classification + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos0, + classification.pos1, + AltType.DELINS, + AnnotationLayer.CDNA, + warnings, + cds_start=cds_start, + alt=classification.inserted_sequence, + ) + return translation_result diff --git a/variation/translators/cdna_insertion.py b/variation/translators/cdna_insertion.py new file mode 100644 index 0000000..8654aca --- /dev/null +++ b/variation/translators/cdna_insertion.py @@ -0,0 +1,67 @@ +"""Module for Cdna insertion Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaInsertionClassification, + ClassificationType, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class CdnaInsertion(Translator): + """The Cdna Insertion Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.CDNA_INSERTION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + cds_start = validation_result.cds_start + classification: CdnaInsertionClassification = validation_result.classification + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos0, + classification.pos1, + AltType.INSERTION, + AnnotationLayer.CDNA, + warnings, + cds_start=cds_start, + alt=classification.inserted_sequence, + ) + return translation_result diff --git a/variation/translators/cdna_reference_agree.py b/variation/translators/cdna_reference_agree.py new file mode 100644 index 0000000..d739253 --- /dev/null +++ b/variation/translators/cdna_reference_agree.py @@ -0,0 +1,68 @@ +"""Module for Cdna Reference Agree Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaReferenceAgreeClassification, + ClassificationType, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class CdnaReferenceAgree(Translator): + """The Cdna Reference Agree Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.CDNA_REFERENCE_AGREE + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + cds_start = validation_result.cds_start + classification: CdnaReferenceAgreeClassification = ( + validation_result.classification + ) + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos, + classification.pos, + AltType.REFERENCE_AGREE, + AnnotationLayer.CDNA, + warnings, + cds_start=cds_start, + ) + return translation_result diff --git a/variation/translators/cdna_substitution.py b/variation/translators/cdna_substitution.py new file mode 100644 index 0000000..38a928f --- /dev/null +++ b/variation/translators/cdna_substitution.py @@ -0,0 +1,70 @@ +"""Module for cDNA Substitution Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaSubstitutionClassification, + ClassificationType, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class CdnaSubstitution(Translator): + """The cDNA Substitution Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.CDNA_SUBSTITUTION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + cds_start = validation_result.cds_start + classification: CdnaSubstitutionClassification = ( + validation_result.classification + ) + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos, + classification.pos, + AltType.SUBSTITUTION, + AnnotationLayer.CDNA, + warnings, + cds_start=cds_start, + ref=classification.ref, + alt=classification.alt, + ) + return translation_result diff --git a/variation/translators/genomic_del_dup_base.py b/variation/translators/genomic_del_dup_base.py new file mode 100644 index 0000000..3103823 --- /dev/null +++ b/variation/translators/genomic_del_dup_base.py @@ -0,0 +1,272 @@ +"""Module for Genomic Deletion Translation.""" +from typing import List, NamedTuple, Optional, Union + +from cool_seq_tool.schemas import ResidueMode +from ga4gh.vrs import models +from pydantic import StrictInt, StrictStr, ValidationError + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + GenomicDeletionClassification, + GenomicDuplicationClassification, + Nomenclature, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.service_schema import ClinVarAssembly +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import ( + TranslationResult, + VrsSeqLocAcStatus, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator +from variation.utils import get_assembly, get_refget_accession + + +class DelDupData(NamedTuple): + """Represents genomic dup/del data""" + + ac: StrictStr + pos0: StrictInt + pos1: Optional[StrictInt] + + +class GenomicDelDupTranslator(Translator): + """Class for translating genomic deletions and duplications to VRS + representations + """ + + async def get_grch38_data( 
+ self, + classification: Union[ + GenomicDeletionClassification, GenomicDuplicationClassification + ], + errors: List[str], + ac: str, + ) -> DelDupData: + """Get GRCh38 data for genomic duplication or deletion classification + + :param classification: Classification to get translation for + :param errors: List of errors. Will be mutated if errors are found + :param ac: Genomic RefSeq accession + :return: Data on GRCh38 assembly if successful liftover. Else, `None` + """ + pos0, pos1, new_ac = None, None, None + + if classification.pos1: + grch38_pos = await self.mane_transcript.g_to_grch38( + ac, classification.pos0, classification.pos1 + ) + if grch38_pos: + pos0, pos1 = grch38_pos["pos"] + new_ac = grch38_pos["ac"] + else: + grch38_pos = await self.mane_transcript.g_to_grch38( + ac, classification.pos0, classification.pos0 + ) + if grch38_pos: + pos0, _ = grch38_pos["pos"] + new_ac = grch38_pos["ac"] + + if not new_ac: + errors.append(f"Unable to find a GRCh38 accession for: {ac}") + + try: + data = DelDupData(ac=new_ac, pos0=pos0, pos1=pos1) + except ValidationError: + data = None + return data + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: 
Translation result if translation was successful. If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification = validation_result.classification + if isinstance(classification, GenomicDeletionClassification): + alt_type = AltType.DELETION + else: + alt_type = AltType.DUPLICATION + + grch38_data = None + vrs_variation = None + vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA + residue_mode = ResidueMode.RESIDUE + + if do_liftover or endpoint_name == Endpoint.NORMALIZE: + errors = [] + assembly, w = get_assembly(self.seqrepo_access, validation_result.accession) + if w: + warnings.append(w) + return None + else: + # assembly is either 37 or 38 + if assembly == ClinVarAssembly.GRCH37: + grch38_data = await self.get_grch38_data( + classification, errors, validation_result.accession + ) + if errors: + warnings += errors + return None + + pos0 = grch38_data.pos0 - 1 + if grch38_data.pos1 is None: + pos1 = grch38_data.pos0 + else: + pos1 = grch38_data.pos1 + residue_mode = ResidueMode.INTER_RESIDUE + ac = grch38_data.ac + + if alt_type == AltType.DELETION: + if classification.nomenclature == Nomenclature.GNOMAD_VCF: + ref = classification.matching_tokens[0].ref + invalid_ref_msg = self.validate_reference_sequence( + ac, + pos0, + pos0 + (len(ref) - 1), + ref, + residue_mode=residue_mode, + ) + if invalid_ref_msg: + warnings.append(invalid_ref_msg) + return None + else: + pos0 = classification.pos0 + pos1 = classification.pos1 + ac = validation_result.accession + grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1) + + assembly = ClinVarAssembly.GRCH38 + else: + pos0 = classification.pos0 + pos1 = classification.pos1 + ac = validation_result.accession + assembly = None + + if all( + ( + endpoint_name == Endpoint.NORMALIZE, + classification.nomenclature == Nomenclature.FREE_TEXT, + classification.gene_token, + ) + ): + errors = [] + if not assembly and not grch38_data: + grch38_data = await 
self.get_grch38_data(classification, errors, ac) + if errors: + warnings += errors + return None + + ac = grch38_data.ac + pos0 = grch38_data.pos0 - 1 + if grch38_data.pos1 is None: + pos1 = grch38_data.pos0 + else: + pos1 = grch38_data.pos1 + residue_mode = ResidueMode.INTER_RESIDUE + self.is_valid(classification.gene_token, ac, pos0, pos1, errors) + + if errors: + warnings += errors + return None + + mane = await self.mane_transcript.get_mane_transcript( + ac, + pos0, + pos1, + "g", + try_longest_compatible=True, + residue_mode=residue_mode, + gene=classification.gene_token.token + if classification.gene_token + else None, + ) + + if mane: + # mane is 0 - based, but we are using residue + ac = mane.refseq + vrs_seq_loc_ac_status = mane.status + pos0 = mane.pos[0] + mane.coding_start_site + pos1 = mane.pos[1] + mane.coding_start_site + residue_mode = ResidueMode.INTER_RESIDUE + else: + return None + + alt = None + if classification.nomenclature == Nomenclature.GNOMAD_VCF: + if alt_type == AltType.DELETION: + pos0 -= 1 + pos1 -= 1 + alt = classification.matching_tokens[0].alt + + if alt_type == AltType.INSERTION: + alt = classification.inserted_sequence + + start = pos0 if residue_mode == ResidueMode.INTER_RESIDUE else pos0 - 1 + end = pos1 if pos1 else pos0 + + refget_accession = get_refget_accession(self.seqrepo_access, ac, warnings) + if not refget_accession: + return None + + seq_loc = self.vrs.get_sequence_loc(refget_accession, start, end).model_dump( + exclude_none=True + ) + + if endpoint_name == Endpoint.NORMALIZE: + vrs_variation = self.hgvs_dup_del_mode.interpret_variation( + alt_type, + seq_loc, + warnings, + hgvs_dup_del_mode, + ac, + baseline_copies=baseline_copies, + copy_change=copy_change, + alt=alt, + ) + elif endpoint_name == Endpoint.HGVS_TO_COPY_NUMBER_COUNT: + vrs_variation = self.hgvs_dup_del_mode.copy_number_count_mode( + alt_type, seq_loc, baseline_copies + ) + elif endpoint_name == Endpoint.HGVS_TO_COPY_NUMBER_CHANGE: + vrs_variation = 
self.hgvs_dup_del_mode.copy_number_change_mode( + alt_type, seq_loc, copy_change + ) + else: + vrs_variation = self.hgvs_dup_del_mode.default_mode( + alt_type, + seq_loc, + ac, + baseline_copies=baseline_copies, + copy_change=copy_change, + alt=alt, + ) + + if vrs_variation: + return TranslationResult( + vrs_variation=vrs_variation, + vrs_seq_loc_ac=ac, + vrs_seq_loc_ac_status=vrs_seq_loc_ac_status, + og_ac=validation_result.accession, + validation_result=validation_result, + ) + else: + return None diff --git a/variation/translators/genomic_deletion.py b/variation/translators/genomic_deletion.py new file mode 100644 index 0000000..7960b8a --- /dev/null +++ b/variation/translators/genomic_deletion.py @@ -0,0 +1,16 @@ +"""Module for Genomic Deletion Translation.""" +from variation.schemas.classification_response_schema import ClassificationType +from variation.translators.genomic_del_dup_base import GenomicDelDupTranslator + + +class GenomicDeletion(GenomicDelDupTranslator): + """The Genomic Deletion Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.GENOMIC_DELETION diff --git a/variation/translators/genomic_deletion_ambiguous.py b/variation/translators/genomic_deletion_ambiguous.py new file mode 100644 index 0000000..666451e --- /dev/null +++ b/variation/translators/genomic_deletion_ambiguous.py @@ -0,0 +1,16 @@ +"""Module for Genomic Deletion Ambiguous Translation.""" +from variation.schemas.classification_response_schema import ClassificationType +from variation.translators.ambiguous_translator_base import AmbiguousTranslator + + +class GenomicDeletionAmbiguous(AmbiguousTranslator): + """The Genomic Deletion Ambiguous Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. Otherwise, `False` + """ + return classification_type == ClassificationType.GENOMIC_DELETION_AMBIGUOUS diff --git a/variation/translators/genomic_delins.py b/variation/translators/genomic_delins.py new file mode 100644 index 0000000..a9934b7 --- /dev/null +++ b/variation/translators/genomic_delins.py @@ -0,0 +1,129 @@ +"""Module for Genomic DelIns Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaDelInsClassification, + ClassificationType, + GenomicDelInsClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import ( + TranslationResult, + VrsSeqLocAcStatus, +) +from variation.schemas.validation_response_schema import ValidationResult +from 
variation.translators.translator import Translator + + +class GenomicDelIns(Translator): + """The Genomic DelIns Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. Otherwise, `False` + """ + return classification_type == ClassificationType.GENOMIC_DELINS + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + classification: GenomicDelInsClassification = validation_result.classification + vrs_allele = None + vrs_seq_loc_ac = None + vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA + + if endpoint_name == Endpoint.NORMALIZE: + gene = ( + classification.gene_token.token if classification.gene_token else None + ) + mane = await self.mane_transcript.get_mane_transcript( + validation_result.accession, + classification.pos0, + classification.pos1 + if classification.pos1 is not None + else classification.pos0, + AnnotationLayer.GENOMIC, + try_longest_compatible=True, + residue_mode=ResidueMode.RESIDUE, + gene=gene, + ) + + if mane: + vrs_seq_loc_ac_status = mane.status + if gene: + classification = CdnaDelInsClassification( + matching_tokens=classification.matching_tokens, + nomenclature=classification.nomenclature, + gene_token=classification.gene_token, + pos0=mane.pos[0] + 1, # 1-based for classification + pos1=mane.pos[1] + 1, # 1-based for classification + inserted_sequence=classification.inserted_sequence, + ) + vrs_seq_loc_ac = mane.refseq + coord_type = AnnotationLayer.CDNA + validation_result.classification = classification + else: + vrs_seq_loc_ac = mane.alt_ac + coord_type = AnnotationLayer.GENOMIC + + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + mane.pos[0], + mane.pos[1], + coord_type, + AltType.DELINS, + warnings, + alt=classification.inserted_sequence, + cds_start=mane.coding_start_site if gene else None, + residue_mode=ResidueMode.INTER_RESIDUE, + ) + else: + vrs_seq_loc_ac = validation_result.accession + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + classification.pos0, + classification.pos1, + AnnotationLayer.GENOMIC, + AltType.DELINS, + warnings, + alt=classification.inserted_sequence, + residue_mode=ResidueMode.RESIDUE, + ) + + if vrs_allele and vrs_seq_loc_ac: + return TranslationResult( + vrs_variation=vrs_allele, + vrs_seq_loc_ac=vrs_seq_loc_ac, + 
vrs_seq_loc_ac_status=vrs_seq_loc_ac_status, + og_ac=validation_result.accession, + validation_result=validation_result, + ) + else: + return None diff --git a/variation/translators/genomic_duplication.py b/variation/translators/genomic_duplication.py new file mode 100644 index 0000000..79a7dae --- /dev/null +++ b/variation/translators/genomic_duplication.py @@ -0,0 +1,16 @@ +"""Module for Genomic Duplication Translation.""" +from variation.schemas.classification_response_schema import ClassificationType +from variation.translators.genomic_del_dup_base import GenomicDelDupTranslator + + +class GenomicDuplication(GenomicDelDupTranslator): + """The Genomic Duplication Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. Otherwise, `False` + """ + return classification_type == ClassificationType.GENOMIC_DUPLICATION diff --git a/variation/translators/genomic_duplication_ambiguous.py b/variation/translators/genomic_duplication_ambiguous.py new file mode 100644 index 0000000..254712b --- /dev/null +++ b/variation/translators/genomic_duplication_ambiguous.py @@ -0,0 +1,16 @@ +"""Module for Genomic Duplication Ambiguous Translation.""" +from variation.schemas.classification_response_schema import ClassificationType +from variation.translators.ambiguous_translator_base import AmbiguousTranslator + + +class GenomicDuplicationAmbiguous(AmbiguousTranslator): + """The Genomic Duplication Ambiguous Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS diff --git a/variation/translators/genomic_insertion.py b/variation/translators/genomic_insertion.py new file mode 100644 index 0000000..eec2b55 --- /dev/null +++ b/variation/translators/genomic_insertion.py @@ -0,0 +1,130 @@ +"""Module for Genomic Insertion Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaInsertionClassification, + ClassificationType, + GenomicInsertionClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import ( + TranslationResult, + VrsSeqLocAcStatus, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class GenomicInsertion(Translator): + """The Genomic Insertion Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.GENOMIC_INSERTION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification: GenomicInsertionClassification = ( + validation_result.classification + ) + vrs_allele = None + vrs_seq_loc_ac = None + vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA + + if endpoint_name == Endpoint.NORMALIZE: + gene = ( + classification.gene_token.token if classification.gene_token else None + ) + mane = await self.mane_transcript.get_mane_transcript( + validation_result.accession, + classification.pos0, + classification.pos1, + AnnotationLayer.GENOMIC, + try_longest_compatible=True, + residue_mode=ResidueMode.RESIDUE, + gene=gene, + ) + + if mane: + vrs_seq_loc_ac_status = mane.status + if gene: + classification = CdnaInsertionClassification( + matching_tokens=classification.matching_tokens, + nomenclature=classification.nomenclature, + gene_token=classification.gene_token, + pos0=mane.pos[0] + 1, # 1-based for classification + pos1=mane.pos[1] + 1, # 1-based for classification + inserted_sequence=classification.inserted_sequence, + ) + vrs_seq_loc_ac = mane.refseq + coord_type = AnnotationLayer.CDNA + validation_result.classification = classification + else: + vrs_seq_loc_ac = mane.alt_ac + coord_type = AnnotationLayer.GENOMIC + + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + mane.pos[0], + mane.pos[1], + coord_type, + AltType.INSERTION, + warnings, + alt=classification.inserted_sequence, + cds_start=mane.coding_start_site if gene else None, + residue_mode=ResidueMode.INTER_RESIDUE, + ) + else: + vrs_seq_loc_ac = validation_result.accession + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + classification.pos0, + classification.pos1, + AnnotationLayer.GENOMIC, + AltType.INSERTION, + warnings, + alt=classification.inserted_sequence, + residue_mode=ResidueMode.RESIDUE, + ) + + if vrs_allele and vrs_seq_loc_ac: + return TranslationResult( + vrs_variation=vrs_allele, + vrs_seq_loc_ac=vrs_seq_loc_ac, + 
vrs_seq_loc_ac_status=vrs_seq_loc_ac_status, + og_ac=validation_result.accession, + validation_result=validation_result, + ) + else: + return None diff --git a/variation/translators/genomic_reference_agree.py b/variation/translators/genomic_reference_agree.py new file mode 100644 index 0000000..d471999 --- /dev/null +++ b/variation/translators/genomic_reference_agree.py @@ -0,0 +1,126 @@ +"""Module for Genomic Reference Agree Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaReferenceAgreeClassification, + ClassificationType, + GenomicReferenceAgreeClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import ( + TranslationResult, + VrsSeqLocAcStatus, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class GenomicReferenceAgree(Translator): + """The Genomic Reference Agree Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.GENOMIC_REFERENCE_AGREE + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + classification: GenomicReferenceAgreeClassification = ( + validation_result.classification + ) + vrs_allele = None + vrs_seq_loc_ac = None + vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA + + if endpoint_name == Endpoint.NORMALIZE: + gene = ( + classification.gene_token.token if classification.gene_token else None + ) + mane = await self.mane_transcript.get_mane_transcript( + validation_result.accession, + classification.pos, + classification.pos, + AnnotationLayer.GENOMIC, + try_longest_compatible=True, + residue_mode=ResidueMode.RESIDUE, + gene=gene, + ) + + if mane: + vrs_seq_loc_ac_status = mane.status + + if gene: + classification = CdnaReferenceAgreeClassification( + matching_tokens=classification.matching_tokens, + nomenclature=classification.nomenclature, + gene_token=classification.gene_token, + pos=mane.pos[0] + 1, # 1-based for classification + ) + vrs_seq_loc_ac = mane.refseq + coord_type = AnnotationLayer.CDNA + validation_result.classification = classification + else: + vrs_seq_loc_ac = mane.alt_ac + coord_type = AnnotationLayer.GENOMIC + + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + mane.pos[0], + mane.pos[1], + coord_type, + AltType.REFERENCE_AGREE, + warnings, + cds_start=mane.coding_start_site if gene else None, + residue_mode=ResidueMode.INTER_RESIDUE, + ) + else: + vrs_seq_loc_ac = validation_result.accession + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + classification.pos, + classification.pos, + AnnotationLayer.GENOMIC, + AltType.REFERENCE_AGREE, + warnings, + residue_mode=ResidueMode.RESIDUE, + ) + + if vrs_allele and vrs_seq_loc_ac: + return TranslationResult( + vrs_variation=vrs_allele, + vrs_seq_loc_ac=vrs_seq_loc_ac, + vrs_seq_loc_ac_status=vrs_seq_loc_ac_status, + og_ac=validation_result.accession, + validation_result=validation_result, + ) + else: + return None diff --git a/variation/translators/genomic_substitution.py 
b/variation/translators/genomic_substitution.py new file mode 100644 index 0000000..789c601 --- /dev/null +++ b/variation/translators/genomic_substitution.py @@ -0,0 +1,150 @@ +"""Module for Genomic Substitution Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode, Strand +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + CdnaSubstitutionClassification, + ClassificationType, + GenomicSubstitutionClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import ( + TranslationResult, + VrsSeqLocAcStatus, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class GenomicSubstitution(Translator): + """The Genomic Substitution Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.GENOMIC_SUBSTITUTION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + errors = [] + + # First will translate valid result to VRS Allele + classification: GenomicSubstitutionClassification = ( + validation_result.classification + ) + vrs_allele = None + vrs_seq_loc_ac = None + vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA + + if endpoint_name == Endpoint.NORMALIZE: + gene = ( + classification.gene_token.token if classification.gene_token else None + ) + mane = await self.mane_transcript.get_mane_transcript( + validation_result.accession, + classification.pos, + classification.pos, + AnnotationLayer.GENOMIC, + try_longest_compatible=True, + residue_mode=ResidueMode.RESIDUE, + gene=gene, + ) + + if mane: + vrs_seq_loc_ac_status = mane.status + + if gene: + if mane.strand == Strand.NEGATIVE: + ref_rev = classification.ref[::-1] + alt_rev = classification.alt[::-1] + + complements = {"A": "T", "T": "A", "C": "G", "G": "C"} + + ref = "" + alt = "" + + for nt in ref_rev: + ref += complements[nt] + for nt in alt_rev: + alt += complements[nt] + else: + ref = classification.ref + alt = classification.alt + + classification = CdnaSubstitutionClassification( + matching_tokens=classification.matching_tokens, + nomenclature=classification.nomenclature, + gene_token=classification.gene_token, + pos=mane.pos[0] + 1, # 1-based for classification + ref=ref, + alt=alt, + ) + vrs_seq_loc_ac = mane.refseq + coord_type = AnnotationLayer.CDNA + validation_result.classification = classification + else: + vrs_seq_loc_ac = mane.alt_ac + coord_type = AnnotationLayer.GENOMIC + + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + mane.pos[0], + mane.pos[1], + coord_type, + AltType.SUBSTITUTION, + errors, + alt=classification.alt, + cds_start=mane.coding_start_site if gene else None, + residue_mode=ResidueMode.INTER_RESIDUE, + ) + else: + vrs_seq_loc_ac = validation_result.accession + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + classification.pos, + classification.pos, + 
AnnotationLayer.GENOMIC, + AltType.SUBSTITUTION, + errors, + alt=classification.alt, + residue_mode=ResidueMode.RESIDUE, + ) + + if vrs_allele and vrs_seq_loc_ac: + return TranslationResult( + vrs_variation=vrs_allele, + vrs_seq_loc_ac=vrs_seq_loc_ac, + vrs_seq_loc_ac_status=vrs_seq_loc_ac_status, + og_ac=validation_result.accession, + validation_result=validation_result, + ) + else: + return None diff --git a/variation/translators/protein_deletion.py b/variation/translators/protein_deletion.py new file mode 100644 index 0000000..5369fd5 --- /dev/null +++ b/variation/translators/protein_deletion.py @@ -0,0 +1,65 @@ +"""Module for Protein Deletion Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + ClassificationType, + ProteinDeletionClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class ProteinDeletion(Translator): + """The Protein Deletion Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.PROTEIN_DELETION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification: ProteinDeletionClassification = validation_result.classification + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos0, + classification.pos1, + AltType.DELETION, + AnnotationLayer.PROTEIN, + warnings, + ) + return translation_result diff --git a/variation/translators/protein_delins.py b/variation/translators/protein_delins.py new file mode 100644 index 0000000..e72c871 --- /dev/null +++ b/variation/translators/protein_delins.py @@ -0,0 +1,66 @@ +"""Module for Protein DelIns Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + ClassificationType, + ProteinDelInsClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class ProteinDelIns(Translator): + """The Protein DelIns Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.PROTEIN_DELINS + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification: ProteinDelInsClassification = validation_result.classification + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos0, + classification.pos1, + AltType.DELINS, + AnnotationLayer.PROTEIN, + warnings, + alt=classification.inserted_sequence, + ) + return translation_result diff --git a/variation/translators/protein_insertion.py b/variation/translators/protein_insertion.py new file mode 100644 index 0000000..2bf9d0b --- /dev/null +++ b/variation/translators/protein_insertion.py @@ -0,0 +1,68 @@ +"""Module for Protein Insertion Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + ClassificationType, + ProteinInsertionClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class ProteinInsertion(Translator): + """The Protein Insertion Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.PROTEIN_INSERTION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification: ProteinInsertionClassification = ( + validation_result.classification + ) + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos0, + classification.pos1, + AltType.INSERTION, + AnnotationLayer.PROTEIN, + warnings, + alt=classification.inserted_sequence, + ) + return translation_result diff --git a/variation/translators/protein_reference_agree.py b/variation/translators/protein_reference_agree.py new file mode 100644 index 0000000..2cfac14 --- /dev/null +++ b/variation/translators/protein_reference_agree.py @@ -0,0 +1,68 @@ +"""Module for Protein Reference Agree Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + ClassificationType, + ProteinReferenceAgreeClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class ProteinReferenceAgree(Translator): + """The Protein Reference Agree Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.PROTEIN_REFERENCE_AGREE + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification: ProteinReferenceAgreeClassification = ( + validation_result.classification + ) + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos, + classification.pos, + AltType.REFERENCE_AGREE, + AnnotationLayer.PROTEIN, + warnings, + ref=classification.ref, + ) + return translation_result diff --git a/variation/translators/protein_stop_gain.py b/variation/translators/protein_stop_gain.py new file mode 100644 index 0000000..1d1fc7a --- /dev/null +++ b/variation/translators/protein_stop_gain.py @@ -0,0 +1,66 @@ +"""Module for Protein Stop Gain Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + ClassificationType, + ProteinStopGainClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class ProteinStopGain(Translator): + """The Protein Stop Gain Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.PROTEIN_STOP_GAIN + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification: ProteinStopGainClassification = validation_result.classification + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos, + classification.pos, + AltType.STOP_GAIN, + AnnotationLayer.PROTEIN, + warnings, + alt=classification.alt, + ) + return translation_result diff --git a/variation/translators/protein_substitution.py b/variation/translators/protein_substitution.py new file mode 100644 index 0000000..b58213b --- /dev/null +++ b/variation/translators/protein_substitution.py @@ -0,0 +1,69 @@ +"""Module for Protein Substitution Translation.""" +from typing import List, Optional + +from cool_seq_tool.schemas import AnnotationLayer +from ga4gh.vrs import models + +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ( + ClassificationType, + ProteinSubstitutionClassification, +) +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType +from variation.schemas.translation_response_schema import TranslationResult +from variation.schemas.validation_response_schema import ValidationResult +from variation.translators.translator import Translator + + +class ProteinSubstitution(Translator): + """The Protein Substitution Translator class.""" + + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. 
Otherwise, `False` + """ + return classification_type == ClassificationType.PROTEIN_SUBSTITUTION + + async def translate( + self, + validation_result: ValidationResult, + warnings: List[str], + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + # First will translate valid result to VRS Allele + classification: ProteinSubstitutionClassification = ( + validation_result.classification + ) + + translation_result = await self.get_p_or_cdna_translation_result( + endpoint_name, + validation_result, + classification.pos, + classification.pos, + AltType.SUBSTITUTION, + AnnotationLayer.PROTEIN, + warnings, + ref=classification.ref, + alt=classification.alt, + ) + return translation_result diff --git a/variation/translators/translator.py b/variation/translators/translator.py new file mode 100644 index 0000000..2f01c01 --- /dev/null +++ b/variation/translators/translator.py @@ -0,0 +1,249 @@ +"""Module for translation.""" +from abc import ABC, abstractmethod +from typing import List, Optional, Union + +from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.mappers import ManeTranscript +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode +from cool_seq_tool.sources import UtaDatabase +from ga4gh.vrs import models + +from variation.hgvs_dup_del_mode import HGVSDupDelMode +from variation.schemas.app_schemas import Endpoint +from variation.schemas.classification_response_schema import ClassificationType +from variation.schemas.normalize_response_schema import HGVSDupDelModeOption +from variation.schemas.token_response_schema import AltType, GeneToken +from variation.schemas.translation_response_schema import ( + TranslationResult, + VrsSeqLocAcStatus, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.genomic_base import GenomicBase +from variation.vrs_representation import VRSRepresentation + + +class Translator(ABC): + """Class for translating to VRS representations""" + + def __init__( + self, + seqrepo_access: SeqRepoAccess, + mane_transcript: ManeTranscript, + uta: UtaDatabase, + vrs: VRSRepresentation, + hgvs_dup_del_mode: HGVSDupDelMode, + ) -> None: + """Initialize the Translator 
class. + + :param seqrepo_access: Access to SeqRepo data + :param mane_transcript: Access MANE Transcript information + :param uta: Access to UTA queries + :param vrs: Class for creating VRS objects + :param hgvs_dup_del_mode: Class for interpreting HGVS duplications and deletions + """ + self.seqrepo_access = seqrepo_access + self.uta = uta + self.genomic_base = GenomicBase(self.seqrepo_access, self.uta) + self.mane_transcript = mane_transcript + self.vrs = vrs + self.hgvs_dup_del_mode = hgvs_dup_del_mode + + @abstractmethod + def can_translate(self, classification_type: ClassificationType) -> bool: + """Determine if it's possible to translate a classification. + + :param classification_type: Classification type found + :return: `True` if `classification_type` matches translator's classification + type. Otherwise, `False` + """ + + @abstractmethod + async def translate( + self, + validation_result: ValidationResult, + endpoint_name: Optional[Endpoint] = None, + hgvs_dup_del_mode: HGVSDupDelModeOption = HGVSDupDelModeOption.DEFAULT, + baseline_copies: Optional[int] = None, + copy_change: Optional[models.CopyChange] = None, + do_liftover: bool = False, + ) -> Optional[TranslationResult]: + """Translate validation result to VRS representation + + :param validation_result: Validation result for a classification + :param endpoint_name: Name of endpoint that is being used + :param hgvs_dup_del_mode: Mode to use for interpreting HGVS duplications and + deletions + :param baseline_copies: The baseline copies for a copy number count variation + :param copy_change: The change for a copy number change variation + :param do_liftover: Whether or not to liftover to GRCh38 assembly + :return: Translation result if translation was successful. 
If translation was + not successful, `None` + """ + + def is_valid( + self, + gene_token: GeneToken, + alt_ac: str, + pos0: int, + pos1: int, + errors: List[str], + pos2: Optional[int] = None, + pos3: Optional[int] = None, + residue_mode: ResidueMode = ResidueMode.RESIDUE, + ) -> None: + """Check that positions are valid on a gene. Will mutate `errors` if invalid. + + :param gene_token: Gene token + :param alt_ac: Genomic RefSeq accession + :param pos0: Position 0 (GRCh38 assembly) + :param pos1: Position 1 (GRCh38 assembly) + :param errors: List of errors. Will get mutated if invalid. + :param pos2: Position 2 (GRCh38 assembly) + :param pos3: Position 3 (GRCh38 assembly) + :param residue_mode: Residue mode for positions + """ + gene_start = None + gene_end = None + + for ext in gene_token.gene.extensions: + if ext.name == "ensembl_locations": + if ext.value: + ensembl_loc = ext.value[0] + gene_start = ensembl_loc["start"] + gene_end = ensembl_loc["end"] - 1 + + if gene_start is None and gene_end is None: + errors.append( + f"gene-normalizer unable to find Ensembl location for: {gene_token.token}" # noqa: E501 + ) + + for pos in [pos0, pos1, pos2, pos3]: + if pos not in {"?", None}: + if residue_mode == ResidueMode.RESIDUE: + pos -= 1 + + if not (gene_start <= pos <= gene_end): + errors.append( + f"Inter-residue position {pos} out of index on {alt_ac} on gene, {gene_token.token}" # noqa: E501 + ) + + def validate_reference_sequence( + self, + ac: str, + start_pos: int, + end_pos: int, + expected_ref: str, + residue_mode: ResidueMode = ResidueMode.RESIDUE, + ) -> Optional[str]: + """Validate that expected reference sequence matches actual reference sequence + This is also in validator, but there is a ticket to have this method be moved + to cool-seq-tool. 
Once added, will be removed + + :param ac: Accession + :param start_pos: Start position + :param end_pos: End position + :param expected_ref: The expected reference sequence (from input string) + :param residue_mode: Residue mode for `start_pos` and `end_pos` + :return: Invalid message if invalid. If valid, `None` + """ + actual_ref, err_msg = self.seqrepo_access.get_reference_sequence( + ac, start=start_pos, end=end_pos, residue_mode=residue_mode + ) + + if not err_msg and (actual_ref != expected_ref): + err_msg = ( + f"Expected to find {expected_ref} at positions ({start_pos}, " + f"{end_pos}) on {ac} but found {actual_ref}" + ) + + return err_msg + + async def get_p_or_cdna_translation_result( + self, + endpoint_name: Endpoint, + validation_result: ValidationResult, + start_pos: int, + end_pos: int, + alt_type: AltType, + coordinate_type: Union[AnnotationLayer.PROTEIN, AnnotationLayer.CDNA], + errors: List[str], + cds_start: Optional[int] = None, + ref: Optional[str] = None, + alt: Optional[str] = None, + ) -> Optional[TranslationResult]: + """Get translation result for validation result. Used for unambiguous + variations on protein or cDNA coordinate types + + :param endpoint_name: Name of endpoint that is being used + :param validation_result: Validation result for a classification + :param start_pos: Start position (residue-mode) + :param end_pos: End position (residue-mode) + :param alt_type: Alteration type for validation result + :param coordinate_type: Coordinate type for validation result + :param errors: List of errors. Will be mutated if errors are found + :param cds_start: Coding start site. Only required for + `coordinate_type == AnnotationLayer.CDNA`. + :param ref: Expected reference sequence + :param alt: Expected change + :return: Translation result if successful. 
Else, `None` + """ + vrs_allele = None + vrs_seq_loc_ac = None + vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA + + if endpoint_name == Endpoint.NORMALIZE: + mane = await self.mane_transcript.get_mane_transcript( + validation_result.accession, + start_pos, + end_pos if end_pos is not None else start_pos, + coordinate_type, + try_longest_compatible=True, + residue_mode=ResidueMode.RESIDUE, + ref=ref, + ) + + if mane: + vrs_seq_loc_ac = mane.refseq + vrs_seq_loc_ac_status = mane.status + + try: + cds_start = mane.coding_start_site + except AttributeError: + cds_start = None + + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + mane.pos[0], + mane.pos[1], + coordinate_type, + alt_type, + errors, + cds_start=cds_start, + alt=alt, + residue_mode=ResidueMode.INTER_RESIDUE, + ) + + if not vrs_allele: + vrs_seq_loc_ac = validation_result.accession + vrs_allele = self.vrs.to_vrs_allele( + vrs_seq_loc_ac, + start_pos, + end_pos, + coordinate_type, + alt_type, + errors, + cds_start=cds_start, + alt=alt, + residue_mode=ResidueMode.RESIDUE, + ) + + if vrs_allele and vrs_seq_loc_ac: + return TranslationResult( + vrs_variation=vrs_allele, + vrs_seq_loc_ac=vrs_seq_loc_ac, + vrs_seq_loc_ac_status=vrs_seq_loc_ac_status, + og_ac=validation_result.accession, + validation_result=validation_result, + ) + else: + return None diff --git a/variation/utils.py b/variation/utils.py new file mode 100644 index 0000000..0f08855 --- /dev/null +++ b/variation/utils.py @@ -0,0 +1,206 @@ +"""Module for general functionality throughout the app""" +import re +from typing import Dict, List, Literal, Optional, Tuple, Union + +from bioutils.sequences import aa1_to_aa3 as _aa1_to_aa3 +from bioutils.sequences import aa3_to_aa1 as _aa3_to_aa1 +from cool_seq_tool.handlers import SeqRepoAccess +from ga4gh.core import core_models + +from variation.schemas.app_schemas import AmbiguousRegexType +from variation.schemas.classification_response_schema import AmbiguousType +from variation.schemas.service_schema 
import ClinVarAssembly + + +def update_warnings_for_no_resp(label: str, warnings: List[str]) -> None: + """Mutate `warnings` when unable to return a response + + :param label: Initial input query + :param warnings: List of warnings to mutate + """ + if not warnings: + warnings.append(f"Unable to translate {label}") + + +def _get_priority_sequence_location( + locations: List[Dict], seqrepo_access: SeqRepoAccess +) -> Optional[Dict]: + """Get prioritized sequence location from list of locations + Will prioritize GRCh8 over GRCh37. Will also only support chromosomes. + + :param List[Dict] locations: List of Chromosome and Sequence Locations represented + as dictionaries + :param SeqRepoAccess seqrepo_access: Client to access seqrepo + :return: SequenceLocation represented as a dictionary if found + """ + locs = [loc for loc in locations if loc["type"] == "SequenceLocation"] + location = None + if locs: + if len(locs) > 1: + loc38, loc37 = None, None + for loc in locs: + seq_id = f"ga4gh:{loc['sequenceReference']['refgetAccession']}" + aliases, _ = seqrepo_access.translate_identifier(seq_id) + if aliases: + grch_aliases = [ + a for a in aliases if re.match(r"^GRCh3\d:chr(X|Y|\d+)$", a) + ] + if grch_aliases: + grch_alias = grch_aliases[0] + if grch_alias.startswith("GRCh38"): + loc38 = loc + elif grch_alias.startswith("GRCh37"): + loc37 = loc + location = loc38 or loc37 + else: + location = locs[0] + + if location: + # DynamoDB stores as Decimal, so need to convert to int + for k in {"start", "end"}: + location[k] = int(location[k]) + return location + + +def get_priority_sequence_location( + gene: core_models.Gene, seqrepo_access: SeqRepoAccess +) -> Optional[Dict]: + """Get prioritized sequence location from a gene + Will prioritize NCBI and then Ensembl. GRCh38 will be chosen over GRCh37. 
+ + :param gene: GA4GH Core Gene + :param seqrepo_access: Client to access seqrepo + :return: Prioritized sequence location represented as a dictionary if found + """ + extensions = gene.extensions or [] + + # HGNC only provides ChromosomeLocation + ensembl_loc, ncbi_loc = None, None + for ext in extensions: + if ext.name == "ncbi_locations": + ncbi_loc = _get_priority_sequence_location(ext.value, seqrepo_access) + elif ext.name == "ensembl_locations": + ensembl_loc = _get_priority_sequence_location(ext.value, seqrepo_access) + return ncbi_loc or ensembl_loc + + +def get_aa1_codes(aa: str) -> Optional[str]: + """Get 1 letter AA codes given possible AA string (either 1 or 3 letter). + Will also validate the input AA string. + + :param aa: Input amino acid string. Case sensitive. + :return: Amino acid string represented using 1 letter AA codes if valid. If invalid, + will return None + """ + aa1 = None + if aa == "*": + aa1 = aa + else: + try: + # see if it's already 1 AA + _aa1_to_aa3(aa) + except KeyError: + # see if it's 3 AA + try: + aa1 = _aa3_to_aa1(aa) + except KeyError: + pass + else: + aa1 = aa + + return aa1 + + +def get_ambiguous_type( + pos0: Union[int, Literal["?"]], + pos1: Optional[Union[int, Literal["?"]]], + pos2: Union[int, Literal["?"]], + pos3: Optional[Union[int, Literal["?"]]], + ambiguous_regex_type: AmbiguousRegexType, +) -> Optional[AmbiguousType]: + """Get the ambiguous type given positions and regex used + + :param pos0: Position 0 + :param pos1: Position 1 + :param pos2: Position 2 + :param pos3: Position 3 + :param ambiguous_regex_type: The matched regex pattern + :return: Corresponding ambiguous type if a match is found. 
Else, `None` + """ + ambiguous_type = None + if ambiguous_regex_type == AmbiguousRegexType.REGEX_1: + if all( + ( + isinstance(pos0, int), + isinstance(pos1, int), + isinstance(pos2, int), + isinstance(pos3, int), + ) + ): + ambiguous_type = AmbiguousType.AMBIGUOUS_1 + elif all( + (pos0 == "?", isinstance(pos1, int), isinstance(pos2, int), pos3 == "?") + ): + ambiguous_type = AmbiguousType.AMBIGUOUS_2 + elif ambiguous_regex_type == AmbiguousRegexType.REGEX_2: + if all( + (pos0 == "?", isinstance(pos1, int), isinstance(pos2, int), pos3 is None) + ): + ambiguous_type = AmbiguousType.AMBIGUOUS_5 + elif ambiguous_regex_type == AmbiguousRegexType.REGEX_3: + if all( + (isinstance(pos0, int), pos1 is None, isinstance(pos2, int), pos3 == "?") + ): + ambiguous_type = AmbiguousType.AMBIGUOUS_7 + + return ambiguous_type + + +def get_assembly( + seqrepo_access: SeqRepoAccess, alt_ac: str +) -> Tuple[Optional[ClinVarAssembly], Optional[str]]: + """Get GRCh assembly for given genomic RefSeq accession + + :param seqrepo_access: Access to SeqRepo client + :param alt_ac: Genomic RefSeq accession + :return: Tuple containing the corresponding GRCh assembly, if found in SeqRepo and + optional warning message if an assembly is not found in SeqRepo + """ + assembly = None + warning = None + + grch38_aliases, _ = seqrepo_access.translate_identifier(alt_ac, "GRCh38") + if grch38_aliases: + assembly = ClinVarAssembly.GRCH38 + + grch37_aliases, _ = seqrepo_access.translate_identifier(alt_ac, "GRCh37") + if grch37_aliases: + assembly = ClinVarAssembly.GRCH37 + + if not assembly: + warning = f"Unable to get GRCh37/GRCh38 assembly for: {alt_ac}" + + return assembly, warning + + +def get_refget_accession( + seqrepo_access: SeqRepoAccess, alias: str, errors: List[str] +) -> Optional[str]: + """Get refget accession for a given alias + + :param seqrepo_access: Access to SeqRepo client + :param alias: Alias to translate + :param errors: List of errors. This will get mutated if an error occurs. 
+ :return: RefGet Accession if successful, else `None` + """ + refget_accession = None + try: + ids = seqrepo_access.translate_sequence_identifier(alias, "ga4gh") + except KeyError as e: + errors.append(str(e)) + else: + if not ids: + errors.append(f"Unable to find ga4gh sequence identifiers for: {alias}") + + refget_accession = ids[0].split("ga4gh:")[-1] + return refget_accession diff --git a/variation/validate.py b/variation/validate.py new file mode 100644 index 0000000..088469d --- /dev/null +++ b/variation/validate.py @@ -0,0 +1,120 @@ +"""Module for Validation.""" +from typing import List + +from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.sources import TranscriptMappings, UtaDatabase +from gene.query import QueryHandler as GeneQueryHandler + +from variation.schemas.classification_response_schema import Classification +from variation.schemas.validation_response_schema import ValidationSummary +from variation.validators import ( + Amplification, + CdnaDeletion, + CdnaDelIns, + CdnaInsertion, + CdnaReferenceAgree, + CdnaSubstitution, + GenomicDeletion, + GenomicDeletionAmbiguous, + GenomicDelIns, + GenomicDuplication, + GenomicDuplicationAmbiguous, + GenomicInsertion, + GenomicReferenceAgree, + GenomicSubstitution, + ProteinDeletion, + ProteinDelIns, + ProteinInsertion, + ProteinReferenceAgree, + ProteinStopGain, + ProteinSubstitution, +) +from variation.validators.validator import Validator + + +class Validate: + """The validation class.""" + + def __init__( + self, + seqrepo_access: SeqRepoAccess, + transcript_mappings: TranscriptMappings, + uta: UtaDatabase, + gene_normalizer: GeneQueryHandler, + ) -> None: + """Initialize the validate class. Will create an instance variable, + `validators`, which is a list of Validators for supported variation types. 
+ + :param seqrepo_access: Access to SeqRepo data + :param transcript_mappings: Access to transcript mappings + :param uta: Access to UTA queries + :param gene_normalizer: Access to gene-normalizer + """ + params = [seqrepo_access, transcript_mappings, uta, gene_normalizer] + self.validators: List[Validator] = [ + ProteinSubstitution(*params), + CdnaSubstitution(*params), + GenomicSubstitution(*params), + ProteinStopGain(*params), + ProteinReferenceAgree(*params), + CdnaReferenceAgree(*params), + GenomicReferenceAgree(*params), + ProteinDelIns(*params), + CdnaDelIns(*params), + GenomicDelIns(*params), + ProteinDeletion(*params), + CdnaDeletion(*params), + GenomicDeletion(*params), + GenomicDeletionAmbiguous(*params), + ProteinInsertion(*params), + CdnaInsertion(*params), + GenomicInsertion(*params), + GenomicDuplication(*params), + GenomicDuplicationAmbiguous(*params), + Amplification(*params), + ] + + async def perform(self, classification: Classification) -> ValidationSummary: + """Get validation summary containing invalid and valid results for a + classification + + :param classification: A classification for a list of tokens + :return: Validation summary for classification containing valid and invalid + results + """ + valid_possibilities = [] + invalid_possibilities = [] + + found_valid_result = False + invalid_classification = None + + for validator in self.validators: + if validator.validates_classification_type( + classification.classification_type + ): + validation_results = await validator.validate(classification) + for validation_result in validation_results: + if validation_result.is_valid: + found_valid_result = True + valid_possibilities.append(validation_result) + else: + invalid_possibilities.append(validation_result) + invalid_classification = ( + classification.classification_type.value + ) + + if found_valid_result: + break + + if not found_valid_result: + warnings = [ + f"Unable to find valid result for classification: {invalid_classification}" 
# noqa: E501 + ] + else: + warnings = [] + + return ValidationSummary( + valid_results=valid_possibilities, + invalid_results=invalid_possibilities, + warnings=warnings, + ) diff --git a/variation/validators/__init__.py b/variation/validators/__init__.py new file mode 100644 index 0000000..3215eff --- /dev/null +++ b/variation/validators/__init__.py @@ -0,0 +1,22 @@ +"""Validator package level import.""" +from .amplification import Amplification +from .cdna_deletion import CdnaDeletion +from .cdna_delins import CdnaDelIns +from .cdna_insertion import CdnaInsertion +from .cdna_reference_agree import CdnaReferenceAgree +from .cdna_substitution import CdnaSubstitution +from .genomic_base import GenomicBase +from .genomic_deletion import GenomicDeletion +from .genomic_deletion_ambiguous import GenomicDeletionAmbiguous +from .genomic_delins import GenomicDelIns +from .genomic_duplication import GenomicDuplication +from .genomic_duplication_ambiguous import GenomicDuplicationAmbiguous +from .genomic_insertion import GenomicInsertion +from .genomic_reference_agree import GenomicReferenceAgree +from .genomic_substitution import GenomicSubstitution +from .protein_deletion import ProteinDeletion +from .protein_delins import ProteinDelIns +from .protein_insertion import ProteinInsertion +from .protein_reference_agree import ProteinReferenceAgree +from .protein_stop_gain import ProteinStopGain +from .protein_substitution import ProteinSubstitution diff --git a/variation/validators/amplification.py b/variation/validators/amplification.py new file mode 100644 index 0000000..70cd7a3 --- /dev/null +++ b/variation/validators/amplification.py @@ -0,0 +1,52 @@ +"""Module for Amplification validation""" +from typing import List + +from variation.schemas.classification_response_schema import ( + AmplificationClassification, + Classification, + ClassificationType, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import 
Validator + + +class Amplification(Validator): + """The Insertion Validator Base class.""" + + async def get_valid_invalid_results( + self, classification: AmplificationClassification, accessions: List + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + # Does not require any validation + return [ + ValidationResult( + accession=None, classification=classification, is_valid=True, errors=[] + ) + ] + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Check that classification type can be validated by validator. + + :param ClassificationType classification_type: The type of variation + :return: `True` if classification_type matches validator's + classification type. `False` otherwise. 
+ """ + return classification_type == ClassificationType.AMPLIFICATION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List: + """Return empty list since amplification does not require accessions + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: Empty list + """ + return [] diff --git a/variation/validators/cdna_deletion.py b/variation/validators/cdna_deletion.py new file mode 100644 index 0000000..c93ed45 --- /dev/null +++ b/variation/validators/cdna_deletion.py @@ -0,0 +1,114 @@ +"""The module for cDNA Deletion Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + CdnaDeletionClassification, + Classification, + ClassificationType, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class CdnaDeletion(Validator): + """The cDNA Deletion Validator class.""" + + async def get_valid_invalid_results( + self, classification: CdnaDeletionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + validation_results = [] + + for c_ac in accessions: + errors = [] + cds_start, cds_start_err_msg = await self.get_cds_start(c_ac) + + if cds_start_err_msg: + errors.append(cds_start_err_msg) + else: + if classification.nomenclature in { + Nomenclature.FREE_TEXT, + 
Nomenclature.HGVS, + }: + # # validate deleted sequence + # HGVS deleted sequence includes start and end + start = cds_start + classification.pos0 + end = ( + cds_start + classification.pos1 + if classification.pos1 is not None + else start + ) + if classification.deleted_sequence: + invalid_del_seq_msg = self.validate_reference_sequence( + c_ac, + start, + end_pos=end, + expected_ref=classification.deleted_sequence, + ) + + if invalid_del_seq_msg: + errors.append(invalid_del_seq_msg) + else: + # Validate accession and positions + invalid_ac_pos_msg = self.validate_ac_and_pos( + c_ac, + start, + end_pos=end, + ) + if invalid_ac_pos_msg: + errors.append(invalid_ac_pos_msg) + + validation_results.append( + ValidationResult( + accession=c_ac, + classification=classification, + cds_start=cds_start, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is cdna deletion.""" + return classification_type == ClassificationType.CDNA_DELETION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_cdna_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/cdna_delins.py b/variation/validators/cdna_delins.py new file mode 100644 index 0000000..a939394 --- /dev/null +++ b/variation/validators/cdna_delins.py @@ -0,0 +1,93 @@ +"""The module for Cdna DelIns Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + CdnaDelInsClassification, + Classification, + ClassificationType, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class CdnaDelIns(Validator): + """The Cdna DelIns Validator class.""" + + async def get_valid_invalid_results( + self, classification: CdnaDelInsClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + validation_results = [] + + for c_ac in accessions: + errors = [] + cds_start, cds_start_err_msg = await self.get_cds_start(c_ac) + + if cds_start_err_msg: + errors.append(cds_start_err_msg) + else: + # Validate accession and positions + invalid_ac_pos_msg = self.validate_ac_and_pos( + 
c_ac, + cds_start + classification.pos0, + end_pos=cds_start + classification.pos1 + if classification.pos1 + else None, + ) + if invalid_ac_pos_msg: + errors.append(invalid_ac_pos_msg) + + validation_results.append( + ValidationResult( + accession=c_ac, + classification=classification, + cds_start=cds_start, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is cdna delins.""" + return classification_type == ClassificationType.CDNA_DELINS + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_cdna_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/cdna_insertion.py b/variation/validators/cdna_insertion.py new file mode 100644 index 0000000..d285d41 --- /dev/null +++ b/variation/validators/cdna_insertion.py @@ -0,0 +1,91 @@ +"""The module for Cdna Insertion Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + CdnaInsertionClassification, + Classification, + ClassificationType, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class CdnaInsertion(Validator): + """The Cdna Insertion Validator class.""" + + async def get_valid_invalid_results( + self, classification: 
CdnaInsertionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + validation_results = [] + + for c_ac in accessions: + errors = [] + cds_start, cds_start_err_msg = await self.get_cds_start(c_ac) + + if cds_start_err_msg: + errors.append(cds_start_err_msg) + else: + # Validate accession and positions + invalid_ac_pos_msg = self.validate_ac_and_pos( + c_ac, + cds_start + classification.pos0, + end_pos=cds_start + classification.pos1, + ) + if invalid_ac_pos_msg: + errors.append(invalid_ac_pos_msg) + + validation_results.append( + ValidationResult( + accession=c_ac, + classification=classification, + cds_start=cds_start, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is cdna insertion.""" + return classification_type == ClassificationType.CDNA_INSERTION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_cdna_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/cdna_reference_agree.py b/variation/validators/cdna_reference_agree.py new file mode 100644 index 0000000..17d1657 --- /dev/null +++ b/variation/validators/cdna_reference_agree.py @@ -0,0 +1,75 @@ +"""The module for Cdna Substitution Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + CdnaReferenceAgreeClassification, + Classification, + ClassificationType, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class CdnaReferenceAgree(Validator): + """The Cdna Reference Agree Validator class.""" + + async def get_valid_invalid_results( + self, classification: CdnaReferenceAgreeClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + validation_results = [] + + for c_ac in accessions: + errors = [] + cds_start, cds_start_err_msg = await self.get_cds_start(c_ac) + + if cds_start_err_msg: + errors.append(cds_start_err_msg) + else: + invalid_ac_pos_msg = self.validate_ac_and_pos( + c_ac, cds_start + classification.pos + ) + if invalid_ac_pos_msg: + errors.append(invalid_ac_pos_msg) + + validation_results.append( + ValidationResult( + accession=c_ac, + classification=classification, + cds_start=cds_start, + 
is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is cdna reference agree.""" + return classification_type == ClassificationType.CDNA_REFERENCE_AGREE + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_cdna_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/cdna_substitution.py b/variation/validators/cdna_substitution.py new file mode 100644 index 0000000..5c887c4 --- /dev/null +++ b/variation/validators/cdna_substitution.py @@ -0,0 +1,78 @@ +"""The module for cDNA Substitution Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + CdnaSubstitutionClassification, + Classification, + ClassificationType, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class CdnaSubstitution(Validator): + """The cDNA Substitution Validator class.""" + + async def get_valid_invalid_results( + self, classification: CdnaSubstitutionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + 
:return: List of validation results containing invalid and valid results + """ + validation_results = [] + + for c_ac in accessions: + errors = [] + cds_start, cds_start_err_msg = await self.get_cds_start(c_ac) + + if cds_start_err_msg: + errors.append(cds_start_err_msg) + else: + valid_ref_seq_msg = self.validate_reference_sequence( + c_ac, + classification.pos + cds_start, + classification.pos + cds_start, + classification.ref, + ) + if valid_ref_seq_msg: + errors.append(valid_ref_seq_msg) + + validation_results.append( + ValidationResult( + accession=c_ac, + classification=classification, + cds_start=cds_start, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is cdna substitution.""" + return classification_type == ClassificationType.CDNA_SUBSTITUTION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_cdna_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/genomic_base.py b/variation/validators/genomic_base.py new file mode 100644 index 0000000..b0400d1 --- /dev/null +++ b/variation/validators/genomic_base.py @@ -0,0 +1,66 @@ +"""Module for Genomic Validation methods.""" +import logging +from typing import List, Optional + +from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.sources import UtaDatabase + +from variation.schemas.classification_response_schema import ( + Classification, + Nomenclature, +) + +logger = logging.getLogger("variation") +logger.setLevel(logging.DEBUG) + + +class GenomicBase: + """Genomic Base class for validation methods.""" + + def __init__(self, seqrepo_access: SeqRepoAccess, uta: UtaDatabase) -> None: + """Initialize the Genomic base class. 
+ + :param SeqRepoAccess seqrepo_access: Access to seqrepo + :param UtaDatabase uta: Access to UTA queries + """ + self.seqrepo_access = seqrepo_access + self.uta = uta + + """The Genomic Base class.""" + + async def get_nc_accessions(self, classification: Classification) -> List[str]: + """Get NC accession for a given classification.""" + if classification.nomenclature == Nomenclature.HGVS: + nc_accessions = [classification.ac] + elif classification.nomenclature == Nomenclature.FREE_TEXT: + nc_accessions = await self.uta.get_ac_from_gene( + classification.gene_token.matched_value + ) + elif classification.nomenclature == Nomenclature.GNOMAD_VCF: + gnomad_vcf_token = classification.matching_tokens[0] + chromosome = gnomad_vcf_token.chromosome + nc_accessions = [] + + for assembly in ["GRCh37", "GRCh38"]: + ac = self.get_nc_accession(f"{assembly}:{chromosome}") + if ac: + nc_accessions.append(ac) + else: + raise NotImplementedError + + return nc_accessions + + def get_nc_accession(self, identifier: str) -> Optional[str]: + """Given an identifier (assembly+chr), return nc accession.""" + nc_accession = None + try: + translated_identifiers, _ = self.seqrepo_access.translate_identifier( + identifier + ) + except KeyError: + logger.warning("Data Proxy unable to get metadata" f"for {identifier}") + else: + aliases = [a for a in translated_identifiers if a.startswith("refseq:NC_")] + if aliases: + nc_accession = aliases[0].split(":")[-1] + return nc_accession diff --git a/variation/validators/genomic_deletion.py b/variation/validators/genomic_deletion.py new file mode 100644 index 0000000..7b2b3f8 --- /dev/null +++ b/variation/validators/genomic_deletion.py @@ -0,0 +1,127 @@ +"""The module for Genomic Deletion Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + GenomicDeletionClassification, + Nomenclature, +) +from variation.schemas.validation_response_schema import 
ValidationResult +from variation.validators.validator import Validator + + +class GenomicDeletion(Validator): + """The Genomic Deletion Validator class.""" + + async def get_valid_invalid_results( + self, classification: GenomicDeletionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + validation_results = [] + + for alt_ac in accessions: + errors = [] + + invalid_ac_pos = self.validate_ac_and_pos( + alt_ac, classification.pos0, end_pos=classification.pos1 + ) + if invalid_ac_pos: + errors.append(invalid_ac_pos) + else: + if classification.nomenclature in { + Nomenclature.FREE_TEXT, + Nomenclature.HGVS, + }: + # Validate deleted sequence + # HGVS deleted sequence includes start and end + if classification.deleted_sequence: + invalid_del_seq_message = self.validate_reference_sequence( + alt_ac, + classification.pos0, + classification.pos1 + if classification.pos1 + else classification.pos0, + classification.deleted_sequence, + ) + + if invalid_del_seq_message: + errors.append(invalid_del_seq_message) + + if not errors: + if classification.nomenclature == Nomenclature.GNOMAD_VCF: + # Validate reference sequence + ref = classification.matching_tokens[0].ref + validate_ref_msg = self.validate_reference_sequence( + alt_ac, + classification.pos0 - 1, + end_pos=classification.pos0 + (len(ref) - 1), + expected_ref=ref, + ) + + if validate_ref_msg: + errors.append(validate_ref_msg) + + if not errors and 
classification.gene_token: + # Validate positions exist within gene range + invalid_gene_pos_msg = await self._validate_gene_pos( + classification.gene_token.matched_value, + alt_ac, + classification.pos0, + classification.pos1, + ) + if invalid_gene_pos_msg: + errors.append(invalid_gene_pos_msg) + + validation_results.append( + ValidationResult( + accession=alt_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is genomic deletion""" + return classification_type == ClassificationType.GENOMIC_DELETION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = await self.get_genomic_accessions(classification, errors) + return accessions diff --git a/variation/validators/genomic_deletion_ambiguous.py b/variation/validators/genomic_deletion_ambiguous.py new file mode 100644 index 0000000..b0ddb8a --- /dev/null +++ b/variation/validators/genomic_deletion_ambiguous.py @@ -0,0 +1,121 @@ +"""The module for Genomic Deletion Ambiguous Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + AmbiguousType, + Classification, + ClassificationType, + GenomicDeletionAmbiguousClassification, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import 
Validator + + +class GenomicDeletionAmbiguous(Validator): + """The Genomic Deletion Ambiguous Validator class.""" + + async def get_valid_invalid_results( + self, + classification: GenomicDeletionAmbiguousClassification, + accessions: List[str], + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + # Validate ambiguous type and positions + invalid_classification_msg = self.validate_ambiguous_classification( + classification + ) + if invalid_classification_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_classification_msg], + ) + ] + + validation_results = [] + + for alt_ac in accessions: + errors = [] + + if classification.ambiguous_type == AmbiguousType.AMBIGUOUS_1: + start_pos = classification.pos0 + end_pos = classification.pos3 + elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2: + start_pos = classification.pos1 + end_pos = classification.pos2 + elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5: + start_pos = classification.pos1 + end_pos = classification.pos2 + elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_7: + start_pos = classification.pos0 + end_pos = classification.pos2 + else: + start_pos = None + end_pos = None + errors.append( + f"ambiguous type not supported: {classification.ambiguous_type}" + ) + + if start_pos is not None and end_pos is not None: + invalid_ac_pos = self.validate_ac_and_pos( + alt_ac, start_pos, end_pos=end_pos + ) + if invalid_ac_pos: + errors.append(invalid_ac_pos) + + if not errors and classification.gene_token: + invalid_gene_pos_msg = await self._validate_gene_pos( + classification.gene_token.matched_value, + alt_ac, + classification.pos0, + 
classification.pos1, + pos2=classification.pos2, + pos3=classification.pos3, + ) + if invalid_gene_pos_msg: + errors.append(invalid_gene_pos_msg) + + validation_results.append( + ValidationResult( + accession=alt_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is genomic deletion + ambiguous + """ + return classification_type == ClassificationType.GENOMIC_DELETION_AMBIGUOUS + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = await self.get_genomic_accessions(classification, errors) + return accessions diff --git a/variation/validators/genomic_delins.py b/variation/validators/genomic_delins.py new file mode 100644 index 0000000..1e8f9b3 --- /dev/null +++ b/variation/validators/genomic_delins.py @@ -0,0 +1,100 @@ +"""The module for Genomic DelIns Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + GenomicDelInsClassification, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class GenomicDelIns(Validator): + """The Genomic DelIns Validator class.""" + + async def get_valid_invalid_results( + self, classification: GenomicDelInsClassification, accessions: 
List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + validation_results = [] + + if classification.nomenclature == Nomenclature.GNOMAD_VCF: + ref = classification.matching_tokens[0].ref + else: + ref = None + + for alt_ac in accessions: + errors = [] + + if ref: + # gnomAD VCF provides reference, so we should validate this + invalid_ref_msg = self.validate_reference_sequence( + alt_ac, + classification.pos0, + classification.pos1 if classification.pos1 else classification.pos0, + ref, + ) + if invalid_ref_msg: + errors.append(invalid_ref_msg) + else: + # Validate ac and pos + invalid_ac_pos = self.validate_ac_and_pos( + alt_ac, classification.pos0, end_pos=classification.pos1 + ) + if invalid_ac_pos: + errors.append(invalid_ac_pos) + + validation_results.append( + ValidationResult( + accession=alt_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is genomic delins""" + return classification_type == ClassificationType.GENOMIC_DELINS + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = await self.get_genomic_accessions(classification, errors) + return accessions diff --git a/variation/validators/genomic_duplication.py b/variation/validators/genomic_duplication.py new file mode 100644 index 0000000..89d8225 --- /dev/null +++ b/variation/validators/genomic_duplication.py @@ -0,0 +1,95 @@ +"""The module for Genomic Duplication Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + GenomicDuplicationClassification, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class GenomicDuplication(Validator): + """The Genomic Duplication Validator class.""" + + async def get_valid_invalid_results( + self, classification: GenomicDuplicationClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + validation_results = [] + + for alt_ac in accessions: + errors = [] + + if classification.gene_token: + invalid_gene_pos_msg = await self._validate_gene_pos( + classification.gene_token.matched_value, + alt_ac, + 
classification.pos0, + classification.pos1, + ) + if invalid_gene_pos_msg: + errors.append(invalid_gene_pos_msg) + + if not errors: + invalid_ac_pos = self.validate_ac_and_pos( + alt_ac, classification.pos0, end_pos=classification.pos1 + ) + if invalid_ac_pos: + errors.append(invalid_ac_pos) + + validation_results.append( + ValidationResult( + accession=alt_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is genomic duplication""" + return classification_type == ClassificationType.GENOMIC_DUPLICATION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = await self.get_genomic_accessions(classification, errors) + return accessions diff --git a/variation/validators/genomic_duplication_ambiguous.py b/variation/validators/genomic_duplication_ambiguous.py new file mode 100644 index 0000000..ea52239 --- /dev/null +++ b/variation/validators/genomic_duplication_ambiguous.py @@ -0,0 +1,121 @@ +"""The module for Genomic Duplication Ambiguous Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + AmbiguousType, + Classification, + ClassificationType, + GenomicDuplicationAmbiguousClassification, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from 
variation.validators.validator import Validator + + +class GenomicDuplicationAmbiguous(Validator): + """The Genomic Duplication Ambiguous Validator class.""" + + async def get_valid_invalid_results( + self, + classification: GenomicDuplicationAmbiguousClassification, + accessions: List[str], + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + # Validate ambiguous type and positions + invalid_classification_msg = self.validate_ambiguous_classification( + classification + ) + if invalid_classification_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_classification_msg], + ) + ] + + validation_results = [] + + for alt_ac in accessions: + errors = [] + + if classification.ambiguous_type == AmbiguousType.AMBIGUOUS_1: + start_pos = classification.pos0 + end_pos = classification.pos3 + elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2: + start_pos = classification.pos1 + end_pos = classification.pos2 + elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5: + start_pos = classification.pos1 + end_pos = classification.pos2 + elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_7: + start_pos = classification.pos0 + end_pos = classification.pos2 + else: + start_pos = None + end_pos = None + errors.append( + f"ambiguous type not supported: {classification.ambiguous_type}" + ) + + if start_pos is not None and end_pos is not None: + invalid_ac_pos = self.validate_ac_and_pos( + alt_ac, start_pos, end_pos=end_pos + ) + if invalid_ac_pos: + errors.append(invalid_ac_pos) + + if not errors and classification.gene_token: + invalid_gene_pos_msg = await self._validate_gene_pos( + 
classification.gene_token.matched_value, + alt_ac, + classification.pos0, + classification.pos1, + pos2=classification.pos2, + pos3=classification.pos3, + ) + if invalid_gene_pos_msg: + errors.append(invalid_gene_pos_msg) + + validation_results.append( + ValidationResult( + accession=alt_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is genomic duplication + ambiguous + """ + return classification_type == ClassificationType.GENOMIC_DUPLICATION_AMBIGUOUS + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = await self.get_genomic_accessions(classification, errors) + return accessions diff --git a/variation/validators/genomic_insertion.py b/variation/validators/genomic_insertion.py new file mode 100644 index 0000000..262128e --- /dev/null +++ b/variation/validators/genomic_insertion.py @@ -0,0 +1,100 @@ +"""The module for Genomic Insertion Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + GenomicInsertionClassification, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class GenomicInsertion(Validator): + """The Genomic Insertion Validator class.""" + + async 
def get_valid_invalid_results( + self, classification: GenomicInsertionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + validation_results = [] + + if classification.nomenclature == Nomenclature.GNOMAD_VCF: + ref = classification.matching_tokens[0].ref + else: + ref = None + + for alt_ac in accessions: + errors = [] + + if ref: + # gnomAD VCF provides reference, so we should validate this + invalid_ref_msg = self.validate_reference_sequence( + alt_ac, + classification.pos0, + end_pos=classification.pos1, + expected_ref=ref, + ) + if invalid_ref_msg: + errors.append(invalid_ref_msg) + else: + # Validate ac and pos + invalid_ac_pos_msg = self.validate_ac_and_pos( + alt_ac, classification.pos0, end_pos=classification.pos1 + ) + if invalid_ac_pos_msg: + errors.append(invalid_ac_pos_msg) + + validation_results.append( + ValidationResult( + accession=alt_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is genomic insertion""" + return classification_type == ClassificationType.GENOMIC_INSERTION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. 
+ If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = await self.get_genomic_accessions(classification, errors) + return accessions diff --git a/variation/validators/genomic_reference_agree.py b/variation/validators/genomic_reference_agree.py new file mode 100644 index 0000000..ec30c1d --- /dev/null +++ b/variation/validators/genomic_reference_agree.py @@ -0,0 +1,81 @@ +"""The module for Genomic Reference Agree Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + GenomicReferenceAgreeClassification, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class GenomicReferenceAgree(Validator): + """The Genomic Reference Agree Validator class.""" + + async def get_valid_invalid_results( + self, classification: GenomicReferenceAgreeClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + validation_results = [] + + for alt_ac in accessions: + errors = [] + + if classification.nomenclature == Nomenclature.GNOMAD_VCF: + token = classification.matching_tokens[0] + ref = token.ref + start_pos = token.pos + end_pos = token.pos + (len(ref) - 1) + invalid_ref_msg = self.validate_reference_sequence( + alt_ac, start_pos, end_pos, ref + ) + if 
invalid_ref_msg: + errors.append(invalid_ref_msg) + else: + invalid_ac_pos_msg = self.validate_ac_and_pos( + alt_ac, classification.pos + ) + if invalid_ac_pos_msg: + errors.append(invalid_ac_pos_msg) + + validation_results.append( + ValidationResult( + accession=alt_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is genomic reference agree""" + return classification_type == ClassificationType.GENOMIC_REFERENCE_AGREE + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = await self.get_genomic_accessions(classification, errors) + return accessions diff --git a/variation/validators/genomic_substitution.py b/variation/validators/genomic_substitution.py new file mode 100644 index 0000000..898750b --- /dev/null +++ b/variation/validators/genomic_substitution.py @@ -0,0 +1,78 @@ +"""The module for Genomic Substitution Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + GenomicSubstitutionClassification, + Nomenclature, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class GenomicSubstitution(Validator): + """The Genomic Substitution Validator class.""" + + async def 
get_valid_invalid_results( + self, classification: GenomicSubstitutionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + validation_results = [] + + if classification.nomenclature == Nomenclature.GNOMAD_VCF: + end_pos = classification.pos + (len(classification.alt) - 1) + else: + # HGVS is only 1 nuc + end_pos = classification.pos + + for alt_ac in accessions: + errors = [] + + valid_ref_seq_msg = self.validate_reference_sequence( + alt_ac, classification.pos, end_pos, classification.ref + ) + if valid_ref_seq_msg: + errors.append(valid_ref_seq_msg) + + validation_results.append( + ValidationResult( + accession=alt_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is genomic + substitution. + """ + return classification_type == ClassificationType.GENOMIC_SUBSTITUTION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = await self.get_genomic_accessions(classification, errors) + return accessions diff --git a/variation/validators/protein_deletion.py b/variation/validators/protein_deletion.py new file mode 100644 index 0000000..5756fb5 --- /dev/null +++ b/variation/validators/protein_deletion.py @@ -0,0 +1,127 @@ +"""The module for Protein Deletion Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + Nomenclature, + ProteinDeletionClassification, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class ProteinDeletion(Validator): + """The Protein Deletion Validator class.""" + + async def get_valid_invalid_results( + self, classification: ProteinDeletionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + # Only HGVS Expressions are validated + # Free text is validated during tokenization + if classification.nomenclature == Nomenclature.HGVS: + invalid_classification_msgs = self.validate_protein_hgvs_classification( + classification + ) + if 
invalid_classification_msgs: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=invalid_classification_msgs, + ) + ] + + validation_results = [] + + for p_ac in accessions: + errors = [] + + # Validate aa0 exists at pos0 on given protein accession + invalid_aa0_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos0, classification.pos0, classification.aa0 + ) + if invalid_aa0_seq_msg: + errors.append(invalid_aa0_seq_msg) + + # Validate aa1 exists at pos1 + if classification.aa1 and classification.pos1: + invalid_aa1_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos1, classification.pos1, classification.aa1 + ) + + if invalid_aa1_seq_msg: + errors.append(invalid_aa1_seq_msg) + + # Validate that deleted sequence matches expected + if classification.nomenclature in { + Nomenclature.FREE_TEXT, + Nomenclature.HGVS, + }: + # HGVS deleted sequence includes start and end + if classification.deleted_sequence: + if classification.pos1 is not None: + invalid_del_seq_msg = self.validate_reference_sequence( + p_ac, + classification.pos0, + classification.pos1, + classification.deleted_sequence, + ) + + if invalid_del_seq_msg: + errors.append(invalid_del_seq_msg) + + validation_results.append( + ValidationResult( + accession=p_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is protein deletion.""" + return classification_type == ClassificationType.PROTEIN_DELETION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_protein_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/protein_delins.py b/variation/validators/protein_delins.py new file mode 100644 index 0000000..3580185 --- /dev/null +++ b/variation/validators/protein_delins.py @@ -0,0 +1,109 @@ +"""The module for Protein DelIns Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + Nomenclature, + ProteinDelInsClassification, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class ProteinDelIns(Validator): + """The Protein DelIns Validator class.""" + + async def get_valid_invalid_results( + self, classification: ProteinDelInsClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + # Only HGVS Expressions are validated + # Free text is validated during tokenization + if classification.nomenclature == Nomenclature.HGVS: + invalid_classification_msgs = self.validate_protein_hgvs_classification( + classification + ) + if 
invalid_classification_msgs: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=invalid_classification_msgs, + ) + ] + + validation_results = [] + + for p_ac in accessions: + errors = [] + + # Validate aa0 exists at pos0 on given + invalid_aa0_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos0, classification.pos0, classification.aa0 + ) + if invalid_aa0_seq_msg: + errors.append(invalid_aa0_seq_msg) + + # Validate aa1 exists at pos1 + if classification.aa1 and classification.pos1: + invalid_aa1_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos1, classification.pos1, classification.aa1 + ) + + if invalid_aa1_seq_msg: + errors.append(invalid_aa1_seq_msg) + + validation_results.append( + ValidationResult( + accession=p_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is protein delins.""" + return classification_type == ClassificationType.PROTEIN_DELINS + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_protein_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/protein_insertion.py b/variation/validators/protein_insertion.py new file mode 100644 index 0000000..9a7087d --- /dev/null +++ b/variation/validators/protein_insertion.py @@ -0,0 +1,109 @@ +"""The module for Protein Insertion Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + Nomenclature, + ProteinInsertionClassification, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class ProteinInsertion(Validator): + """The Protein Insertion Validator class.""" + + async def get_valid_invalid_results( + self, classification: ProteinInsertionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + invalid_pos_msg = self.validate_5_prime_to_3_prime( + classification.pos0, pos1=classification.pos1 + ) + if invalid_pos_msg: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=[invalid_pos_msg], + ) + ] + + # Only HGVS Expressions are validated + # Free text is validated during tokenization + if classification.nomenclature == Nomenclature.HGVS: + invalid_classification_msgs = self.validate_protein_hgvs_classification( + classification + ) 
+ if invalid_classification_msgs: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=invalid_classification_msgs, + ) + ] + + validation_results = [] + + for p_ac in accessions: + errors = [] + + # Validate aa0 exists at pos0 on given + invalid_aa0_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos0, classification.pos0, classification.aa0 + ) + if invalid_aa0_seq_msg: + errors.append(invalid_aa0_seq_msg) + + # Validate aa1 exists at pos1 + if classification.aa1 and classification.pos1: + invalid_aa1_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos1, classification.pos1, classification.aa1 + ) + + if invalid_aa1_seq_msg: + errors.append(invalid_aa1_seq_msg) + + validation_results.append( + ValidationResult( + accession=p_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is protein insertion.""" + return classification_type == ClassificationType.PROTEIN_INSERTION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_protein_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/protein_reference_agree.py b/variation/validators/protein_reference_agree.py new file mode 100644 index 0000000..2381114 --- /dev/null +++ b/variation/validators/protein_reference_agree.py @@ -0,0 +1,86 @@ +"""The module for Protein Reference Agree Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + Nomenclature, + ProteinReferenceAgreeClassification, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class ProteinReferenceAgree(Validator): + """The Protein Reference Agree Validator class.""" + + async def get_valid_invalid_results( + self, classification: ProteinReferenceAgreeClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + # Only HGVS Expressions are validated + # Free text is validated during tokenization + if classification.nomenclature == Nomenclature.HGVS: + invalid_classification_msgs = self.validate_protein_hgvs_classification( + classification + ) + if invalid_classification_msgs: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=invalid_classification_msgs, + ) + ] + + validation_results = [] + + for p_ac 
in accessions: + errors = [] + + valid_ref_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos, classification.pos, classification.ref + ) + if valid_ref_seq_msg: + errors.append(valid_ref_seq_msg) + + validation_results.append( + ValidationResult( + accession=p_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is protein reference agree.""" + return classification_type == ClassificationType.PROTEIN_REFERENCE_AGREE + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_protein_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/protein_stop_gain.py b/variation/validators/protein_stop_gain.py new file mode 100644 index 0000000..67bd912 --- /dev/null +++ b/variation/validators/protein_stop_gain.py @@ -0,0 +1,89 @@ +"""The module for Protein Stop Gain Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + Nomenclature, + ProteinStopGainClassification, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class ProteinStopGain(Validator): + """The Protein Stop Gain Validator class.""" + + async def 
get_valid_invalid_results( + self, classification: ProteinStopGainClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + errors = [] + + # Only HGVS Expressions are validated + # Free text is validated during tokenization + # Don't need to validate alt, since we know it's '*' + if classification.nomenclature == Nomenclature.HGVS: + invalid_classification_msgs = self.validate_protein_hgvs_classification( + classification + ) + if invalid_classification_msgs: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=invalid_classification_msgs, + ) + ] + + validation_results = [] + + for p_ac in accessions: + errors = [] + + valid_ref_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos, classification.pos, classification.ref + ) + if valid_ref_seq_msg: + errors.append(valid_ref_seq_msg) + + validation_results.append( + ValidationResult( + accession=p_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is protein stop gain.""" + return classification_type == ClassificationType.PROTEIN_STOP_GAIN + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. 
+ Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_protein_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/protein_substitution.py b/variation/validators/protein_substitution.py new file mode 100644 index 0000000..ba4df8a --- /dev/null +++ b/variation/validators/protein_substitution.py @@ -0,0 +1,86 @@ +"""The module for Protein Substitution Validation.""" +from typing import List + +from variation.schemas.classification_response_schema import ( + Classification, + ClassificationType, + Nomenclature, + ProteinSubstitutionClassification, +) +from variation.schemas.validation_response_schema import ValidationResult +from variation.validators.validator import Validator + + +class ProteinSubstitution(Validator): + """The Protein Substitution Validator class.""" + + async def get_valid_invalid_results( + self, classification: ProteinSubstitutionClassification, accessions: List[str] + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + # Only HGVS Expressions are validated + # Free text is validated during tokenization + if classification.nomenclature == Nomenclature.HGVS: + invalid_classification_msgs = self.validate_protein_hgvs_classification( + classification + ) + if invalid_classification_msgs: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=invalid_classification_msgs, + ) + ] + + validation_results = [] + + for p_ac in accessions: + 
errors = [] + + valid_ref_seq_msg = self.validate_reference_sequence( + p_ac, classification.pos, classification.pos, classification.ref + ) + if valid_ref_seq_msg: + errors.append(valid_ref_seq_msg) + + validation_results.append( + ValidationResult( + accession=p_ac, + classification=classification, + is_valid=not errors, + errors=errors, + ) + ) + + return validation_results + + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Return whether or not the classification type is protein substitution.""" + return classification_type == ClassificationType.PROTEIN_SUBSTITUTION + + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + if classification.nomenclature == Nomenclature.HGVS: + accessions = [classification.ac] + else: + accessions = self.get_protein_accessions(classification.gene_token, errors) + return accessions diff --git a/variation/validators/validator.py b/variation/validators/validator.py new file mode 100644 index 0000000..6d51a8f --- /dev/null +++ b/variation/validators/validator.py @@ -0,0 +1,436 @@ +"""Module for Validation.""" +from abc import ABC, abstractmethod +from typing import List, Literal, Optional, Tuple, Union + +from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.schemas import ResidueMode +from cool_seq_tool.sources import TranscriptMappings, UtaDatabase +from gene.query import QueryHandler as GeneQueryHandler +from gene.schemas import SourceName + +from variation.schemas.classification_response_schema import ( + AmbiguousType, + Classification, + ClassificationType, + 
GenomicDeletionAmbiguousClassification, + GenomicDuplicationAmbiguousClassification, + ProteinDeletionClassification, + ProteinDelInsClassification, + ProteinInsertionClassification, + ProteinReferenceAgreeClassification, + ProteinStopGainClassification, + ProteinSubstitutionClassification, +) +from variation.schemas.token_response_schema import GeneToken +from variation.schemas.validation_response_schema import ValidationResult +from variation.utils import get_aa1_codes +from variation.validators.genomic_base import GenomicBase + + +class Validator(ABC): + """The validator class.""" + + def __init__( + self, + seqrepo_access: SeqRepoAccess, + transcript_mappings: TranscriptMappings, + uta: UtaDatabase, + gene_normalizer: GeneQueryHandler, + ) -> None: + """Initialize the DelIns validator. + + :param seqrepo_access: Access to SeqRepo data + :param transcript_mappings: Access to transcript mappings + :param uta: Access to UTA queries + :param gene_normalizer: Access to gene-normalizer + """ + self.transcript_mappings = transcript_mappings + self.seqrepo_access = seqrepo_access + self.uta = uta + self.genomic_base = GenomicBase(self.seqrepo_access, self.uta) + self.gene_normalizer = gene_normalizer + + @abstractmethod + async def get_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get accessions for a given classification. + If `classification.nomenclature == Nomenclature.HGVS`, will return the accession + in the HGVS expression. + Else, will get all accessions associated to the gene + + :param classification: The classification for list of tokens + :param errors: List of errors + :return: List of accessions + """ + + @abstractmethod + def validates_classification_type( + self, classification_type: ClassificationType + ) -> bool: + """Check that classification type can be validated by validator. 
+ + :param ClassificationType classification_type: The type of variation + :return: `True` if classification_type matches validator's + classification type. `False` otherwise. + """ + + @abstractmethod + async def get_valid_invalid_results( + self, classification: Classification, accessions: List + ) -> List[ValidationResult]: + """Get list of validation results for a given classification and accessions + + :param classification: A classification for a list of tokens + :param accessions: A list of accessions for a classification + :return: List of validation results containing invalid and valid results + """ + + async def validate(self, classification: Classification) -> List[ValidationResult]: + """Get list of associated accessions for a classification. Use these accessions + to perform validation checks (pos exists, accession is valid, reference sequence + matches expected, etc). Gets list of validation results for a given + classification + + :param classification: A classification for a list of tokens + :return: List of validation results containing invalid and valid results + """ + errors = [] + + try: + # NC_ queries do not have gene tokens + accessions = await self.get_accessions(classification, errors) + except IndexError: + accessions = [] + + if errors: + return [ + ValidationResult( + accession=None, + classification=classification, + is_valid=False, + errors=errors, + ) + ] + validation_results = await self.get_valid_invalid_results( + classification, accessions + ) + return validation_results + + def get_protein_accessions(self, gene_token: GeneToken, errors: List) -> List[str]: + """Get accessions for variations with protein reference sequence. 
+ + :param gene_token: Gene token for a classification + :param errors: List of errors + :return: List of possible protein accessions for the variation + """ + accessions = self.transcript_mappings.protein_transcripts(gene_token.token) + if not accessions: + errors.append( + f"No protein accessions found for gene symbol: {gene_token.token}" + ) + return accessions + + def get_cdna_accessions(self, gene_token: GeneToken, errors: List) -> List[str]: + """Get accessions for variations with cDNA reference sequence. + + :param gene_token: Gene token for a classification + :param errors: List of errors + :return: List of possible cDNA accessions for the variation + """ + accessions = self.transcript_mappings.coding_dna_transcripts(gene_token.token) + if not accessions: + errors.append( + f"No cDNA accessions found for gene symbol: {gene_token.token}" + ) + return accessions + + async def get_genomic_accessions( + self, classification: Classification, errors: List + ) -> List[str]: + """Get genomic RefSeq accessions for variations with genomic reference sequence. + + :param classification: Classification for a list of tokens + :param errors: List of errors + :return: List of possible genomic RefSeq accessions for the variation + """ + accessions = await self.genomic_base.get_nc_accessions(classification) + if not accessions: + errors.append("No genomic accessions found") + return accessions + + async def _validate_gene_pos( + self, + gene: str, + alt_ac: str, + pos0: int, + pos1: Optional[int], + pos2: Optional[int] = None, + pos3: Optional[int] = None, + residue_mode: ResidueMode = ResidueMode.RESIDUE, + ) -> Optional[str]: + """Validate whether free text genomic query is valid input. 
+ If invalid input, add error to list of errors + + :param gene: Gene symbol + :param alt_ac: Genomic accession + :param pos0: Queried genomic position + :param pos1: Queried genomic position + :param pos2: Queried genomic position + :param pos3: Queried genomic position + :param residue_mode: Residue mode for positions + :return: Invalid error message if invalid. Else, `None` + """ + gene_start_end = {"start": None, "end": None} + resp = self.gene_normalizer.search(gene, incl=SourceName.ENSEMBL.value) + if resp.source_matches: + ensembl_resp = resp.source_matches[SourceName.ENSEMBL] + if all( + (ensembl_resp, ensembl_resp.records, ensembl_resp.records[0].locations) + ): + ensembl_loc = ensembl_resp.records[0].locations[0] + gene_start_end["start"] = ensembl_loc.start + gene_start_end["end"] = ensembl_loc.end - 1 + + if gene_start_end["start"] is None and gene_start_end["end"] is None: + return f"gene-normalizer unable to find Ensembl location for gene: {gene}" + else: + assembly = await self.uta.get_chr_assembly(alt_ac) + if assembly: + # Not in GRCh38 assembly. 
Gene normalizer only uses 38, so we + # need to liftover to GRCh37 coords + chromosome, assembly = assembly + for key in gene_start_end.keys(): + gene_pos = gene_start_end[key] + gene_pos_liftover = self.uta.liftover_38_to_37.convert_coordinate( + chromosome, gene_pos + ) + if gene_pos_liftover is None or len(gene_pos_liftover) == 0: + return f"{gene_pos} does not exist on {chromosome}" + else: + gene_start_end[key] = gene_pos_liftover[0][1] + + gene_start = gene_start_end["start"] + gene_end = gene_start_end["end"] + + for pos in [pos0, pos1, pos2, pos3]: + if pos not in ["?", None]: + if residue_mode == "residue": + pos -= 1 + if not (gene_start <= pos <= gene_end): + return f"Position {pos} out of index on {alt_ac} on gene, {gene}" # noqa: E501 + + def validate_reference_sequence( + self, + ac: str, + start_pos: int, + end_pos: int, + expected_ref: str, + residue_mode: ResidueMode = ResidueMode.RESIDUE, + ) -> Optional[str]: + """Validate that expected reference sequence matches actual reference sequence. + This is also in translator, but there is a ticket to have this method be moved + to cool-seq-tool. Once added, will be removed + + :param ac: Accession + :param start_pos: Start position + :param end_pos: End position + :param expected_ref: The expected reference sequence (from input string) + :param residue_mode: Residue mode for `start_pos` and `end_pos` + :return: Invalid message if invalid. 
If valid, `None` + """ + actual_ref, err_msg = self.seqrepo_access.get_reference_sequence( + ac, start=start_pos, end=end_pos, residue_mode=residue_mode + ) + + if not err_msg and (actual_ref != expected_ref): + err_msg = ( + f"Expected to find {expected_ref} at positions ({start_pos}, " + f"{end_pos}) on {ac} but found {actual_ref}" + ) + + return err_msg + + async def get_cds_start(self, ac: str) -> Tuple[Optional[int], Optional[str]]: + """Get coding start site for accession + + :param ac: Accession to get coding start site for + :return: Tuple containing coding start site (if successful) and errors + (if unsuccessful) + """ + cds_start_end = await self.uta.get_cds_start_end(ac) + + if cds_start_end: + cds_start = cds_start_end[0] + msg = None + else: + cds_start = None + msg = f"Unable to get CDS start for accession: {ac}" + + return cds_start, msg + + def validate_ac_and_pos( + self, + ac: str, + start_pos: int, + end_pos: Optional[int] = None, + residue_mode: ResidueMode = ResidueMode.RESIDUE, + ) -> Optional[str]: + """Validate that accession exists and that position(s) exist on accession + + :param ac: Accession + :param start_pos: Start position on accession + :param end_position: End position on accession + :param residue_mode: Residue mode for `start_pos` and `end_pos` + :return: If valid accession and position(s) on accession, `None`. 
If invalid + accession or invalid position(s) on accession, return error message + """ + if residue_mode == ResidueMode.RESIDUE: + start_pos -= 1 + + msg = None + ref_len = None + try: + if end_pos: + ref_len = len(self.seqrepo_access.sr[ac][start_pos:end_pos]) + else: + ref_len = len(self.seqrepo_access.sr[ac][start_pos]) + except KeyError: + msg = f"Accession does not exist in SeqRepo: {ac}" + except ValueError as e: + msg = f"{e} on accession ({ac})" + else: + if end_pos: + if not ref_len or (end_pos - start_pos != ref_len): + msg = f"Positions ({start_pos}, {end_pos}) not valid on accession ({ac})" # noqa: E501 + else: + if not ref_len: + msg = f"Position ({start_pos}) not valid on accession ({ac})" + + return msg + + @staticmethod + def validate_5_prime_to_3_prime( + pos0: int, + pos1: Optional[Union[int, Literal["?"]]], + pos2: Optional[Union[int, Literal["?"]]] = None, + pos3: Optional[Union[int, Literal["?"]]] = None, + ) -> Optional[str]: + """Validate that positions are unique and listed from 5' to 3' + + :param pos0: Position 0 + :param pos1: Position 1 + :param pos2: Position 2 + :param pos3: Position 3 + :return: Message if positions are not unique or not listed from 5' to 3'. 
+ Else, `None` + """ + prev_pos = None + invalid_msg = None + for pos in [pos0, pos1, pos2, pos3]: + if pos not in {"?", None}: + if prev_pos is None: + prev_pos = pos + else: + if pos <= prev_pos: + invalid_msg = ( + "Positions should contain two different positions and " + "should be listed from 5' to 3'" + ) + break + else: + prev_pos = pos + return invalid_msg + + def validate_ambiguous_classification( + self, + classification: Union[ + GenomicDeletionAmbiguousClassification, + GenomicDuplicationAmbiguousClassification, + ], + ) -> Optional[str]: + """Validate that ambiguous type is supported and that positions are unique and + listed from 5' to 3' + + :param classification: Ambiguous duplication or deletion classification + :return: Message if ambiguous type is not supported, positions are not unique, + or if positions are not listed from 5' to 3'. Else, `None` + """ + invalid_msg = None + if classification.ambiguous_type not in { + AmbiguousType.AMBIGUOUS_1, + AmbiguousType.AMBIGUOUS_2, + AmbiguousType.AMBIGUOUS_5, + AmbiguousType.AMBIGUOUS_7, + }: + invalid_msg = f"{classification.ambiguous_type} is not yet supported" + else: + invalid_msg = self.validate_5_prime_to_3_prime( + classification.pos0, + pos1=classification.pos1, + pos2=classification.pos2, + pos3=classification.pos3, + ) + return invalid_msg + + def validate_protein_hgvs_classification( + self, + classification: Union[ + ProteinDelInsClassification, + ProteinDeletionClassification, + ProteinInsertionClassification, + ProteinReferenceAgreeClassification, + ProteinStopGainClassification, + ProteinSubstitutionClassification, + ], + ) -> List[str]: + """Validate protein HGVS classification + + :param classification: Classification + Will be mutated if used 3 letter AA codes + :return: List of invalid error messages if found + """ + errors = [] + + if hasattr(classification, "ref"): + aa1_ref = get_aa1_codes(classification.ref) + if aa1_ref: + classification.ref = aa1_ref + else: + 
errors.append(f"`ref` not valid amino acid(s): {classification.ref}") + + if hasattr(classification, "alt"): + aa1_alt = get_aa1_codes(classification.alt) + if aa1_alt: + classification.alt = aa1_alt + else: + errors.append(f"`alt` not valid amino acid(s): {classification.alt}") + + if hasattr(classification, "aa0"): + aa0_codes = get_aa1_codes(classification.aa0) + if aa0_codes: + classification.aa0 = aa0_codes + else: + errors.append(f"`aa0` not valid amino acid(s): {classification.aa0}") + + if hasattr(classification, "aa1"): + if classification.aa1: + aa1_codes = get_aa1_codes(classification.aa1) + if aa1_codes: + classification.aa1 = aa1_codes + else: + errors.append( + f"`aa1` not valid amino acid(s): {classification.aa1}" + ) + + if hasattr(classification, "inserted_sequence"): + ins_codes = get_aa1_codes(classification.inserted_sequence) + if ins_codes: + classification.inserted_sequence = ins_codes + else: + errors.append( + f"`inserted_sequence` not valid amino acid(s): " + f"{classification.inserted_sequence}" + ) + + return errors diff --git a/variation/version.py b/variation/version.py new file mode 100644 index 0000000..7b64f4f --- /dev/null +++ b/variation/version.py @@ -0,0 +1,2 @@ +"""Module for version of app""" +__version__ = "0.8.0-dev1" diff --git a/variation/vrs_representation.py b/variation/vrs_representation.py new file mode 100644 index 0000000..bde025f --- /dev/null +++ b/variation/vrs_representation.py @@ -0,0 +1,231 @@ +"""Module for generating VRS objects""" +from typing import Dict, List, Optional, Tuple, Union + +from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode +from ga4gh.core import ga4gh_identify +from ga4gh.vrs import models, normalize +from pydantic import ValidationError + +from variation.schemas.token_response_schema import ( + AMBIGUOUS_REGIONS, + AltType, +) +from variation.utils import get_refget_accession + + +class VRSRepresentation: + """Class for 
representing VRS objects""" + + def __init__(self, seqrepo_access: SeqRepoAccess) -> None: + """Initialize the VRSRepresentation class + + :param SeqRepoAccess seqrepo_access: Access to SeqRepo + """ + self.seqrepo_access = seqrepo_access + + @staticmethod + def get_start_end( + coordinate: str, start: int, end: int, cds_start: int, errors: List + ) -> Optional[Tuple[int, int]]: + """Get start and end coordinates. + + :param str coordinate: Coordinate used. Must be either `p`, `c`, or `g` + :param int start: Start position change + :param int end: End position change + :param int cds_start: Coding start site + :param List errors: List of errors + :return: Tuple[start, end] + """ + try: + start = int(start) + if end is None: + end = start + end = int(end) + except (ValueError, TypeError): + errors.append("Start/End must be valid ints") + return None + + if coordinate == "c": + if cds_start: + start += cds_start + end += cds_start + return start, end + + @staticmethod + def get_start_indef_range(start: int) -> models.Range: + """Return indefinite range given start coordinate + + :param int start: Start position (assumes 1-based) + :return: Range + """ + return models.Range([None, start - 1]) + + @staticmethod + def get_end_indef_range(end: int) -> models.Range: + """Return indefinite range given end coordinate + + :param int end: End position (assumes 1-based) + :return: Range model + """ + return models.Range([end, None]) + + @staticmethod + def get_sequence_loc( + refget_accession: str, + start: Union[int, models.Range], + end: Union[int, models.Range], + ) -> models.Location: + """Return VRS location + + :param refget_accession: Refget accession (SQ.) 
+ :param start: start pos + :param end: end pos + :return: VRS Location model + """ + return models.SequenceLocation( + sequenceReference=models.SequenceReference( + refgetAccession=refget_accession + ), + start=start, + end=end, + ) + + def vrs_allele( + self, + ac: str, + start: Union[int, models.Range], + end: Union[int, models.Range], + sstate: Union[ + models.LiteralSequenceExpression, models.ReferenceLengthExpression + ], + alt_type: AltType, + errors: List[str], + ) -> Optional[Dict]: + """Create a VRS Allele object. + + :param ac: Accession + :param start: start pos + :param end: end pos + :param sstate: State + :param alt_type: Type of alteration + :param errors: List of errors + :return: VRS Allele object represented as a Dict + """ + refget_accession = get_refget_accession(self.seqrepo_access, ac, errors) + if not refget_accession: + return None + + try: + location = self.get_sequence_loc(refget_accession, start, end) + except ValueError as e: + errors.append(f"Unable to get sequence location: {e}") + return None + allele = models.Allele(location=location, state=sstate) + # Ambiguous regions do not get normalized + if alt_type not in AMBIGUOUS_REGIONS: + try: + allele = normalize(allele, self.seqrepo_access) + except (KeyError, AttributeError) as e: + errors.append(f"vrs-python unable to normalize allele: {e}") + return None + + if not allele: + errors.append("Unable to get allele") + return None + + allele.location.id = ga4gh_identify(allele.location) + allele.id = ga4gh_identify(allele) + allele_dict = allele.model_dump(exclude_none=True) + try: + models.Allele(**allele_dict) + except ValidationError as e: + errors.append(str(e)) + return None + else: + return allele_dict + + def to_vrs_allele( + self, + ac: str, + start: int, + end: int, + coordinate: AnnotationLayer, + alt_type: AltType, + errors: List[str], + cds_start: Optional[int] = None, + alt: Optional[str] = None, + residue_mode: ResidueMode = ResidueMode.RESIDUE, + ) -> Optional[Dict]: + 
"""Translate accession and position to VRS Allele Object. + + :param ac: Accession + :param start: Start position change + :param end: End position change + :param coordinate: Coordinate used + :param alt_type: Type of alteration + :param errors: List of errors + :param cds_start: Coding start site + :param alt: Alteration + :param residue_mode: Residue mode for ``start`` and ``end`` positions + :return: VRS Allele Object + """ + coords = self.get_start_end(coordinate, start, end, cds_start, errors) + if not coords: + return None + if coords[0] > coords[1]: + new_end, new_start = coords + else: + new_start, new_end = coords + + if residue_mode == ResidueMode.RESIDUE: + new_start -= 1 + residue_mode = ResidueMode.INTER_RESIDUE + + # Right now, this follows HGVS conventions + # This will change once we support other representations + if alt_type == AltType.INSERTION: + state = alt + new_start += 1 + new_end = new_start + elif alt_type in { + AltType.SUBSTITUTION, + AltType.STOP_GAIN, + AltType.DELETION, + AltType.DELINS, + AltType.REFERENCE_AGREE, + AltType.NONSENSE, + }: + if alt_type == AltType.REFERENCE_AGREE: + state, _ = self.seqrepo_access.get_reference_sequence( + ac, start=new_start, end=new_end, residue_mode=residue_mode + ) + if state is None: + errors.append( + f"Unable to get sequence on {ac} from " f"{new_start}" + ) + return None + else: + state = alt or "" + + if alt_type == AltType.SUBSTITUTION: + # This accounts for MNVs + new_end += len(state) - 1 + + elif alt_type == AltType.DUPLICATION: + ref, _ = self.seqrepo_access.get_reference_sequence( + ac, start=new_start, end=new_end, residue_mode=residue_mode + ) + if ref is not None: + state = ref + ref + else: + errors.append( + f"Unable to get sequence on {ac} from {new_start} to {new_end + 1}" + ) + return None + else: + errors.append(f"alt_type not supported: {alt_type}") + return None + + sstate = models.LiteralSequenceExpression(sequence=state) + return self.vrs_allele(ac, new_start, new_end, 
sstate, alt_type, errors) diff --git a/variation_normalizer.egg-info/PKG-INFO b/variation_normalizer.egg-info/PKG-INFO new file mode 100644 index 0000000..78b28e0 --- /dev/null +++ b/variation_normalizer.egg-info/PKG-INFO @@ -0,0 +1,223 @@ +Metadata-Version: 2.1 +Name: variation-normalizer +Version: 0.8.0.dev1 +Summary: VICC normalization routine for variations +Home-page: https://github.com/cancervariants/variation-normalization +Author: VICC +Author-email: help@cancervariants.org +License: MIT +Project-URL: Changelog, https://github.com/cancervariants/variation-normalization/releases +Project-URL: Source, https://github.com/cancervariants/variation-normalization +Project-URL: Tracker, https://github.com/cancervariants/variation-normalization/issues +Classifier: Development Status :: 3 - Alpha +Classifier: Intended Audience :: Science/Research +Classifier: Intended Audience :: Developers +Classifier: Topic :: Scientific/Engineering :: Bio-Informatics +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Requires-Python: >=3.7 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: biocommons.seqrepo +Requires-Dist: fastapi +Requires-Dist: uvicorn +Requires-Dist: pydantic==2.* +Requires-Dist: ga4gh.vrs[extras]~=2.0.0a2 +Requires-Dist: gene-normalizer~=0.3.0.dev1 +Requires-Dist: boto3 +Requires-Dist: cool-seq-tool~=0.4.0.dev1 +Requires-Dist: bioutils +Provides-Extra: dev +Requires-Dist: pytest; extra == "dev" +Requires-Dist: pytest-asyncio; extra == "dev" +Requires-Dist: pytest-cov; extra == "dev" +Requires-Dist: ruff; extra == "dev" +Requires-Dist: pre-commit; extra == "dev" +Requires-Dist: jupyter; extra == "dev" +Requires-Dist: ipykernel; extra == "dev" +Requires-Dist: psycopg2-binary; extra == "dev" +Requires-Dist: black; extra == "dev" + +# Variation Normalization + 
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5894937.svg)](https://doi.org/10.5281/zenodo.5894937) + +Services and guidelines for normalizing variation terms to [VRS](https://vrs.ga4gh.org/en/latest) compatible representations. + +Public OpenAPI endpoint: + +Installing with pip: + +```shell +pip install variation-normalizer +``` + +The variation-normalization repo depends on VRS models, and therefore each variation-normalizer package on PyPI uses a particular version of VRS. The correspondences between packages may be summarized as: + +| variation-normalization branch | variation-normalizer version | gene-normalizer version | VRS version | +| ---- | --- | ---- | --- | +| [main](https://github.com/cancervariants/variation-normalization/tree/main) | 0.6.X | 0.1.X | [1.X.X](https://github.com/ga4gh/vrs) | +| [staging](https://github.com/cancervariants/variation-normalization/tree/staging) | 0.8.X | 0.3.X | [2.0-alpha](https://github.com/ga4gh/vrs/tree/2.0-alpha) | + +## About + +Variation Normalization works by using four main steps: tokenization, classification, validation, and translation. During tokenization, we split strings on whitespace and parse to determine the type of token. During classification, we specify the order of tokens a classification can have. We then do validation checks such as ensuring references for a nucleotide or amino acid matches the expected value and validating a position exists on the given transcript. During translation, we return a VRS Allele object. 
+ +Variation Normalization is limited to the following types of variants: + +* HGVS expressions and text representations (ex: `BRAF V600E`): + * **protein (p.)**: substitution, deletion, insertion, deletion-insertion + * **coding DNA (c.)**: substitution, deletion, insertion, deletion-insertion + * **genomic (g.)**: substitution, deletion, ambiguous deletion, insertion, deletion-insertion, duplication +* gnomAD-style VCF (chr-pos-ref-alt, ex: `7-140753336-A-T`) + * **genomic (g.)**: substitution, deletion, insertion + +Variation Normalizer accepts input from GRCh37 or GRCh8 assemblies. + +We are working towards adding more types of variations, coordinates, and representations. + +### Endpoints + +#### `/to_vrs` + +Returns a list of validated VRS [Variations](https://vrs.ga4gh.org/en/stable/terms_and_model.html#variation). + +#### `/normalize` + +Returns a VRS Variation aligned to the prioritized transcript. The Variation Normalizer relies on [**C**ommon **O**perations **O**n **L**ots-of **Seq**uences Tool (cool-seq-tool)](https://github.com/GenomicMedLab/cool-seq-tool) for retrieving the prioritized transcript data. More information on the transcript selection algorithm can be found [here](https://github.com/GenomicMedLab/cool-seq-tool/blob/main/docs/TranscriptSelectionPriority.md). + +If a genomic variation query _is_ given a gene (E.g. `BRAF g.140753336A>T`), the associated cDNA representation will be returned. This is because the gene provides additional strand context. If a genomic variation query is _not_ given a gene, the GRCh38 representation will be returned. + +## Developer Instructions + +Clone the repo: + +```shell +git clone https://github.com/cancervariants/variation-normalization.git +cd variation-normalization +``` + +For a development install, we recommend using Pipenv. See the +[pipenv docs](https://pipenv-fork.readthedocs.io/en/latest/#install-pipenv-today) +for direction on installing pipenv in your compute environment. 
+ +Once installed, from the project root dir, just run: + +```shell +pipenv shell +pipenv update && pipenv install --dev +``` + +### Backend Services + +Variation Normalization relies on some local data caches which you will need to set up. It uses pipenv to manage its environment, which you will also need to install. + +#### Gene Normalizer + +Variation Normalization relies on data from [Gene Normalization](https://github.com/cancervariants/gene-normalization). You must load all sources _and_ merged concepts. + +You must also have Gene Normalization's DynamoDB running in a separate terminal for the application to work. + +For more information about the gene-normalizer and how to load the database, visit the [README](https://github.com/cancervariants/gene-normalization/blob/main/README.md). + +#### SeqRepo +Variation Normalization relies on [seqrepo](https://github.com/biocommons/biocommons.seqrepo), which you must download yourself. + +Variation Normalizer uses seqrepo to retrieve sequences at given positions on a transcript. + +From the _root_ directory: + +```shell +pip install seqrepo +sudo mkdir /usr/local/share/seqrepo +sudo chown $USER /usr/local/share/seqrepo +seqrepo pull -i 2021-01-29 # Replace with latest version using `seqrepo list-remote-instances` if outdated +``` + +If you get an error similar to the one below: + +```shell +PermissionError: [Error 13] Permission denied: '/usr/local/share/seqrepo/2021-01-29._fkuefgd' -> '/usr/local/share/seqrepo/2021-01-29' +``` + +You will want to do the following:\ +(*Might not be ._fkuefgd, so replace with your error message path*) + +```shell +sudo mv /usr/local/share/seqrepo/2021-01-29._fkuefgd /usr/local/share/seqrepo/2021-01-29 +exit +``` + +Use the `SEQREPO_ROOT_DIR` environment variable to set the path of an already existing SeqRepo directory. The default is `/usr/local/share/seqrepo/latest`. 
+ +#### UTA + +Variation Normalizer also uses [**C**ommon **O**perations **O**n **L**ots-of **Seq**uences Tool (cool-seq-tool)](https://github.com/GenomicMedLab/cool-seq-tool) which uses [UTA](https://github.com/biocommons/uta) as the underlying PostgreSQL database. + +_The following commands will likely need modification appropriate for the installation environment._ + +1. Install [PostgreSQL](https://www.postgresql.org/) +2. Create user and database. + + ```shell + createuser -U postgres uta_admin + createuser -U postgres anonymous + createdb -U postgres -O uta_admin uta + ``` + +3. To install locally, from the _variation/data_ directory: + +```shell +export UTA_VERSION=uta_20210129.pgd.gz +curl -O http://dl.biocommons.org/uta/$UTA_VERSION +gzip -cdq ${UTA_VERSION} | grep -v "^REFRESH MATERIALIZED VIEW" | psql -h localhost -U uta_admin --echo-errors --single-transaction -v ON_ERROR_STOP=1 -d uta -p 5433 +``` + +##### UTA Installation Issues + +If you have trouble installing UTA, you can visit [these two READMEs](https://github.com/ga4gh/vrs-python/tree/main/docs/setup_help). + +##### Connecting to the UTA database + +To connect to the UTA database, you can use the default url (`postgresql://uta_admin@localhost:5433/uta/uta_20210129`). If you do not wish to use the default, you must set the environment variable `UTA_DB_URL` which has the format of `driver://user:pass@host:port/database/schema`. + +## Starting the Variation Normalization Service Locally + +`gene-normalizer`s dynamodb and the `uta` database must be running. + +To start the service, run the following: + +```shell +uvicorn variation.main:app --reload +``` + +Next, view the OpenAPI docs on your local machine: + + +### Init coding style tests + +Code style is managed by [Ruff](https://github.com/astral-sh/ruff) and checked prior to commit. + +We use [pre-commit](https://pre-commit.com/#usage) to run conformance tests. 
+ +This ensures: + +* Check code style +* Check for added large files +* Detect AWS Credentials +* Detect Private Key + +Before first commit run: + +```shell +pre-commit install +``` + +### Testing + +From the _root_ directory of the repository: + +```shell +pytest tests/ +``` diff --git a/variation_normalizer.egg-info/SOURCES.txt b/variation_normalizer.egg-info/SOURCES.txt new file mode 100644 index 0000000..45525b5 --- /dev/null +++ b/variation_normalizer.egg-info/SOURCES.txt @@ -0,0 +1,139 @@ +LICENSE +README.md +pyproject.toml +setup.cfg +setup.py +tests/__init__.py +tests/conftest.py +tests/test_classifier.py +tests/test_hgvs_dup_del_mode.py +tests/test_normalize.py +tests/test_tokenizer.py +tests/test_translator.py +tests/test_validator.py +variation/__init__.py +variation/classify.py +variation/hgvs_dup_del_mode.py +variation/main.py +variation/normalize.py +variation/query.py +variation/regex.py +variation/to_copy_number_variation.py +variation/to_vrs.py +variation/tokenize.py +variation/translate.py +variation/utils.py +variation/validate.py +variation/version.py +variation/vrs_representation.py +variation/classifiers/__init__.py +variation/classifiers/amplification_classifier.py +variation/classifiers/cdna_deletion_classifier.py +variation/classifiers/cdna_delins_classifier.py +variation/classifiers/cdna_insertion_classifier.py +variation/classifiers/cdna_reference_agree_classifier.py +variation/classifiers/cdna_substitution_classifier.py +variation/classifiers/classifier.py +variation/classifiers/genomic_deletion_ambiguous.py +variation/classifiers/genomic_deletion_classifier.py +variation/classifiers/genomic_delins_classifier.py +variation/classifiers/genomic_duplication_ambiguous.py +variation/classifiers/genomic_duplication_classifier.py +variation/classifiers/genomic_insertion_classifier.py +variation/classifiers/genomic_reference_agree_classifier.py +variation/classifiers/genomic_substitution_classifier.py 
+variation/classifiers/gnomad_vcf_classifier.py +variation/classifiers/hgvs_classifier.py +variation/classifiers/protein_deletion_classifier.py +variation/classifiers/protein_delins_classifier.py +variation/classifiers/protein_insertion_classifier.py +variation/classifiers/protein_reference_agree.py +variation/classifiers/protein_stop_gain_classifier.py +variation/classifiers/protein_substitution_classifier.py +variation/schemas/__init__.py +variation/schemas/app_schemas.py +variation/schemas/classification_response_schema.py +variation/schemas/copy_number_schema.py +variation/schemas/hgvs_to_copy_number_schema.py +variation/schemas/normalize_response_schema.py +variation/schemas/service_schema.py +variation/schemas/to_vrs_response_schema.py +variation/schemas/token_response_schema.py +variation/schemas/translation_response_schema.py +variation/schemas/validation_response_schema.py +variation/schemas/variation_schema.py +variation/schemas/vrs_python_translator_schema.py +variation/tokenizers/__init__.py +variation/tokenizers/cdna_and_genomic_reference_agree.py +variation/tokenizers/cdna_deletion.py +variation/tokenizers/cdna_delins.py +variation/tokenizers/cdna_insertion.py +variation/tokenizers/cdna_substitution.py +variation/tokenizers/free_text_categorical.py +variation/tokenizers/gene_symbol.py +variation/tokenizers/genomic_deletion.py +variation/tokenizers/genomic_delins.py +variation/tokenizers/genomic_duplication.py +variation/tokenizers/genomic_insertion.py +variation/tokenizers/genomic_substitution.py +variation/tokenizers/gnomad_vcf.py +variation/tokenizers/hgvs.py +variation/tokenizers/protein_deletion.py +variation/tokenizers/protein_delins.py +variation/tokenizers/protein_insertion.py +variation/tokenizers/protein_reference_agree.py +variation/tokenizers/protein_substitution.py +variation/tokenizers/tokenizer.py +variation/translators/__init__.py +variation/translators/ambiguous_translator_base.py +variation/translators/amplification.py 
+variation/translators/cdna_deletion.py +variation/translators/cdna_delins.py +variation/translators/cdna_insertion.py +variation/translators/cdna_reference_agree.py +variation/translators/cdna_substitution.py +variation/translators/genomic_del_dup_base.py +variation/translators/genomic_deletion.py +variation/translators/genomic_deletion_ambiguous.py +variation/translators/genomic_delins.py +variation/translators/genomic_duplication.py +variation/translators/genomic_duplication_ambiguous.py +variation/translators/genomic_insertion.py +variation/translators/genomic_reference_agree.py +variation/translators/genomic_substitution.py +variation/translators/protein_deletion.py +variation/translators/protein_delins.py +variation/translators/protein_insertion.py +variation/translators/protein_reference_agree.py +variation/translators/protein_stop_gain.py +variation/translators/protein_substitution.py +variation/translators/translator.py +variation/validators/__init__.py +variation/validators/amplification.py +variation/validators/cdna_deletion.py +variation/validators/cdna_delins.py +variation/validators/cdna_insertion.py +variation/validators/cdna_reference_agree.py +variation/validators/cdna_substitution.py +variation/validators/genomic_base.py +variation/validators/genomic_deletion.py +variation/validators/genomic_deletion_ambiguous.py +variation/validators/genomic_delins.py +variation/validators/genomic_duplication.py +variation/validators/genomic_duplication_ambiguous.py +variation/validators/genomic_insertion.py +variation/validators/genomic_reference_agree.py +variation/validators/genomic_substitution.py +variation/validators/protein_deletion.py +variation/validators/protein_delins.py +variation/validators/protein_insertion.py +variation/validators/protein_reference_agree.py +variation/validators/protein_stop_gain.py +variation/validators/protein_substitution.py +variation/validators/validator.py +variation_normalizer.egg-info/PKG-INFO 
+variation_normalizer.egg-info/SOURCES.txt +variation_normalizer.egg-info/dependency_links.txt +variation_normalizer.egg-info/not-zip-safe +variation_normalizer.egg-info/requires.txt +variation_normalizer.egg-info/top_level.txt \ No newline at end of file diff --git a/variation_normalizer.egg-info/dependency_links.txt b/variation_normalizer.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/variation_normalizer.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/variation_normalizer.egg-info/not-zip-safe b/variation_normalizer.egg-info/not-zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/variation_normalizer.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/variation_normalizer.egg-info/requires.txt b/variation_normalizer.egg-info/requires.txt new file mode 100644 index 0000000..32b9eeb --- /dev/null +++ b/variation_normalizer.egg-info/requires.txt @@ -0,0 +1,20 @@ +biocommons.seqrepo +fastapi +uvicorn +pydantic==2.* +ga4gh.vrs[extras]~=2.0.0a2 +gene-normalizer~=0.3.0.dev1 +boto3 +cool-seq-tool~=0.4.0.dev1 +bioutils + +[dev] +pytest +pytest-asyncio +pytest-cov +ruff +pre-commit +jupyter +ipykernel +psycopg2-binary +black diff --git a/variation_normalizer.egg-info/top_level.txt b/variation_normalizer.egg-info/top_level.txt new file mode 100644 index 0000000..b9fbb77 --- /dev/null +++ b/variation_normalizer.egg-info/top_level.txt @@ -0,0 +1,2 @@ +tests +variation