Skip to content

Commit

Permalink
Merge pull request #142 from cancervariants/issue-141
Browse files Browse the repository at this point in the history
build: separate out etl (dev) dependencies on main
  • Loading branch information
korikuzma authored Oct 18, 2022
2 parents 20dc67d + f6718da commit 89a5149
Show file tree
Hide file tree
Showing 15 changed files with 214 additions and 263 deletions.
11 changes: 5 additions & 6 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@ fastapi = "*"
uvicorn = "*"
click = "*"
boto3 = "*"
beautifulsoup4 = "*"
gffutils = "*"
requests = "*"
"biocommons.seqrepo" = "*"
"ga4gh.vrs" = {version = ">=0.7.5.dev1", extras = ["extras"]}
"ga4gh.vrsatile.pydantic" = ">=0.0.10"
"ga4gh.vrsatile.pydantic" = "==0.0.11"

[dev-packages]
gene = {editable = true, path = "."}
gffutils = "*"
"biocommons.seqrepo" = "*"
"ga4gh.vrs" = {version = "==0.8.0dev0", extras = ["extras"]}
psycopg2-binary = "*"
pytest = "*"
pre-commit = "*"
flake8 = "*"
Expand Down
25 changes: 23 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# Gene Normalization
Services and guidelines for normalizing gene terms

Installing with pip:

```commandline
pip install gene[dev]
```

The `[dev]` extra tells pip to also install the packages required by the `gene.etl` package.

## Developer instructions
The following sections include instructions specifically for developers.

Expand All @@ -14,16 +22,29 @@ Once installed, from the project root dir, just run:
```commandline
pipenv shell
pipenv lock && pipenv sync
pipenv install --dev
```

Gene Normalization relies on [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) data.
Gene Normalization relies on [SeqRepo](https://github.com/biocommons/biocommons.seqrepo) data, which you must download yourself.

From the _root_ directory:
```
pip install seqrepo
sudo mkdir /usr/local/share/seqrepo
sudo chown $USER /usr/local/share/seqrepo
seqrepo pull -i 2021-01-29
seqrepo pull -i 2021-01-29 # Replace with latest version using `seqrepo list-remote-instances` if outdated
```

If you get an error similar to the one below:
```
PermissionError: [Errno 13] Permission denied: '/usr/local/share/seqrepo/2021-01-29._fkuefgd' -> '/usr/local/share/seqrepo/2021-01-29'
```

You will want to do the following:\
(*The temporary suffix might not be `._fkuefgd`; replace it with the path shown in your error message*)
```console
sudo mv /usr/local/share/seqrepo/2021-01-29._fkuefgd /usr/local/share/seqrepo/2021-01-29
exit
```

### Deploying DynamoDB Locally
Expand Down
2 changes: 1 addition & 1 deletion gene/etl/ensembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from gene import APP_ROOT
from gene.schemas import SourceName, NamespacePrefix, Strand, SourceMeta
from gene.database import Database
from gene.vrs_locations import SequenceLocation
from gene.etl.vrs_locations import SequenceLocation


logger = logging.getLogger("gene")
Expand Down
13 changes: 7 additions & 6 deletions gene/etl/hgnc.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
"""This module defines the HGNC ETL methods."""
from .base import Base
from gene import APP_ROOT, PREFIX_LOOKUP
from gene.schemas import SourceName, SymbolStatus, NamespacePrefix, \
SourceMeta, Annotation, Chromosome
from gene.database import Database
import logging
import json
import shutil
import re
from gene.vrs_locations import ChromosomeLocation

from gene import APP_ROOT, PREFIX_LOOKUP
from gene.database import Database
from gene.schemas import SourceName, SymbolStatus, NamespacePrefix, \
SourceMeta, Annotation, Chromosome
from gene.etl.base import Base
from gene.etl.vrs_locations import ChromosomeLocation

logger = logging.getLogger('gene')
logger.setLevel(logging.DEBUG)
Expand Down
16 changes: 9 additions & 7 deletions gene/etl/ncbi.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
"""This module defines ETL methods for the NCBI data source."""
from .base import Base
from gene import APP_ROOT, PREFIX_LOOKUP
from gene.database import Database
from gene.schemas import SourceMeta, SourceName, NamespacePrefix, Annotation, \
Chromosome, SymbolStatus
from ftplib import FTP
import logging
from pathlib import Path
import csv
from datetime import datetime
import re

import gffutils
from gene.vrs_locations import SequenceLocation, ChromosomeLocation
from ftplib import FTP

from gene import APP_ROOT, PREFIX_LOOKUP
from gene.database import Database
from gene.schemas import SourceMeta, SourceName, NamespacePrefix, Annotation, \
Chromosome, SymbolStatus
from gene.etl.base import Base
from gene.etl.vrs_locations import SequenceLocation, ChromosomeLocation


logger = logging.getLogger('gene')
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""This module defines GA4GH Sequence Location."""
from typing import List
import logging

from ga4gh.vrs import models
from ga4gh.core import ga4gh_identify
import logging

logger = logging.getLogger('gene')
logger.setLevel(logging.DEBUG)
Expand All @@ -18,7 +19,12 @@ def get_aliases(self, sr, seqid) -> List[str]:
:param str seqid: Sequence ID accession
:return: List of aliases for seqid
"""
return sr.translate_alias(seqid)
aliases = []
try:
aliases = sr.translate_alias(seqid)
except KeyError as e:
logger.warning(f"SeqRepo raised KeyError: {e}")
return aliases

def add_location(self, seqid, gene, params, sr):
"""Get a gene's Sequence Location.
Expand All @@ -31,6 +37,9 @@ def add_location(self, seqid, gene, params, sr):
"""
location = dict()
aliases = self.get_aliases(sr, seqid)
if not aliases:
return location

sequence_id = [a for a in aliases if a.startswith('ga4gh')][0]

if gene.start != '.' and gene.end != '.' and sequence_id:
Expand Down
2 changes: 1 addition & 1 deletion gene/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Gene normalizer version"""
__version__ = "0.1.27"
__version__ = "0.1.28"
Loading

0 comments on commit 89a5149

Please sign in to comment.