Skip to content

Commit

Permalink
Merge pull request #46 from cancervariants/issue-13
Browse files Browse the repository at this point in the history
Issue 13
  • Loading branch information
jsstevenson authored Apr 13, 2021
2 parents 2b1e572 + e5856ce commit b5a11a3
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 4 deletions.
2 changes: 1 addition & 1 deletion disease/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
logger.setLevel(logging.DEBUG)


__version__ = "0.2.6" # remember to update setup.cfg as well!
__version__ = "0.2.7" # remember to update setup.cfg as well!


class DownloadException(Exception):
Expand Down
2 changes: 1 addition & 1 deletion disease/etl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def _download_data(self):
def _extract_data(self):
"""Get source file from data directory."""
self._data_path.mkdir(exist_ok=True, parents=True)
src_name = type(self).__name__.lower()
src_name = f'{type(self).__name__.lower()}_'
dir_files = [f for f in self._data_path.iterdir()
if f.name.startswith(src_name)]
if len(dir_files) == 0:
Expand Down
18 changes: 17 additions & 1 deletion disease/etl/do.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Disease Ontology ETL module."""
import logging
from .base import OWLBase
import requests
from pathlib import Path
from disease import PROJECT_ROOT, PREFIX_LOOKUP
from disease.schemas import SourceMeta, SourceName, NamespacePrefix
Expand Down Expand Up @@ -55,11 +56,26 @@ def perform_etl(self) -> List[str]:
:return: empty list (because DO IDs shouldn't be used to construct
merged concept groups)
"""
self._extract_data()
self._load_meta()
self._transform_data()
self.database.flush_batch()
return []

def _download_data(self):
    """Download DO source file for loading into normalizer.

    Streams the resource at ``self._SRC_URL`` into the file
    ``do_<version>.owl`` inside ``self._data_path``.

    :raise requests.exceptions.RequestException: if the request fails,
        the transfer is interrupted, or the server responds with an HTTP
        error status
    """
    logger.info('Downloading DO data...')
    try:
        # Context managers guarantee the connection is released and the
        # output file is flushed and closed, even if the transfer fails.
        with requests.get(self._SRC_URL, stream=True) as response:
            # Without this check, the body of a 4xx/5xx error page would
            # be written to disk as if it were the ontology file.
            response.raise_for_status()
            out_path = self._data_path / f'do_{self._version}.owl'
            with open(out_path, "wb") as handle:
                for chunk in response.iter_content(chunk_size=512):
                    if chunk:  # skip empty keep-alive chunks
                        handle.write(chunk)
    except requests.exceptions.RequestException as e:
        logger.error(f'DO download failed: {e}')
        raise
    logger.info('Finished downloading Human Disease Ontology')

def _load_meta(self):
"""Load metadata"""
metadata_params = {
Expand All @@ -80,7 +96,7 @@ def _load_meta(self):

def _transform_data(self):
"""Transform source data and send to loading method."""
do = owl.get_ontology(self._SRC_URL).load()
do = owl.get_ontology(self._data_file.absolute().as_uri()).load()
disease_uri = 'http://purl.obolibrary.org/obo/DOID_4'
diseases = self._get_subclasses(disease_uri)
for uri in diseases:
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = disease-normalizer
version = 0.2.6
version = 0.2.7
author = VICC
author_email = help@cancervariants.org
description = VICC normalization routine for diseases
Expand Down

0 comments on commit b5a11a3

Please sign in to comment.