Skip to content

Commit

Permalink
feat: allow chainfile argument to be passed to Converter (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
korikuzma authored Feb 2, 2024
1 parent af8197a commit 7e52275
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 22 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "agct"
version = "0.1.0-dev0"
version = "0.1.0-dev1"
authors = [
{name = "James Stevenson"}
]
Expand Down
56 changes: 35 additions & 21 deletions src/agct/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
from enum import Enum
from pathlib import Path
from typing import Callable, List, Tuple
from typing import Callable, List, Optional, Tuple

from wags_tails import CustomData
from wags_tails.utils.downloads import download_http, handle_gzip
Expand Down Expand Up @@ -36,35 +36,49 @@ class Converter:
association.
"""

def __init__(
    self,
    from_db: Optional[Genome] = None,
    to_db: Optional[Genome] = None,
    chainfile: Optional[str] = None,
) -> None:
    """Initialize liftover instance.

    Either pass a ``chainfile`` path directly, or pass both ``from_db`` and
    ``to_db`` so that the matching chainfile can be fetched via ``wags-tails``.

    :param from_db: source genome. Must be different than ``to_db``.
        Ignored if ``chainfile`` is provided.
    :param to_db: target genome. Must be different than ``from_db``.
        Ignored if ``chainfile`` is provided.
    :param chainfile: Path to chainfile. If not provided, both ``from_db`` and
        ``to_db`` must be given so that ``wags-tails`` can download the
        corresponding chainfile.
    :raise ValueError: if required arguments are not passed or are invalid
    :raise FileNotFoundError: if unable to open corresponding chainfile
    :raise _core.ChainfileError: if unable to read chainfile (i.e. it's invalid)
    """
    if not chainfile:
        # Both genomes are required to locate a chainfile. Checking with
        # ``or`` rejects the case where only one of them is supplied, which
        # would otherwise fail below with an AttributeError on ``None.value``.
        if from_db is None or to_db is None:
            raise ValueError("Must provide both `from_db` and `to_db`")

        if from_db == to_db:
            raise ValueError("Liftover must be to/from different sources.")

        # NOTE(review): ``.value`` is read below, so plain strings are not
        # accepted here -- confirm whether coercion via ``Genome(...)`` is wanted.
        data_handler = CustomData(
            f"chainfile_{from_db.value}_to_{to_db.value}",
            "chain",
            lambda: "",
            self._download_function_builder(from_db, to_db),
            data_dir=get_data_dir() / "ucsc-chainfile",
        )
        file, _ = data_handler.get_latest()
        chainfile = str(file.absolute())

    try:
        self._converter = _core.Converter(chainfile)
    except FileNotFoundError:
        _logger.error("Unable to open chainfile located at %s", chainfile)
        raise
    except _core.ChainfileError:
        _logger.error("Error reading chainfile located at %s", chainfile)
        raise

@staticmethod
Expand Down
21 changes: 21 additions & 0 deletions tests/test_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Module for testing Converter initialization"""
import pytest
from tests.conftest import DATA_DIR

from agct import Converter, Genome


def test_valid():
    """Test that a Converter can be built from a chainfile path alone"""
    chainfile_path = DATA_DIR / "ucsc-chainfile" / "chainfile_hg19_to_hg38_.chain"
    converter = Converter(chainfile=str(chainfile_path))
    assert converter


def test_invalid():
    """Test that invalid argument combinations raise ``ValueError``"""
    # Nothing supplied: no chainfile and no genome pair
    missing_args_msg = "Must provide both `from_db` and `to_db`"
    with pytest.raises(ValueError, match=missing_args_msg):
        Converter()

    # Source and target genome are the same
    same_source_msg = "Liftover must be to/from different sources."
    genome = Genome.HG19
    with pytest.raises(ValueError, match=same_source_msg):
        Converter(genome, genome)

0 comments on commit 7e52275

Please sign in to comment.