diff --git a/README.md b/README.md index f615b71..636ae26 100644 --- a/README.md +++ b/README.md @@ -24,17 +24,17 @@ python3 -m pip install agct Initialize a class instance: ```python3 -from agct import Converter -c = Converter("hg38", "hg19") +from agct import Converter, Genome +c = Converter(Genome.HG38, Genome.HG19) ``` -> If a chainfile is unavailable locally, it's downloaded from UCSC and saved using the `wags-tails` package -- see the [configuration instructions](https://github.com/GenomicMedLab/wags-tails?tab=readme-ov-file#configuration) for information on how to designate a non-default storage location. +> If a chainfile is unavailable locally, it's downloaded from UCSC and saved using the `wags-tails` package -- see the [wags-tails configuration instructions](https://github.com/GenomicMedLab/wags-tails?tab=readme-ov-file#configuration) for information on how to designate a non-default storage location. Call ``convert_coordinate()``: ```python3 c.convert_coordinate("chr7", 140453136, "+") -# [['chr7', 140152936, '+']] +# [['chr7', 140152936, <Strand.POSITIVE: '+'>]] ``` ## Development diff --git a/src/agct/converter.py b/src/agct/converter.py index a7680c4..01e71eb 100644 --- a/src/agct/converter.py +++ b/src/agct/converter.py @@ -38,15 +38,15 @@ class Converter: def __init__( self, - from_db: Genome | None = None, - to_db: Genome | None = None, + from_db: Genome | str | None = None, + to_db: Genome | str | None = None, chainfile: str | None = None, ) -> None: """Initialize liftover instance. - :param from_db: database name, e.g. ``"19"``. Must be different than ``to_db`` + :param from_db: database name, e.g. ``"hg19"``. Must be different than ``to_db`` If ``chainfile`` is provided, will ignore this argument - :param to_db: database name, e.g. ``"38"``. Must be different than ``from_db`` + :param to_db: database name, e.g. ``"hg38"``. 
Must be different than ``from_db`` If ``chainfile`` is provided, will ignore this argument :param chainfile: Path to chainfile If not provided, must provide both ``from_db`` and ``to_db`` so that @@ -56,7 +56,7 @@ def __init__( :raise _core.ChainfileError: if unable to read chainfile (i.e. it's invalid) """ if not chainfile: - if from_db is None and to_db is None: + if from_db is None or to_db is None: msg = "Must provide both `from_db` and `to_db`" raise ValueError(msg) @@ -64,6 +64,19 @@ def __init__( msg = "Liftover must be to/from different sources." raise ValueError(msg) + if isinstance(from_db, str): + try: + from_db = Genome(from_db) + except ValueError as e: + msg = f"Unable to coerce from_db value '{from_db}' to a known reference genome: {list(Genome)}" + raise ValueError(msg) from e + if isinstance(to_db, str): + try: + to_db = Genome(to_db) + except ValueError as e: + msg = f"Unable to coerce to_db value '{to_db}' to a known reference genome: {list(Genome)}" + raise ValueError(msg) from e + data_handler = CustomData( f"chainfile_{from_db.value}_to_{to_db.value}", "chain", diff --git a/tests/test_converter.py b/tests/test_converter.py index 6db2ce7..57729e1 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -1,4 +1,6 @@ """Module for testing Converter initialization""" +import re + import pytest from tests.conftest import DATA_DIR @@ -19,3 +21,11 @@ def test_invalid(): with pytest.raises(ValueError, match="Liftover must be to/from different sources."): Converter(Genome.HG19, Genome.HG19) + + with pytest.raises( + ValueError, + match=re.escape( + "Unable to coerce to_db value 'hg18' to a known reference genome: [<Genome.HG38: 'hg38'>, <Genome.HG19: 'hg19'>]" + ), + ): + Converter(Genome.HG19, "hg18")