Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use enum to constrain genome values #4

Merged
merged 3 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Initialize a class instance:

```python3
from chainlifter.lifter import ChainLifter
ch = ChainLifter("38", "19")
ch = ChainLifter("hg38", "hg19")
```

Call ``convert_coordinate``:
Expand Down
29 changes: 23 additions & 6 deletions src/chainlifter/lifter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Perform chainfile-driven liftover."""
from enum import Enum
from enum import StrEnum
from pathlib import Path
from typing import Callable

Expand All @@ -10,26 +10,43 @@
import chainlifter._core as _core


class Strand(str, Enum):
class Strand(StrEnum):
    """Constrain strand values.

    Members are strings: ``POSITIVE`` is ``"+"`` and ``NEGATIVE`` is ``"-"``.
    Because this is a ``StrEnum``, members compare equal to their raw string
    values.
    """

    POSITIVE = "+"
    NEGATIVE = "-"


class Genome(StrEnum):
    """Constrain genome values.

    Member values are the lowercase identifiers (``"hg38"``, ``"hg19"``) that
    ``ChainLifter`` interpolates into the chainfile name and the UCSC download
    URL. Passing any other string to ``Genome(...)`` raises ``ValueError``.

    We could conceivably support every UCSC chainfile offering, but for now, we'll
    stick with internal use cases only.
    """

    HG38 = "hg38"
    HG19 = "hg19"


class ChainLifter:
"""Chainfile-based liftover provider for a single sequence to sequence
association.
"""

def __init__(self, from_db: str, to_db: str) -> None:
def __init__(self, from_db: Genome, to_db: Genome) -> None:
"""Initialize liftover instance.

:param from_db: database name, e.g. ``"hg19"``
:param to_db: database name, e.g. ``"hg38"``
"""
if from_db == to_db:
raise ValueError("Liftover must be to/from different sources.")
if not isinstance(from_db, Genome):
from_db = Genome(from_db)
if not isinstance(to_db, Genome):
to_db = Genome(to_db)
data_handler = CustomData(
f"chainfile_{from_db}_to_{to_db}",
f"chainfile_{from_db.value}_to_{to_db.value}",
"chain",
lambda: "",
self._download_function_builder(from_db, to_db),
Expand All @@ -39,7 +56,7 @@ def __init__(self, from_db: str, to_db: str) -> None:
self._chainlifter = _core.ChainLifter(str(file.absolute()))

@staticmethod
def _download_function_builder(from_db: str, to_db: str) -> Callable:
def _download_function_builder(from_db: Genome, to_db: Genome) -> Callable:
"""Build downloader function for chainfile corresponding to source/destination
params.

Expand All @@ -57,7 +74,7 @@ def _download_data(version: str, file: Path) -> None:
:param version: not used
:param file: path to save file to
"""
url = f"https://hgdownload.soe.ucsc.edu/goldenPath/{from_db}/liftOver/{from_db}To{to_db.title()}.over.chain.gz"
url = f"https://hgdownload.soe.ucsc.edu/goldenPath/{from_db.value}/liftOver/{from_db.value}To{to_db.value.title()}.over.chain.gz"
download_http(url, file, handler=handle_gzip)

return _download_data
Expand Down