Skip to content

Commit

Permalink
Merge branch 'main' into templates-workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Aug 7, 2024
2 parents 3d1f822 + 4658513 commit 2247c0d
Show file tree
Hide file tree
Showing 45 changed files with 119 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.0
rev: v0.5.0 # ruff version
hooks:
- id: ruff-format
- id: ruff
Expand Down
35 changes: 25 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ dependencies = [

[project.optional-dependencies]
test = ["pytest>=6.0", "pytest-cov", "requests-mock"]
dev = ["pre-commit", "ruff==0.2.0"]
dev = ["pre-commit", "ruff==0.5.0"]
docs = [
"sphinx==6.1.3",
"sphinx-autodoc-typehints==1.22.0",
Expand Down Expand Up @@ -84,16 +84,22 @@ select = [
"DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
"T10", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz
"EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em
"LOG", # https://docs.astral.sh/ruff/rules/#flake8-logging-log
"G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g
"INP", # https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp
"PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie
"T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20
"PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt
"Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q
"RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse
"RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret
"SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf
"SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim
"ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg
"PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth
"PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh
"PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf
"FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb
"RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf
]
fixable = [
Expand All @@ -104,16 +110,20 @@ fixable = [
"ANN",
"B",
"C4",
"LOG",
"G",
"PIE",
"PT",
"RSE",
"SIM",
"PERF",
"FURB",
"RUF"
]

# ANN101 - missing-type-self
# ANN003 - missing-type-kwargs
# ANN101 - missing-type-self
# ANN102 - missing-type-cls
# D203 - one-blank-line-before-class
# D205 - blank-line-after-summary
# D206 - indent-with-spaces*
Expand All @@ -129,7 +139,7 @@ fixable = [
# S321 - suspicious-ftp-lib-usage
# *ignored for compatibility with formatter
ignore = [
"ANN101", "ANN003",
"ANN003", "ANN101", "ANN102",
"D203", "D205", "D206", "D213", "D300", "D400", "D415",
"E111", "E114", "E117", "E501",
"W191",
Expand All @@ -139,15 +149,20 @@ ignore = [
[tool.ruff.lint.per-file-ignores]
# ANN001 - missing-type-function-argument
# ANN2 - missing-return-type
# ANN201 - missing-return-type-undocumented-public-function
# ANN102 - missing-type-cls
# N805 - invalid-first-argument-name-for-method
# PT018 - pytest-composite-assertion
# S101 - assert
"tests/*" = ["ANN001", "ANN102", "ANN2", "S101"]
# B011 - assert-false
# INP001 - implicit-namespace-package
# PT018 - pytest-composite-assertion
# ARG001 - unused-function-argument
# ARG005 - unused-lambda-argument
# T201 - print
"tests/*" = ["ANN001", "ANN2", "S101", "B011", "INP001"]
"tests/test_hemonc.py" = ["PT018"]
"tests/test_gtop.py" = ["PT018"]
"tests/test_do.py" = ["PT018"]
"*__init__.py" = ["F401"]
"src/wags_tails/base_source.py" = ["ANN102"]
"tests/test_custom.py" = ["ARG001", "ARG005"]
"src/wags_tails/utils/downloads.py" = ["T201"]


[tool.ruff.format]
docstring-code-format = true
1 change: 1 addition & 0 deletions src/wags_tails/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Data acquisition tools for Wagnerds."""

from .base_source import DataSource, RemoteDataError
from .chembl import ChemblData
from .chemidplus import ChemIDplusData
Expand Down
7 changes: 4 additions & 3 deletions src/wags_tails/base_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
All source classes should inherit - directly or indirectly - from ``DataSource``. Each
class defined here is an ``abstract base class`` and cannot be instantiated directly.
"""

import abc
import datetime
import logging
from collections.abc import Generator
from pathlib import Path
from typing import Generator, Optional, Tuple

import requests

Expand All @@ -33,7 +34,7 @@ class DataSource(abc.ABC):
_filetype: str
_versioned: bool = True

def __init__(self, data_dir: Optional[Path] = None, silent: bool = True) -> None:
def __init__(self, data_dir: Path | None = None, silent: bool = True) -> None:
"""Set common class parameters.
:param data_dir: direct location to store data files in, if specified. See
Expand Down Expand Up @@ -71,7 +72,7 @@ def _download_data(self, version: str, outfile: Path) -> None:

def get_latest(
self, from_local: bool = False, force_refresh: bool = False
) -> Tuple[Path, str]:
) -> tuple[Path, str]:
"""Get path to latest version of data.
Provides logic for both versioned and unversioned data here, rather than in the
Expand Down
1 change: 1 addition & 0 deletions src/wags_tails/chembl.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provide source fetching for ChEMBL."""

import fnmatch
import re
import tarfile
Expand Down
3 changes: 2 additions & 1 deletion src/wags_tails/chemidplus.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provide source fetching for ChemIDplus."""

import datetime
import re
from pathlib import Path
Expand Down Expand Up @@ -38,7 +39,7 @@ def _get_latest_version() -> str:
.strftime(DATE_VERSION_PATTERN)
)

def _download_data(self, version: str, outfile: Path) -> None:
def _download_data(self, version: str, outfile: Path) -> None: # noqa: ARG002
"""Download data file to specified location. ChemIDplus data is no longer
updated, so versioning is irrelevant.
Expand Down
7 changes: 4 additions & 3 deletions src/wags_tails/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
The :ref:`documentation <custom_data_source>` provides more explanation and an in-depth
example.
"""

from collections.abc import Callable
from pathlib import Path
from typing import Callable, Optional

from .base_source import DataSource

Expand All @@ -24,8 +25,8 @@ def __init__(
filetype: str,
latest_version_cb: Callable[[], str],
download_cb: Callable[[str, Path], None],
data_dir: Optional[Path] = None,
file_name: Optional[str] = None,
data_dir: Path | None = None,
file_name: str | None = None,
versioned: bool = True,
silent: bool = False,
) -> None:
Expand Down
1 change: 1 addition & 0 deletions src/wags_tails/do.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provide source fetching for Human Disease Ontology."""

import datetime
import tarfile
from pathlib import Path
Expand Down
8 changes: 4 additions & 4 deletions src/wags_tails/drugbank.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Provide source fetching for DrugBank."""

import logging
from pathlib import Path
from typing import Tuple

import requests

Expand All @@ -19,7 +19,7 @@ class DrugBankData(DataSource):
_filetype = "csv"

@staticmethod
def _get_latest_version() -> Tuple[str, str]:
def _get_latest_version() -> tuple[str, str]:
"""Retrieve latest version value
:return: latest release value and base download URL
Expand All @@ -35,7 +35,7 @@ def _get_latest_version() -> Tuple[str, str]:
msg = "Unable to parse latest DrugBank version number from releases API endpoint"
raise RemoteDataError(msg) from e

def _get_latest_local_file(self, glob: str) -> Tuple[Path, str]:
def _get_latest_local_file(self, glob: str) -> tuple[Path, str]:
"""Get most recent locally-available file. DrugBank uses versioning that isn't
easily sortable by default so we have to use some extra magic.
Expand Down Expand Up @@ -72,7 +72,7 @@ def _download_data(self, url: str, outfile: Path) -> None:

def get_latest(
self, from_local: bool = False, force_refresh: bool = False
) -> Tuple[Path, str]:
) -> tuple[Path, str]:
"""Get path to latest version of data, and its version value
:param from_local: if True, use latest available local file
Expand Down
3 changes: 2 additions & 1 deletion src/wags_tails/drugsatfda.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provide source fetching for Drugs@FDA."""

import datetime
from pathlib import Path

Expand Down Expand Up @@ -38,7 +39,7 @@ def _get_latest_version() -> str:
.strftime(DATE_VERSION_PATTERN)
)

def _download_data(self, version: str, outfile: Path) -> None:
def _download_data(self, version: str, outfile: Path) -> None: # noqa: ARG002
"""Download latest data file to specified location.
:param version: version to acquire
Expand Down
1 change: 1 addition & 0 deletions src/wags_tails/ensembl.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provide data management for Ensembl genomic data."""

from pathlib import Path

import requests
Expand Down
2 changes: 1 addition & 1 deletion src/wags_tails/ensembl_transcript_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class EnsemblTranscriptMappingData(UnversionedDataSource):
_src_name = "ensembl_transcript_mappings"
_filetype = "tsv"

def _download_data(self, version: str, outfile: Path) -> None:
def _download_data(self, version: str, outfile: Path) -> None: # noqa: ARG002
"""Download data file to specified location.
:param version: version to acquire
Expand Down
5 changes: 3 additions & 2 deletions src/wags_tails/guide_to_pharmacology.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Provide source fetching for Guide To Pharmacology."""

import logging
import re
from pathlib import Path
from typing import NamedTuple, Tuple
from typing import NamedTuple

import requests

Expand Down Expand Up @@ -67,7 +68,7 @@ def _download_data(self, file_paths: GtoPLigandPaths) -> None:

def get_latest(
self, from_local: bool = False, force_refresh: bool = False
) -> Tuple[GtoPLigandPaths, str]:
) -> tuple[GtoPLigandPaths, str]:
"""Get path to latest version of data, and its version value
:param from_local: if True, use latest available local file
Expand Down
11 changes: 6 additions & 5 deletions src/wags_tails/hemonc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""Provide source fetching for HemOnc."""

import logging
import os
import zipfile
from pathlib import Path
from typing import NamedTuple, Tuple
from typing import NamedTuple

import requests

Expand Down Expand Up @@ -69,7 +70,7 @@ def _download_handler(self, dl_path: Path, file_paths: HemOncPaths) -> None:
zip_ref.extract(file, path.parent)
dl_path.unlink()

def _download_data(self, version: str, outfile_paths: HemOncPaths) -> None:
def _download_data(self, version: str, outfile_paths: HemOncPaths) -> None: # noqa: ARG002
"""Download data file to specified location.
:param version: version to acquire
Expand All @@ -84,13 +85,13 @@ def _download_data(self, version: str, outfile_paths: HemOncPaths) -> None:
self.data_dir,
headers={"X-Dataverse-key": api_key},
# provide save_path arg for API consistency, but don't use it
handler=lambda dl_path, save_path: self._download_handler(
handler=lambda dl_path, save_path: self._download_handler( # noqa: ARG005
dl_path, outfile_paths
),
tqdm_params=self._tqdm_params,
)

def _get_local_files(self) -> Tuple[HemOncPaths, str]:
def _get_local_files(self) -> tuple[HemOncPaths, str]:
"""Acquire locally-available data files.
:return: HemOnc file paths and their version
Expand All @@ -108,7 +109,7 @@ def _get_local_files(self) -> Tuple[HemOncPaths, str]:

def get_latest(
self, from_local: bool = False, force_refresh: bool = False
) -> Tuple[HemOncPaths, str]:
) -> tuple[HemOncPaths, str]:
"""Get path to latest version of data, and its version value
:param from_local: if True, use latest available local file
Expand Down
3 changes: 2 additions & 1 deletion src/wags_tails/hgnc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provide data fetching for HGNC."""

import ftplib
from pathlib import Path

Expand Down Expand Up @@ -26,7 +27,7 @@ def _get_latest_version(self) -> str:
timestamp = ftp.voidcmd(f"MDTM {self._directory_path}{self._host_filename}")
return timestamp[4:12]

def _download_data(self, version: str, outfile: Path) -> None:
def _download_data(self, version: str, outfile: Path) -> None: # noqa: ARG002
"""Download data file to specified location.
:param version: version to acquire
Expand Down
6 changes: 3 additions & 3 deletions src/wags_tails/mondo.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Provide source fetching for Mondo Disease Ontology."""

import datetime
import logging
from pathlib import Path
from typing import Tuple

import requests

Expand All @@ -22,7 +22,7 @@ class MondoData(GitHubDataSource):
_repo = "monarch-initiative/mondo"

@staticmethod
def _get_latest_version() -> Tuple[str, str]:
def _get_latest_version() -> tuple[str, str]:
"""Retrieve latest version value, and download URL, from GitHub release data.
:param asset_name: name of file asset, if needed
Expand Down Expand Up @@ -71,7 +71,7 @@ def _download_data(self, version: str, outfile: Path) -> None:

def get_latest(
self, from_local: bool = False, force_refresh: bool = False
) -> Tuple[Path, str]:
) -> tuple[Path, str]:
"""Get path to latest version of data. Overwrite inherited method because
final downloads depend on information gleaned from the version API call.
Expand Down
5 changes: 3 additions & 2 deletions src/wags_tails/ncbi.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""Provide data fetching for NCBI gene data sources."""

import ftplib
import logging
import re
from pathlib import Path
from typing import NamedTuple, Tuple
from typing import NamedTuple

from wags_tails.base_source import DataSource, RemoteDataError
from wags_tails.utils.downloads import download_ftp, handle_gzip
Expand Down Expand Up @@ -142,7 +143,7 @@ def _download_data(self, file_paths: NcbiGenePaths) -> None:

def get_latest(
self, from_local: bool = False, force_refresh: bool = False
) -> Tuple[NcbiGenePaths, str]:
) -> tuple[NcbiGenePaths, str]:
"""Get path to latest version of data, and its version value
:param from_local: if True, use latest available local file
Expand Down
3 changes: 2 additions & 1 deletion src/wags_tails/ncbi_lrg_refseqgene.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Fetches NCBI LRG_RefSeqGene data."""

import re
from pathlib import Path

Expand Down Expand Up @@ -36,7 +37,7 @@ def _get_latest_version(self) -> str:
raise RemoteDataError(msg)
return match[0].replace("-", "")

def _download_data(self, version: str, outfile: Path) -> None:
def _download_data(self, version: str, outfile: Path) -> None: # noqa: ARG002
"""Download data file to specified location.
:param version: version to acquire
Expand Down
Loading

0 comments on commit 2247c0d

Please sign in to comment.