feat: configure logging in CLI (#342)
jsstevenson authored Jun 4, 2024
1 parent e558a0e commit 5e25500
Showing 4 changed files with 52 additions and 35 deletions.
22 changes: 21 additions & 1 deletion src/gene/cli.py
@@ -1,4 +1,5 @@
"""Provides a CLI util to make updates to normalizer database."""
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
@@ -10,9 +11,20 @@
from gene.etl.update import update_all_sources, update_normalized, update_source
from gene.schemas import SourceName

_logger = logging.getLogger(__name__)

url_description = 'URL endpoint for the application database. Can either be a URL to a local DynamoDB server (e.g. "http://localhost:8001") or a libpq-compliant PostgreSQL connection description (e.g. "postgresql://postgres:password@localhost:5432/gene_normalizer").'


def _configure_logging() -> None:
"""Configure logging."""
logging.basicConfig(
filename=f"{__package__}.log",
format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s",
)
logging.getLogger(__package__).setLevel(logging.DEBUG)


@click.group()
def cli() -> None:
"""Manage Gene Normalizer data."""
@@ -71,6 +83,7 @@ def update(
:param use_existing: if True, use most recent local data instead of fetching latest version
:param silent: if True, suppress console output
""" # noqa: D301
_configure_logging()
if (not sources) and (not all_) and (not normalize):
click.echo(
"Error: must provide SOURCES or at least one of --all, --normalize\n"
@@ -125,6 +138,7 @@ def update_from_remote(data_url: Optional[str], db_url: str, silent: bool) -> No
:param db_url: URL to normalizer database
:param silent: if True, suppress console output
""" # noqa: D301
_configure_logging()
if not click.confirm("Are you sure you want to overwrite existing data?"):
click.get_current_context().exit()
if not data_url:
@@ -140,6 +154,7 @@ def update_from_remote(data_url: Optional[str], db_url: str, silent: bool) -> No
except DatabaseError as e:
click.echo(f"Encountered exception during update: {e!s}")
click.get_current_context().exit(1)
_logger.info("Successfully loaded data from remote snapshot.")


@cli.command()
@@ -175,6 +190,7 @@ def check_db(db_url: str, verbose: bool, silent: bool) -> None:
:param verbose: if true, print result to console
:param silent: if True, suppress console output
""" # noqa: D301
_configure_logging()
db = create_db(db_url, False, silent)
if not db.check_schema_initialized():
if verbose:
@@ -186,8 +202,10 @@
click.echo("Health check failed: DB is incompletely populated.")
click.get_current_context().exit(1)

msg = "DB health check successful: tables appear complete."
if verbose:
click.echo("DB health check successful: tables appear complete.")
click.echo(msg)
_logger.info(msg)


@cli.command()
@@ -209,6 +227,7 @@ def dump_database(output_directory: Path, db_url: str, silent: bool) -> None:
:param db_url: URL to normalizer database
:param silent: if True, suppress console output
""" # noqa: D301
_configure_logging()
if not output_directory:
output_directory = Path()

@@ -223,6 +242,7 @@ def dump_database(output_directory: Path, db_url: str, silent: bool) -> None:
except DatabaseError as e:
click.echo(f"Encountered exception during update: {e!s}")
click.get_current_context().exit(1)
_logger.info("Database dump successful.")


if __name__ == "__main__":
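The new `_configure_logging` helper is what ties the rest of the commit together: `logging.basicConfig` attaches a file handler named after the package (so `gene.log` here, since `__package__` is `gene` inside src/gene/cli.py) with a timestamped format, and the package logger is raised to DEBUG so that records from any `gene.*` module reach that file. A rough sketch of the effect, with the logger name and sample record below chosen for illustration rather than taken from the diff:

import logging

logging.basicConfig(
    filename="gene.log",  # __package__ == "gene" inside src/gene/cli.py
    format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s",
)
logging.getLogger("gene").setLevel(logging.DEBUG)

# A record emitted anywhere under the package...
logging.getLogger("gene.etl.update").info("Deleting HGNC...")
# ...would land in gene.log looking roughly like:
# [2024-06-04 12:00:00,000] - gene.etl.update - INFO : Deleting HGNC...

Because only the `gene` logger's level is lowered, third-party loggers keep the root logger's default WARNING threshold, so only their warnings and errors reach the file.
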
3 changes: 2 additions & 1 deletion src/gene/etl/base.py
@@ -86,7 +86,7 @@ def perform_etl(self, use_existing: bool = False) -> List[str]:
uploaded.
"""
self._extract_data(use_existing)
_logger.info("Transforming and loading %s data to DB...", self._src_name)
_logger.info("Transforming and loading %s data to DB...", self._src_name.value)
if not self._silent:
click.echo("Transforming and loading data to DB...")
self._add_meta()
@@ -106,6 +106,7 @@ def _extract_data(self, use_existing: bool) -> None:
self._data_file, self._version = self._data_source.get_latest(
from_local=use_existing
)
_logger.info("Acquired data for %s: %s", self._src_name.value, self._data_file)

@abstractmethod
def _transform_data(self) -> None:
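The `.value` fixes in base.py matter because `self._src_name` appears to be a `SourceName` enum member (cli.py imports `SourceName` from gene.schemas), and %-style interpolation of a plain Enum member renders its qualified form rather than the bare name. A small hedged illustration, with the enum definition assumed rather than copied from gene.schemas:

import logging
from enum import Enum

logging.basicConfig(level=logging.INFO, format="%(message)s")
_logger = logging.getLogger(__name__)


class SourceName(Enum):  # assumed shape; the real class lives in gene.schemas
    HGNC = "HGNC"


src = SourceName.HGNC
_logger.info("Transforming and loading %s data to DB...", src)
# -> Transforming and loading SourceName.HGNC data to DB...
_logger.info("Transforming and loading %s data to DB...", src.value)
# -> Transforming and loading HGNC data to DB...

The argument is still passed separately rather than pre-formatted, so interpolation only happens if the record is actually emitted.
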
2 changes: 1 addition & 1 deletion src/gene/etl/ncbi.py
@@ -51,7 +51,7 @@ def _extract_data(self, use_existing: bool) -> None:
:param use_existing: if True, use latest available local file
"""
_logger.info("Gathering {self._src_name} data...", self._src_name)
_logger.info("Gathering %s data...", self._src_name.value)
self._gff_src, self._assembly = self._genome_data_handler.get_latest(
from_local=use_existing
)
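The ncbi.py change also repairs a pre-existing glitch: the old string was never an f-string, so `{self._src_name}` would not be interpolated, and because it contains no `%s` placeholder the extra positional argument makes logging's lazy %-formatting fail when the record is emitted. A minimal reproduction under assumed names (only the two `_logger.info` forms come from the diff):

import logging

logging.basicConfig(level=logging.INFO)
_logger = logging.getLogger(__name__)
src_name = "NCBI"  # stand-in for self._src_name.value

# Old form: the braces stay literal and the stray argument has no %s to fill,
# so the handler reports a formatting error instead of the intended message.
_logger.info("Gathering {self._src_name} data...", src_name)

# Fixed form, as in the commit: lazy %-style interpolation.
_logger.info("Gathering %s data...", src_name)
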
60 changes: 28 additions & 32 deletions src/gene/etl/update.py
@@ -15,6 +15,17 @@
_logger = logging.getLogger(__name__)


def _emit_info_msg(msg: str, silent: bool) -> None:
"""Handle info message display.
:param msg: message to log/print
:param silent: if True, don't print to console
"""
if not silent:
click.echo(msg)
_logger.info(msg)


def delete_source(
source: SourceName, db: AbstractDatabase, silent: bool = True
) -> float:
@@ -25,18 +36,12 @@ def delete_source(
:param silent: if True, suppress console output
:return: time spent deleting source
"""
msg = f"Deleting {source.value}..."
if not silent:
click.echo(f"\n{msg}")
_logger.info(msg)
_emit_info_msg(f"Deleting {source.value}...", silent)
start_delete = timer()
db.delete_source(source)
end_delete = timer()
delete_time = end_delete - start_delete
msg = f"Deleted {source.value} in {delete_time:.5f} seconds."
if not silent:
click.echo(f"{msg}")
_logger.info(msg)
_emit_info_msg(f"Deleted {source.value} in {delete_time:.5f} seconds.", silent)
return delete_time


@@ -54,10 +59,7 @@ def load_source(
:param silent: if True, suppress console output
:return: time spent loading data, and set of processed IDs from that source
"""
msg = f"Loading {source.value}..."
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(f"Loading {source.value}...", silent)
start_load = timer()

# used to get source class name from string
@@ -83,10 +85,10 @@
click.get_current_context().exit()
end_load = timer()
load_time = end_load - start_load
msg = f"Loaded {len(processed_ids)} records from {source.value} in {load_time:.5f} seconds."
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(
f"Loaded {len(processed_ids)} records from {source.value} in {load_time:.5f} seconds.",
silent,
)

return (load_time, set(processed_ids))

@@ -112,10 +114,10 @@
"""
delete_time = delete_source(source, db, silent)
load_time, processed_ids = load_source(source, db, use_existing, silent)
msg = f"Total time for {source.value}: {(delete_time + load_time):.5f} seconds."
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(
f"Total time for {source.value}: {(delete_time + load_time):.5f} seconds.",
silent,
)
return processed_ids


@@ -142,10 +144,7 @@ def delete_normalized(database: AbstractDatabase, silent: bool = True) -> None:
:param database: DB instance
:param silent: if True, suppress console output
"""
msg = "\nDeleting normalized records..."
_logger.info(msg)
if not silent:
click.echo(msg)
_emit_info_msg("\nDeleting normalized records...", silent)
start_delete = timer()
try:
database.delete_normalized_concepts()
@@ -154,10 +153,7 @@
raise e
end_delete = timer()
delete_time = end_delete - start_delete
msg = f"Deleted normalized records in {delete_time:.5f} seconds."
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(f"Deleted normalized records in {delete_time:.5f} seconds.", silent)


def update_normalized(
@@ -190,10 +186,10 @@ def update_normalized(
click.echo("Constructing normalized records...")
merge.create_merged_concepts(processed_ids)
end = timer()
msg = f"Merged concept generation completed in " f"{(end - start):.5f} seconds"
if not silent:
click.echo(msg)
_logger.info(msg)
_emit_info_msg(
f"Merged concept generation completed in " f"{(end - start):.5f} seconds",
silent,
)


def update_all_and_normalize(
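Most of the churn in update.py is the new `_emit_info_msg` helper replacing the repeated echo-unless-silent-then-log blocks. A hedged, self-contained sketch of the pattern (the timing around the helper mirrors `delete_source` from the diff, with `timer` assumed to be `timeit.default_timer` given the elapsed-seconds arithmetic, and the database call stubbed out):

import logging
from timeit import default_timer as timer

import click

_logger = logging.getLogger(__name__)


def _emit_info_msg(msg: str, silent: bool) -> None:
    """Echo to the console unless silenced, and always log at INFO level."""
    if not silent:
        click.echo(msg)
    _logger.info(msg)


def delete_source_demo(source_name: str, silent: bool = True) -> float:
    """Simplified stand-in for delete_source(): time a deletion and report it."""
    _emit_info_msg(f"Deleting {source_name}...", silent)
    start = timer()
    # db.delete_source(source) would run here in the real function
    elapsed = timer() - start
    _emit_info_msg(f"Deleted {source_name} in {elapsed:.5f} seconds.", silent)
    return elapsed

The console output and the log file therefore always stay in sync, and the `silent` flag only suppresses the console half.
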
