From 039592bf08f5e3156b51b9067e65725002080978 Mon Sep 17 00:00:00 2001
From: Matthew Evans
Date: Sun, 18 Feb 2024 12:07:49 +0000
Subject: [PATCH 1/3] Fix occurrences of D411 missing linebreak before
 section, which breaks mkdocs rendering

---
 .pre-commit-config.yaml        | 2 +-
 src/maggma/core/builder.py     | 1 +
 src/maggma/stores/gridfs.py    | 1 +
 src/maggma/stores/mongolike.py | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4150b22be..19c16f0e8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
     rev: v0.0.280
     hooks:
       - id: ruff
-        args: [--fix, --show-fixes, --ignore, D]
+        args: [--fix, --show-fixes, --ignore, D, --extend-select, D411]

   - repo: https://github.com/psf/black
     rev: 23.7.0
diff --git a/src/maggma/core/builder.py b/src/maggma/core/builder.py
index e7a8c823b..e37cd9d6d 100644
--- a/src/maggma/core/builder.py
+++ b/src/maggma/core/builder.py
@@ -83,6 +83,7 @@ def process_item(self, item: Any) -> Any:
         """
         Process an item. There should be no database operations in this method.
         Default behavior is to return the item.
+
         Arguments:
             item:
diff --git a/src/maggma/stores/gridfs.py b/src/maggma/stores/gridfs.py
index db948d46e..23fe10a81 100644
--- a/src/maggma/stores/gridfs.py
+++ b/src/maggma/stores/gridfs.py
@@ -180,6 +180,7 @@ def last_updated(self) -> datetime:
     def transform_criteria(cls, criteria: Dict) -> Dict:
         """
         Allow client to not need to prepend 'metadata.' to query fields.
+
         Args:
             criteria: Query criteria
         """
diff --git a/src/maggma/stores/mongolike.py b/src/maggma/stores/mongolike.py
index 259a9e7f4..c03dc8866 100644
--- a/src/maggma/stores/mongolike.py
+++ b/src/maggma/stores/mongolike.py
@@ -706,6 +706,7 @@ def read_json_file(self, path) -> List:
         """
         Helper method to read the contents of a JSON file and
         generate a list of docs.
+
         Args:
             path: Path to the JSON file to be read
         """

From b7351652b668c70c4d3387ddc9218e35de16eb4f Mon Sep 17 00:00:00 2001
From: Matthew Evans
Date: Sun, 18 Feb 2024 12:09:01 +0000
Subject: [PATCH 2/3] Fix D415 missing punctuation at end of summary line.
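
For reviewers, a minimal before/after sketch of the docstring shape these two
rules police (the function below is illustrative only, not taken from this
diff):

    # Flagged: D415 in this patch (summary line lacks terminal punctuation)
    # and D411 in the previous patch (no blank line before the section).
    def process_item(self, item):
        """
        Process an item
        Args:
            item: the item to process
        """
        return item

    # After ruff's autofix with both rules selected:
    def process_item(self, item):
        """
        Process an item.

        Args:
            item: the item to process
        """
        return item

The missing blank line is the "breaks mkdocs rendering" issue the previous
patch's subject refers to: without it, documentation tooling can fail to
recognize the Args section and run it into the preceding summary paragraph.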
--- src/maggma/api/API.py | 14 ++--- src/maggma/api/__init__.py | 2 +- src/maggma/api/models.py | 8 +-- src/maggma/api/query_operator/core.py | 8 +-- src/maggma/api/query_operator/dynamic.py | 16 ++--- src/maggma/api/query_operator/pagination.py | 10 +-- src/maggma/api/query_operator/sorting.py | 2 +- .../api/query_operator/sparse_fields.py | 12 ++-- src/maggma/api/query_operator/submission.py | 4 +- src/maggma/api/resource/aggregation.py | 4 +- src/maggma/api/resource/core.py | 20 +++--- src/maggma/api/resource/post_resource.py | 4 +- src/maggma/api/resource/read_resource.py | 4 +- src/maggma/api/resource/s3_url.py | 2 +- src/maggma/api/resource/submission.py | 2 +- src/maggma/api/resource/utils.py | 4 +- src/maggma/api/utils.py | 6 +- src/maggma/builders/group_builder.py | 16 ++--- src/maggma/builders/map_builder.py | 16 ++--- src/maggma/builders/projection_builder.py | 2 +- src/maggma/cli/distributed.py | 2 +- src/maggma/cli/multiprocessing.py | 10 +-- src/maggma/cli/rabbitmq.py | 2 +- src/maggma/cli/serial.py | 2 +- src/maggma/cli/source_loader.py | 10 +-- src/maggma/cli/sources/__init__.py | 2 +- src/maggma/core/__init__.py | 2 +- src/maggma/core/builder.py | 8 +-- src/maggma/core/store.py | 38 ++++++------ src/maggma/core/validator.py | 4 +- src/maggma/stores/__init__.py | 2 +- src/maggma/stores/advanced_stores.py | 50 +++++++-------- src/maggma/stores/aws.py | 2 +- src/maggma/stores/azure.py | 32 +++++----- src/maggma/stores/compound_stores.py | 60 +++++++++--------- src/maggma/stores/file_store.py | 10 +-- src/maggma/stores/gridfs.py | 32 +++++----- src/maggma/stores/mongolike.py | 62 +++++++++---------- src/maggma/stores/open_data.py | 22 +++---- src/maggma/stores/shared_stores.py | 58 ++++++++--------- src/maggma/stores/ssh_tunnel.py | 2 +- src/maggma/utils.py | 38 ++++++------ src/maggma/validators.py | 4 +- 43 files changed, 305 insertions(+), 305 deletions(-) diff --git a/src/maggma/api/API.py b/src/maggma/api/API.py index ff2c82f21..b41f04181 100644 --- a/src/maggma/api/API.py +++ b/src/maggma/api/API.py @@ -13,7 +13,7 @@ class API(MSONable): """ - Basic API manager to tie together various resources + Basic API manager to tie together various resources. """ def __init__( @@ -34,7 +34,7 @@ def __init__( debug: turns debug on in FastAPI heartbeat_meta: dictionary of additional metadata to include in the heartbeat response description: description of the API to be used in the generated docs - tags_meta: descriptions of tags to be used in the generated docs + tags_meta: descriptions of tags to be used in the generated docs. """ self.title = title self.version = version @@ -50,7 +50,7 @@ def __init__( def on_startup(self): """ - Basic startup that runs the resource startup functions + Basic startup that runs the resource startup functions. """ for resource_list in self.resources.values(): for resource in resource_list: @@ -59,7 +59,7 @@ def on_startup(self): @property def app(self): """ - App server for the cluster manager + App server for the cluster manager. 
""" app = FastAPI( title=self.title, @@ -92,7 +92,7 @@ def app(self): @app.get("/heartbeat", include_in_schema=False) def heartbeat(): - """API Heartbeat for Load Balancing""" + """API Heartbeat for Load Balancing.""" return { "status": "OK", @@ -103,14 +103,14 @@ def heartbeat(): @app.get("/", include_in_schema=False) def redirect_docs(): - """Redirects the root end point to the docs""" + """Redirects the root end point to the docs.""" return RedirectResponse(url=app.docs_url, status_code=301) return app def run(self, ip: str = "127.0.0.1", port: int = 8000, log_level: str = "info"): """ - Runs the Cluster Manager locally + Runs the Cluster Manager locally. Args: ip: Local IP to listen on diff --git a/src/maggma/api/__init__.py b/src/maggma/api/__init__.py index 5e3624412..97b6261f5 100644 --- a/src/maggma/api/__init__.py +++ b/src/maggma/api/__init__.py @@ -1 +1 @@ -""" Simple API Interface for Maggma """ +""" Simple API Interface for Maggma. """ diff --git a/src/maggma/api/models.py b/src/maggma/api/models.py index a2d04fad8..c58cee633 100644 --- a/src/maggma/api/models.py +++ b/src/maggma/api/models.py @@ -14,7 +14,7 @@ class Meta(BaseModel): """ - Meta information for the MAPI Response + Meta information for the MAPI Response. """ api_version: str = Field( @@ -35,7 +35,7 @@ class Config: class Error(BaseModel): """ - Base Error model for General API + Base Error model for General API. """ code: int = Field(..., description="The error code") @@ -48,7 +48,7 @@ def from_traceback(cls, traceback): class Response(BaseModel, Generic[DataT]): """ - A Generic API Response + A Generic API Response. """ data: Optional[List[DataT]] = Field(None, description="List of returned data") @@ -78,7 +78,7 @@ def default_meta(cls, v, values): class S3URLDoc(BaseModel): """ - S3 pre-signed URL data returned by the S3 URL resource + S3 pre-signed URL data returned by the S3 URL resource. """ url: str = Field( diff --git a/src/maggma/api/query_operator/core.py b/src/maggma/api/query_operator/core.py index 46827d6c8..127d622bd 100644 --- a/src/maggma/api/query_operator/core.py +++ b/src/maggma/api/query_operator/core.py @@ -9,18 +9,18 @@ class QueryOperator(MSONable, metaclass=ABCMeta): """ Base Query Operator class for defining powerful query language - in the Materials API + in the Materials API. """ @abstractmethod def query(self) -> STORE_PARAMS: """ - The query function that does the work for this query operator + The query function that does the work for this query operator. """ def meta(self) -> Dict: """ - Returns meta data to return with the Response + Returns meta data to return with the Response. Args: store: the Maggma Store that the resource uses @@ -30,7 +30,7 @@ def meta(self) -> Dict: def post_process(self, docs: List[Dict], query: Dict) -> List[Dict]: """ - An optional post-processing function for the data + An optional post-processing function for the data. 
         Args:
             docs: the document results to post-process
diff --git a/src/maggma/api/query_operator/dynamic.py b/src/maggma/api/query_operator/dynamic.py
index 0a30232b3..2715038ed 100644
--- a/src/maggma/api/query_operator/dynamic.py
+++ b/src/maggma/api/query_operator/dynamic.py
@@ -13,7 +13,7 @@
 class DynamicQueryOperator(QueryOperator):
-    """Abstract Base class for dynamic query operators"""
+    """Abstract Base class for dynamic query operators."""

     def __init__(
         self,
@@ -75,7 +75,7 @@ def query(**kwargs) -> STORE_PARAMS:
         self.query = query  # type: ignore

     def query(self):
-        "Stub query function for abstract class"
+        "Stub query function for abstract class."

     @abstractmethod
     def field_to_operator(self, name: str, field: FieldInfo) -> List[Tuple[str, Any, Query, Callable[..., Dict]]]:
@@ -84,7 +84,7 @@ def field_to_operator(self, name: str, field: FieldInfo) -> List[Tuple[str, Any,
         - query param name,
         - query param type
         - FastAPI Query object,
-        - and callable to convert the value into a query dict
+        - and callable to convert the value into a query dict.
         """

     @classmethod
@@ -97,7 +97,7 @@ def from_dict(cls, d):

     def as_dict(self) -> Dict:
         """
-        Special as_dict implemented to convert pydantic models into strings
+        Special as_dict implemented to convert pydantic models into strings.
         """
         d = super().as_dict()  # Ensures sub-classes serialize correctly
         d["model"] = f"{self.model.__module__}.{self.model.__name__}"  # type: ignore
@@ -105,7 +105,7 @@ def as_dict(self) -> Dict:

 class NumericQuery(DynamicQueryOperator):
-    "Query Operator to enable searching on numeric fields"
+    "Query Operator to enable searching on numeric fields."

     def field_to_operator(self, name: str, field: FieldInfo) -> List[Tuple[str, Any, Query, Callable[..., Dict]]]:
         """
@@ -113,7 +113,7 @@ def field_to_operator(self, name: str, field: FieldInfo) -> List[Tuple[str, Any,
         query_param name,
         default value,
         Query object,
-        and callable to convert it into a query dict
+        and callable to convert it into a query dict.
         """

         ops = []
@@ -190,7 +190,7 @@ def field_to_operator(self, name: str, field: FieldInfo) -> List[Tuple[str, Any,

 class StringQueryOperator(DynamicQueryOperator):
-    "Query Operator to enable searching on numeric fields"
+    "Query Operator to enable searching on string fields."

     def field_to_operator(self, name: str, field: FieldInfo) -> List[Tuple[str, Any, Query, Callable[..., Dict]]]:
         """
@@ -198,7 +198,7 @@ def field_to_operator(self, name: str, field: FieldInfo) -> List[Tuple[str, Any,
         query_param name,
         default value,
         Query object,
-        and callable to convert it into a query dict
+        and callable to convert it into a query dict.
         """

         ops = []
diff --git a/src/maggma/api/query_operator/pagination.py b/src/maggma/api/query_operator/pagination.py
index cbfc49dbf..742e930c6 100644
--- a/src/maggma/api/query_operator/pagination.py
+++ b/src/maggma/api/query_operator/pagination.py
@@ -7,13 +7,13 @@

 class PaginationQuery(QueryOperator):
-    """Query operators to provides Pagination"""
+    """Query operator to provide pagination."""

     def __init__(self, default_limit: int = 100, max_limit: int = 1000):
         """
         Args:
             default_limit: the default number of documents to return
-            max_limit: max number of documents to return
+            max_limit: max number of documents to return.
         """

         self.default_limit = default_limit
@@ -39,7 +39,7 @@ def query(
         ),
     ) -> STORE_PARAMS:
         """
-        Pagination parameters for the API Endpoint
+        Pagination parameters for the API Endpoint.
""" if _page is not None: @@ -80,10 +80,10 @@ def query( self.query = query # type: ignore def query(self): - "Stub query function for abstract class" + "Stub query function for abstract class." def meta(self) -> Dict: """ - Metadata for the pagination params + Metadata for the pagination params. """ return {"max_limit": self.max_limit} diff --git a/src/maggma/api/query_operator/sorting.py b/src/maggma/api/query_operator/sorting.py index dc478eafc..9d45eb24b 100644 --- a/src/maggma/api/query_operator/sorting.py +++ b/src/maggma/api/query_operator/sorting.py @@ -8,7 +8,7 @@ class SortQuery(QueryOperator): """ - Method to generate the sorting portion of a query + Method to generate the sorting portion of a query. """ def query( diff --git a/src/maggma/api/query_operator/sparse_fields.py b/src/maggma/api/query_operator/sparse_fields.py index 3e55cec1d..3b33363b2 100644 --- a/src/maggma/api/query_operator/sparse_fields.py +++ b/src/maggma/api/query_operator/sparse_fields.py @@ -13,7 +13,7 @@ def __init__(self, model: Type[BaseModel], default_fields: Optional[List[str]] = """ Args: model: PyDantic Model that represents the underlying data source - default_fields: default fields to return in the API response if no fields are explicitly requested + default_fields: default fields to return in the API response if no fields are explicitly requested. """ self.model = model @@ -32,7 +32,7 @@ def query( _all_fields: bool = Query(False, description="Include all fields."), ) -> STORE_PARAMS: """ - Pagination parameters for the API Endpoint + Pagination parameters for the API Endpoint. """ properties = _fields.split(",") if isinstance(_fields, str) else self.default_fields @@ -44,17 +44,17 @@ def query( self.query = query # type: ignore def query(self): - "Stub query function for abstract class" + "Stub query function for abstract class." def meta(self) -> Dict: """ - Returns metadata for the Sparse field set + Returns metadata for the Sparse field set. """ return {"default_fields": self.default_fields} def as_dict(self) -> Dict: """ - Special as_dict implemented to convert pydantic models into strings + Special as_dict implemented to convert pydantic models into strings. """ d = super().as_dict() # Ensures sub-classes serialize correctly @@ -64,7 +64,7 @@ def as_dict(self) -> Dict: @classmethod def from_dict(cls, d): """ - Special from_dict to autoload the pydantic model from the location string + Special from_dict to autoload the pydantic model from the location string. """ model = d.get("model") if isinstance(model, str): diff --git a/src/maggma/api/query_operator/submission.py b/src/maggma/api/query_operator/submission.py index 66c4d0104..4b75f7694 100644 --- a/src/maggma/api/query_operator/submission.py +++ b/src/maggma/api/query_operator/submission.py @@ -9,7 +9,7 @@ class SubmissionQuery(QueryOperator): """ - Method to generate a query for submission data using status and datetime + Method to generate a query for submission data using status and datetime. """ def __init__(self, status_enum): @@ -40,4 +40,4 @@ def query( self.query = query def query(self): - "Stub query function for abstract class" + "Stub query function for abstract class." 
diff --git a/src/maggma/api/resource/aggregation.py b/src/maggma/api/resource/aggregation.py index c26e37223..b7e7848d1 100644 --- a/src/maggma/api/resource/aggregation.py +++ b/src/maggma/api/resource/aggregation.py @@ -17,7 +17,7 @@ class AggregationResource(Resource): """ - Implements a REST Compatible Resource as a GET URL endpoint + Implements a REST Compatible Resource as a GET URL endpoint. """ def __init__( @@ -58,7 +58,7 @@ def __init__( def prepare_endpoint(self): """ Internal method to prepare the endpoint by setting up default handlers - for routes + for routes. """ self.build_dynamic_model_search() diff --git a/src/maggma/api/resource/core.py b/src/maggma/api/resource/core.py index 5ff5af18a..23e358b92 100644 --- a/src/maggma/api/resource/core.py +++ b/src/maggma/api/resource/core.py @@ -13,7 +13,7 @@ class Resource(MSONable, metaclass=ABCMeta): """ - Base class for a REST Compatible Resource + Base class for a REST Compatible Resource. """ def __init__( @@ -22,7 +22,7 @@ def __init__( ): """ Args: - model: the pydantic model this Resource represents + model: the pydantic model this Resource represents. """ if not issubclass(model, BaseModel): raise ValueError("The resource model has to be a PyDantic Model") @@ -36,7 +36,7 @@ def __init__( def on_startup(self): """ - Callback to perform some work on resource initialization + Callback to perform some work on resource initialization. """ @abstractmethod @@ -51,7 +51,7 @@ def setup_redirect(self): def redirect_unslashed(): """ Redirects unforward slashed url to resource - url with the forward slash + url with the forward slash. """ url = self.router.url_path_for("/") @@ -60,7 +60,7 @@ def redirect_unslashed(): def run(self): # pragma: no cover """ Runs the Endpoint cluster locally - This is intended for testing not production + This is intended for testing not production. """ import uvicorn @@ -70,7 +70,7 @@ def run(self): # pragma: no cover def as_dict(self) -> Dict: """ - Special as_dict implemented to convert pydantic models into strings + Special as_dict implemented to convert pydantic models into strings. """ d = super().as_dict() # Ensures sub-classes serialize correctly @@ -87,19 +87,19 @@ def from_dict(cls, d: Dict): class HintScheme(MSONable, metaclass=ABCMeta): """ - Base class for generic hint schemes generation + Base class for generic hint schemes generation. """ @abstractmethod def generate_hints(self, query: STORE_PARAMS) -> STORE_PARAMS: """ - This method takes in a MongoDB query and returns hints + This method takes in a MongoDB query and returns hints. """ class HeaderProcessor(MSONable, metaclass=ABCMeta): """ - Base class for generic header processing + Base class for generic header processing. """ @abstractmethod @@ -107,5 +107,5 @@ def process_header(self, response: Response, request: Request): """ This method takes in a FastAPI Response object and processes a new header for it in-place. It can use data in the upstream request to generate the header. - (https://fastapi.tiangolo.com/advanced/response-headers/#use-a-response-parameter) + (https://fastapi.tiangolo.com/advanced/response-headers/#use-a-response-parameter). 
""" diff --git a/src/maggma/api/resource/post_resource.py b/src/maggma/api/resource/post_resource.py index 713888843..deab50455 100644 --- a/src/maggma/api/resource/post_resource.py +++ b/src/maggma/api/resource/post_resource.py @@ -17,7 +17,7 @@ class PostOnlyResource(Resource): """ - Implements a REST Compatible Resource as a POST URL endpoint + Implements a REST Compatible Resource as a POST URL endpoint. """ def __init__( @@ -73,7 +73,7 @@ def __init__( def prepare_endpoint(self): """ Internal method to prepare the endpoint by setting up default handlers - for routes + for routes. """ self.build_dynamic_model_search() diff --git a/src/maggma/api/resource/read_resource.py b/src/maggma/api/resource/read_resource.py index 066b5a231..2d602f49e 100644 --- a/src/maggma/api/resource/read_resource.py +++ b/src/maggma/api/resource/read_resource.py @@ -21,7 +21,7 @@ class ReadOnlyResource(Resource): """ Implements a REST Compatible Resource as a GET URL endpoint This class provides a number of convenience features - including full pagination, field projection + including full pagination, field projection. """ def __init__( @@ -98,7 +98,7 @@ def __init__( def prepare_endpoint(self): """ Internal method to prepare the endpoint by setting up default handlers - for routes + for routes. """ if self.enable_get_by_key: diff --git a/src/maggma/api/resource/s3_url.py b/src/maggma/api/resource/s3_url.py index 0eaf899e0..b12d8779a 100644 --- a/src/maggma/api/resource/s3_url.py +++ b/src/maggma/api/resource/s3_url.py @@ -54,7 +54,7 @@ def __init__( def prepare_endpoint(self): """ Internal method to prepare the endpoint by setting up default handlers - for routes + for routes. """ self.build_get_by_key() diff --git a/src/maggma/api/resource/submission.py b/src/maggma/api/resource/submission.py index 14ace3901..ef1a9905b 100644 --- a/src/maggma/api/resource/submission.py +++ b/src/maggma/api/resource/submission.py @@ -116,7 +116,7 @@ def __init__( def prepare_endpoint(self): """ Internal method to prepare the endpoint by setting up default handlers - for routes + for routes. """ if self.enable_default_search: diff --git a/src/maggma/api/resource/utils.py b/src/maggma/api/resource/utils.py index 8532dc6db..51ca9388b 100644 --- a/src/maggma/api/resource/utils.py +++ b/src/maggma/api/resource/utils.py @@ -12,7 +12,7 @@ def attach_query_ops( ) -> Callable[[List[STORE_PARAMS]], Dict]: """ Attach query operators to API compliant function - The function has to take a list of STORE_PARAMs as the only argument + The function has to take a list of STORE_PARAMs as the only argument. Args: function: the function to decorate @@ -31,7 +31,7 @@ def attach_query_ops( def generate_query_pipeline(query: dict, store: Store): """ - Generate the generic aggregation pipeline used in GET endpoint queries + Generate the generic aggregation pipeline used in GET endpoint queries. Args: query: Query parameters diff --git a/src/maggma/api/utils.py b/src/maggma/api/utils.py index 99177e5f2..6dcefed04 100644 --- a/src/maggma/api/utils.py +++ b/src/maggma/api/utils.py @@ -58,7 +58,7 @@ def merge_queries(queries: List[STORE_PARAMS]) -> STORE_PARAMS: def attach_signature(function: Callable, defaults: Dict, annotations: Dict): """ - Attaches signature for defaults and annotations for parameters to function + Attaches signature for defaults and annotations for parameters to function. 
Args: function: callable function to attach the signature to @@ -142,12 +142,12 @@ def api_sanitize( def allow_msonable_dict(monty_cls: Type[MSONable]): """ - Patch Monty to allow for dict values for MSONable + Patch Monty to allow for dict values for MSONable. """ def validate_monty(cls, v, _): """ - Stub validator for MSONable as a dictionary only + Stub validator for MSONable as a dictionary only. """ if isinstance(v, cls): return v diff --git a/src/maggma/builders/group_builder.py b/src/maggma/builders/group_builder.py index 0feed4cf3..3f6df7ca6 100644 --- a/src/maggma/builders/group_builder.py +++ b/src/maggma/builders/group_builder.py @@ -1,5 +1,5 @@ """ -Many-to-Many GroupBuilder +Many-to-Many GroupBuilder. """ import traceback from abc import ABCMeta, abstractmethod @@ -50,7 +50,7 @@ def __init__( store_process_time: If True, add "_process_time" key to document for profiling purposes retry_failed: If True, will retry building documents that - previously failed + previously failed. """ self.source = source self.target = target @@ -69,7 +69,7 @@ def __init__( def ensure_indexes(self): """ Ensures indices on critical fields for GroupBuilder - which include the plural version of the target's key field + which include the plural version of the target's key field. """ index_checks = [ self.source.ensure_index(self.source.key), @@ -92,7 +92,7 @@ def ensure_indexes(self): def prechunk(self, number_splits: int) -> Iterator[Dict]: """ Generic prechunk for group builder to perform domain-decomposition - by the grouping keys + by the grouping keys. """ self.ensure_indexes() @@ -154,7 +154,7 @@ def process_item(self, item: List[Dict]) -> Dict[Tuple, Dict]: # type: ignore def update_targets(self, items: List[Dict]): """ - Generic update targets for Group Builder + Generic update targets for Group Builder. """ target = self.target for item in items: @@ -167,7 +167,7 @@ def update_targets(self, items: List[Dict]): @abstractmethod def unary_function(self, items: List[Dict]) -> Dict: """ - Processing function for GroupBuilder + Processing function for GroupBuilder. Arguments: items: list of of documents with matching grouping keys @@ -180,7 +180,7 @@ def unary_function(self, items: List[Dict]) -> Dict: def get_ids_to_process(self) -> Iterable: """ - Gets the IDs that need to be processed + Gets the IDs that need to be processed. """ query = self.query or {} @@ -214,7 +214,7 @@ def get_ids_to_process(self) -> Iterable: def get_groups_from_keys(self, keys) -> Set[Tuple]: """ - Get the groups by grouping_keys for these documents + Get the groups by grouping_keys for these documents. """ grouping_keys = self.grouping_keys diff --git a/src/maggma/builders/map_builder.py b/src/maggma/builders/map_builder.py index 0561cd62e..15392d3ea 100644 --- a/src/maggma/builders/map_builder.py +++ b/src/maggma/builders/map_builder.py @@ -1,5 +1,5 @@ """ -One-to-One Map Builder and a simple CopyBuilder implementation +One-to-One Map Builder and a simple CopyBuilder implementation. """ import traceback from abc import ABCMeta, abstractmethod @@ -65,7 +65,7 @@ def __init__( def ensure_indexes(self): """ - Ensures indices on critical fields for MapBuilder + Ensures indices on critical fields for MapBuilder. """ index_checks = [ self.source.ensure_index(self.source.key), @@ -87,7 +87,7 @@ def ensure_indexes(self): def prechunk(self, number_splits: int) -> Iterator[Dict]: """ Generic prechunk for map builder to perform domain-decomposition - by the key field + by the key field. 
""" self.ensure_indexes() keys = self.target.newer_in(self.source, criteria=self.query, exhaustive=True) @@ -99,7 +99,7 @@ def prechunk(self, number_splits: int) -> Iterator[Dict]: def get_items(self): """ Generic get items for Map Builder designed to perform - incremental building + incremental building. """ self.logger.info(f"Starting {self.__class__.__name__} Builder") @@ -136,7 +136,7 @@ def get_items(self): def process_item(self, item: Dict): """ Generic process items to process a dictionary using - a map function + a map function. """ self.logger.debug(f"Processing: {item[self.source.key]}") @@ -173,7 +173,7 @@ def process_item(self, item: Dict): def update_targets(self, items: List[Dict]): """ - Generic update targets for Map Builder + Generic update targets for Map Builder. """ target = self.target for item in items: @@ -186,7 +186,7 @@ def update_targets(self, items: List[Dict]): def finalize(self): """ - Finalize MapBuilder operations including removing orphaned documents + Finalize MapBuilder operations including removing orphaned documents. """ if self.delete_orphans: source_keyvals = set(self.source.distinct(self.source.key)) @@ -214,7 +214,7 @@ class CopyBuilder(MapBuilder): def unary_function(self, item): """ - Identity function for copy builder map operation + Identity function for copy builder map operation. """ if "_id" in item: del item["_id"] diff --git a/src/maggma/builders/projection_builder.py b/src/maggma/builders/projection_builder.py index 3659d3a47..ec0dc845c 100644 --- a/src/maggma/builders/projection_builder.py +++ b/src/maggma/builders/projection_builder.py @@ -102,7 +102,7 @@ def __init__( def ensure_indexes(self): """ - Ensures key fields are indexed to improve querying efficiency + Ensures key fields are indexed to improve querying efficiency. """ index_checks = [s.ensure_index(s.key) for s in self.sources] diff --git a/src/maggma/cli/distributed.py b/src/maggma/cli/distributed.py index ceb098f42..78170b323 100644 --- a/src/maggma/cli/distributed.py +++ b/src/maggma/cli/distributed.py @@ -212,7 +212,7 @@ def handle_dead_workers(workers, socket): def worker(url: str, port: int, num_processes: int, no_bars: bool): """ Simple distributed worker that connects to a manager asks for work and deploys - using multiprocessing + using multiprocessing. """ identity = f"{randint(0, 0x10000):04X}-{randint(0, 0x10000):04X}" logger = getLogger(f"Worker {identity}") diff --git a/src/maggma/cli/multiprocessing.py b/src/maggma/cli/multiprocessing.py index 57e1bb755..6f2a407c5 100644 --- a/src/maggma/cli/multiprocessing.py +++ b/src/maggma/cli/multiprocessing.py @@ -18,7 +18,7 @@ class BackPressure: """ Wrapper for an iterator to provide - async access with backpressure + async access with backpressure. """ def __init__(self, iterator, n): @@ -38,7 +38,7 @@ async def __anext__(self): async def release(self, async_iterator): """ - release iterator to pipeline the backpressure + release iterator to pipeline the backpressure. """ async for item in async_iterator: try: @@ -53,7 +53,7 @@ class AsyncUnorderedMap: """ Async iterator that maps a function to an async iterator using an executor and returns items as they are done - This does not guarantee order + This does not guarantee order. """ def __init__(self, func, async_iterator, executor): @@ -105,7 +105,7 @@ async def __anext__(self): async def atqdm(async_iterator, *args, **kwargs): """ - Wrapper around tqdm for async generators + Wrapper around tqdm for async generators. 
""" _tqdm = tqdm(*args, **kwargs) async for item in async_iterator: @@ -119,7 +119,7 @@ async def grouper(async_iterator, n: int): """ Collect data into fixed-length chunks or blocks. >>> list(grouper(3, 'ABCDEFG')) - [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']] + [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]. Updated from: https://stackoverflow.com/questions/31164731/python-chunking-csv-file-multiproccessing/31170795#31170795 diff --git a/src/maggma/cli/rabbitmq.py b/src/maggma/cli/rabbitmq.py index 6052a4e46..e8487c5f4 100644 --- a/src/maggma/cli/rabbitmq.py +++ b/src/maggma/cli/rabbitmq.py @@ -223,7 +223,7 @@ def handle_dead_workers(connection, workers, channel, worker_queue): def worker(url: str, port: int, num_processes: int, no_bars: bool, queue_prefix: str): """ Simple distributed worker that connects to a manager asks for work and deploys - using multiprocessing + using multiprocessing. """ identity = f"{randint(0, 0x10000):04X}-{randint(0, 0x10000):04X}" logger = getLogger(f"Worker {identity}") diff --git a/src/maggma/cli/serial.py b/src/maggma/cli/serial.py index dd36ec00b..1c6c9fd5a 100644 --- a/src/maggma/cli/serial.py +++ b/src/maggma/cli/serial.py @@ -12,7 +12,7 @@ def serial(builder: Builder, no_bars=False): """ - Runs the builders using a single process + Runs the builders using a single process. """ logger = logging.getLogger("SerialProcessor") diff --git a/src/maggma/cli/source_loader.py b/src/maggma/cli/source_loader.py index 98c78930d..83be487a5 100644 --- a/src/maggma/cli/source_loader.py +++ b/src/maggma/cli/source_loader.py @@ -21,7 +21,7 @@ class ScriptFinder(MetaPathFinder): """ - Special Finder designed to find custom script builders + Special Finder designed to find custom script builders. """ @classmethod @@ -42,7 +42,7 @@ def find_spec(cls, fullname, path, target=None): class NotebookLoader(Loader): - """Module Loader for Jupyter Notebooks or Source Files""" + """Module Loader for Jupyter Notebooks or Source Files.""" def __init__(self, name=None, path=None): self.shell = InteractiveShell.instance() @@ -82,7 +82,7 @@ def spec_from_source(file_path: str) -> ModuleSpec: """ Returns a ModuleSpec from a filepath for importlib loading Specialized for loading python source files and notebooks into - a temporary maggma cli package to run as a builder + a temporary maggma cli package to run as a builder. """ file_path_obj = Path(file_path).resolve().relative_to(Path(".").resolve()) file_path_str = str(file_path_obj) @@ -115,7 +115,7 @@ def spec_from_source(file_path: str) -> ModuleSpec: def load_builder_from_source(file_path: str) -> List[Builder]: """ - Loads Maggma Builders from a Python source file + Loads Maggma Builders from a Python source file. """ file_path = str(Path(file_path).resolve()) spec = spec_from_source(file_path) @@ -135,7 +135,7 @@ def find_matching_file(segments, curr_path="./"): """ Finds file that has the right sequence of segments in the path relative to the current path - Requires all segments match the file path + Requires all segments match the file path. """ # If we've gotten to the end of the segment match check to see if a file exists diff --git a/src/maggma/cli/sources/__init__.py b/src/maggma/cli/sources/__init__.py index 665b9359a..90cd0cb81 100644 --- a/src/maggma/cli/sources/__init__.py +++ b/src/maggma/cli/sources/__init__.py @@ -1 +1 @@ -""" Dummy module to allow for loading dynamic source files """ +""" Dummy module to allow for loading dynamic source files. 
""" diff --git a/src/maggma/core/__init__.py b/src/maggma/core/__init__.py index 8f0e8d3dc..9c8cad383 100644 --- a/src/maggma/core/__init__.py +++ b/src/maggma/core/__init__.py @@ -1,4 +1,4 @@ -""" Core specifications for Maggma """ +""" Core specifications for Maggma. """ from maggma.core.builder import Builder from maggma.core.store import DateTimeFormat, Sort, Store, StoreError from maggma.core.validator import Validator diff --git a/src/maggma/core/builder.py b/src/maggma/core/builder.py index e37cd9d6d..9017d92df 100644 --- a/src/maggma/core/builder.py +++ b/src/maggma/core/builder.py @@ -1,5 +1,5 @@ """ -Module containing the core builder definition +Module containing the core builder definition. """ import logging @@ -17,7 +17,7 @@ class Builder(MSONable, metaclass=ABCMeta): Base Builder class At minimum this class should implement: get_items - Get items from the sources - update_targets - Updates the sources with results + update_targets - Updates the sources with results. Multiprocessing and MPI processing can be used if all the data processing is limited to process_items @@ -56,7 +56,7 @@ def prechunk(self, number_splits: int) -> Iterable[Dict]: Part of a domain-decomposition paradigm to allow the builder to operate on multiple nodes by dividing up the IO as well as the compute This function should return an iterator of dictionaries that can be distributed - to multiple instances of the builder to get/process/update on + to multiple instances of the builder to get/process/update on. Arguments: number_splits: The number of groups to split the documents to work on @@ -119,7 +119,7 @@ def finalize(self): def run(self, log_level=logging.DEBUG): """ Run the builder serially - This is only intended for diagnostic purposes + This is only intended for diagnostic purposes. """ # Set up logging root = logging.getLogger() diff --git a/src/maggma/core/store.py b/src/maggma/core/store.py index 864adef9a..b98a20f7f 100644 --- a/src/maggma/core/store.py +++ b/src/maggma/core/store.py @@ -1,5 +1,5 @@ """ -Module containing the core Store definition +Module containing the core Store definition. """ import logging @@ -17,14 +17,14 @@ class Sort(Enum): - """Enumeration for sorting order""" + """Enumeration for sorting order.""" Ascending = 1 Descending = -1 class DateTimeFormat(Enum): - """Datetime format in store document""" + """Datetime format in store document.""" DateTime = "datetime" IsoFormat = "isoformat" @@ -33,7 +33,7 @@ class DateTimeFormat(Enum): class Store(MSONable, metaclass=ABCMeta): """ Abstract class for a data Store - Defines the interface for all data going in and out of a Builder + Defines the interface for all data going in and out of a Builder. """ def __init__( @@ -49,7 +49,7 @@ def __init__( last_updated_field: field for date/time stamping the data last_updated_type: the date/time format for the last_updated_field. Can be "datetime" or "isoformat" - validator: Validator to validate documents going into the store + validator: Validator to validate documents going into the store. """ self.key = key self.last_updated_field = last_updated_field @@ -64,19 +64,19 @@ def __init__( @abstractproperty def _collection(self): """ - Returns a handle to the pymongo collection object + Returns a handle to the pymongo collection object. """ @abstractproperty def name(self) -> str: """ - Return a string representing this data source + Return a string representing this data source. 
""" @abstractmethod def connect(self, force_reset: bool = False): """ - Connect to the source data + Connect to the source data. Args: force_reset: whether to reset the connection or not @@ -85,13 +85,13 @@ def connect(self, force_reset: bool = False): @abstractmethod def close(self): """ - Closes any connections + Closes any connections. """ @abstractmethod def count(self, criteria: Optional[Dict] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -107,7 +107,7 @@ def query( limit: int = 0, ) -> Iterator[Dict]: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: PyMongo filter for documents to search in @@ -121,7 +121,7 @@ def query( @abstractmethod def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None): """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -134,7 +134,7 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No @abstractmethod def ensure_index(self, key: str, unique: bool = False) -> bool: """ - Tries to create an index and return true if it succeeded + Tries to create an index and return true if it succeeded. Args: key: single key to index @@ -174,7 +174,7 @@ def groupby( @abstractmethod def remove_docs(self, criteria: Dict): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -187,7 +187,7 @@ def query_one( sort: Optional[Dict[str, Union[Sort, int]]] = None, ): """ - Queries the Store for a single document + Queries the Store for a single document. Args: criteria: PyMongo filter for documents to search @@ -199,7 +199,7 @@ def query_one( def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool = False) -> List: """ - Get all distinct values for a field + Get all distinct values for a field. Args: field: the field(s) to get distinct values for @@ -214,7 +214,7 @@ def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool def last_updated(self) -> datetime: """ Provides the most recent last_updated date time stamp from - the documents in this Store + the documents in this Store. """ doc = next( self.query( @@ -295,7 +295,7 @@ def lu_filter(self, targets): def updated_keys(self, target, criteria=None): """ Returns keys for docs that are newer in the target store in comparison - with this store when comparing the last updated field (last_updated_field) + with this store when comparing the last updated field (last_updated_field). Args: target (Store): store to look for updated documents @@ -329,7 +329,7 @@ def __exit__(self, exception_type, exception_value, traceback): class StoreError(Exception): - """General Store-related error""" + """General Store-related error.""" def __init__(self, *args, **kwargs): super().__init__(self, *args, **kwargs) diff --git a/src/maggma/core/validator.py b/src/maggma/core/validator.py index 4f270f937..f323658af 100644 --- a/src/maggma/core/validator.py +++ b/src/maggma/core/validator.py @@ -20,7 +20,7 @@ class Validator(MSONable, metaclass=ABCMeta): @abstractmethod def is_valid(self, doc: Dict) -> bool: """ - Determines if the document is valid + Determines if the document is valid. 
Args: doc: document to check @@ -30,7 +30,7 @@ def is_valid(self, doc: Dict) -> bool: def validation_errors(self, doc: Dict) -> List[str]: """ If document is not valid, provides a list of - strings to display for why validation has failed + strings to display for why validation has failed. Returns empty list if the document is valid diff --git a/src/maggma/stores/__init__.py b/src/maggma/stores/__init__.py index 5bc0b8c52..2b31fb0c1 100644 --- a/src/maggma/stores/__init__.py +++ b/src/maggma/stores/__init__.py @@ -1,4 +1,4 @@ -""" Root store module with easy imports for implemented Stores """ +""" Root store module with easy imports for implemented Stores. """ from maggma.core import Store from maggma.stores.advanced_stores import AliasingStore, MongograntStore, SandboxStore, VaultStore from maggma.stores.aws import S3Store diff --git a/src/maggma/stores/advanced_stores.py b/src/maggma/stores/advanced_stores.py index 428e16d5e..138aa06a3 100644 --- a/src/maggma/stores/advanced_stores.py +++ b/src/maggma/stores/advanced_stores.py @@ -1,5 +1,5 @@ """ -Advanced Stores for behavior outside normal access patterns +Advanced Stores for behavior outside normal access patterns. """ import json import os @@ -85,20 +85,20 @@ def __hash__(self): @classmethod def from_db_file(cls, file): """ - Raises ValueError since MongograntStores can't be initialized from a file + Raises ValueError since MongograntStores can't be initialized from a file. """ raise ValueError("MongograntStore doesn't implement from_db_file") @classmethod def from_collection(cls, collection): """ - Raises ValueError since MongograntStores can't be initialized from a PyMongo collection + Raises ValueError since MongograntStores can't be initialized from a PyMongo collection. """ raise ValueError("MongograntStore doesn't implement from_collection") def __eq__(self, other: object) -> bool: """ - Check equality for MongograntStore + Check equality for MongograntStore. Args: other: other MongograntStore to compare with @@ -118,7 +118,7 @@ def __eq__(self, other: object) -> bool: class VaultStore(MongoStore): """ Extends MongoStore to read credentials out of Vault server - and uses these values to initialize MongoStore instance + and uses these values to initialize MongoStore instance. """ @requires(hvac is not None, "hvac is required to use VaultStore") @@ -126,7 +126,7 @@ def __init__(self, collection_name: str, vault_secret_path: str): """ Args: collection_name: name of mongo collection - vault_secret_path: path on vault server with mongo creds object + vault_secret_path: path on vault server with mongo creds object. Important: Environment variables that must be set prior to invocation @@ -174,7 +174,7 @@ def __init__(self, collection_name: str, vault_secret_path: str): def __eq__(self, other: object) -> bool: """ - Check equality for VaultStore + Check equality for VaultStore. Args: other: other VaultStore to compare with @@ -188,14 +188,14 @@ def __eq__(self, other: object) -> bool: class AliasingStore(Store): """ - Special Store that aliases for the primary accessors + Special Store that aliases for the primary accessors. """ def __init__(self, store: Store, aliases: Dict, **kwargs): """ Args: store: the store to wrap around - aliases: dict of aliases of the form external key: internal key + aliases: dict of aliases of the form external key: internal key. 
""" self.store = store # Given an external key tells what the internal key is @@ -215,13 +215,13 @@ def __init__(self, store: Store, aliases: Dict, **kwargs): @property def name(self) -> str: """ - Return a string representing this data source + Return a string representing this data source. """ return self.store.name def count(self, criteria: Optional[Dict] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -239,7 +239,7 @@ def query( limit: int = 0, ) -> Iterator[Dict]: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: PyMongo filter for documents to search in @@ -264,7 +264,7 @@ def query( def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool = False) -> List: """ - Get all distinct values for a field + Get all distinct values for a field. Args: field: the field(s) to get distinct values for @@ -321,7 +321,7 @@ def groupby( def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None): """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -342,7 +342,7 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No def remove_docs(self, criteria: Dict): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -368,7 +368,7 @@ def connect(self, force_reset=False): def __eq__(self, other: object) -> bool: """ - Check equality for AliasingStore + Check equality for AliasingStore. Args: other: other AliasingStore to compare with @@ -382,7 +382,7 @@ def __eq__(self, other: object) -> bool: class SandboxStore(Store): """ - Provides a sandboxed view to another store + Provides a sandboxed view to another store. """ def __init__(self, store: Store, sandbox: str, exclusive: bool = False): @@ -390,7 +390,7 @@ def __init__(self, store: Store, sandbox: str, exclusive: bool = False): Args: store: store to wrap sandboxing around sandbox: the corresponding sandbox - exclusive: whether to be exclusively in this sandbox or include global items + exclusive: whether to be exclusively in this sandbox or include global items. """ self.store = store self.sandbox = sandbox @@ -406,7 +406,7 @@ def __init__(self, store: Store, sandbox: str, exclusive: bool = False): def name(self) -> str: """ Returns: - a string representing this data source + a string representing this data source. """ return f"Sandbox[{self.store.name}][{self.sandbox}]" @@ -414,7 +414,7 @@ def name(self) -> str: def sbx_criteria(self) -> Dict: """ Returns: - the sandbox criteria dict used to filter the source store + the sandbox criteria dict used to filter the source store. """ if self.exclusive: return {"sbxn": self.sandbox} @@ -422,7 +422,7 @@ def sbx_criteria(self) -> Dict: def count(self, criteria: Optional[Dict] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -439,7 +439,7 @@ def query( limit: int = 0, ) -> Iterator[Dict]: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. 
Args: criteria: PyMongo filter for documents to search in @@ -483,7 +483,7 @@ def groupby( def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None): """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -502,7 +502,7 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No def remove_docs(self, criteria: Dict): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -526,7 +526,7 @@ def connect(self, force_reset=False): def __eq__(self, other: object) -> bool: """ - Check equality for SandboxStore + Check equality for SandboxStore. Args: other: other SandboxStore to compare with diff --git a/src/maggma/stores/aws.py b/src/maggma/stores/aws.py index 505671174..6059aec68 100644 --- a/src/maggma/stores/aws.py +++ b/src/maggma/stores/aws.py @@ -398,7 +398,7 @@ def _get_resource_and_bucket(self): return resource, bucket def _get_full_key_path(self, id: str) -> str: - """Produces the full key path for S3 items + """Produces the full key path for S3 items. Args: id (str): The value of the key identifier. diff --git a/src/maggma/stores/azure.py b/src/maggma/stores/azure.py index 8f2dcbc91..ca7af1154 100644 --- a/src/maggma/stores/azure.py +++ b/src/maggma/stores/azure.py @@ -1,5 +1,5 @@ """ -Advanced Stores for connecting to Microsoft Azure data +Advanced Stores for connecting to Microsoft Azure data. """ import os import threading @@ -56,7 +56,7 @@ def __init__( **kwargs, ): """ - Initializes an AzureBlob Store + Initializes an AzureBlob Store. Args: index: a store to use to index the Azure blob @@ -122,13 +122,13 @@ def __init__( def name(self) -> str: """ Returns: - a string representing this data source + a string representing this data source. """ return f"container://{self.container_name}" def connect(self, *args, **kwargs): # lgtm[py/conflicting-attributes] """ - Connect to the source data + Connect to the source data. """ service_client = self._get_service_client() @@ -151,7 +151,7 @@ def connect(self, *args, **kwargs): # lgtm[py/conflicting-attributes] def close(self): """ - Closes any connections + Closes any connections. """ self.index.close() self.service = None @@ -161,7 +161,7 @@ def close(self): def _collection(self): """ Returns: - a handle to the pymongo collection object + a handle to the pymongo collection object. Important: Not guaranteed to exist in the future @@ -171,7 +171,7 @@ def _collection(self): def count(self, criteria: Optional[Dict] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -188,7 +188,7 @@ def query( limit: int = 0, ) -> Iterator[Dict]: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: PyMongo filter for documents to search in @@ -241,7 +241,7 @@ def _unpack(data: bytes, compressed: bool): def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool = False) -> List: """ - Get all distinct values for a field + Get all distinct values for a field. Args: field: the field(s) to get distinct values for @@ -286,7 +286,7 @@ def groupby( def ensure_index(self, key: str, unique: bool = False) -> bool: """ - Tries to create an index and return true if it succeeded + Tries to create an index and return true if it succeeded. 
Args: key: single key to index @@ -304,7 +304,7 @@ def update( additional_metadata: Union[str, List[str], None] = None, ): """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -380,7 +380,7 @@ def _get_container(self) -> Optional[ContainerClient]: def write_doc_to_blob(self, doc: Dict, search_keys: List[str]): """ - Write the data to an Azure blob and return the metadata to be inserted into the index db + Write the data to an Azure blob and return the metadata to be inserted into the index db. Args: doc: the document @@ -451,7 +451,7 @@ def _sanitize_key(self, key): def remove_docs(self, criteria: Dict, remove_blob_object: bool = False): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -500,7 +500,7 @@ def rebuild_index_from_blob_data(self, **kwargs): """ Rebuilds the index Store from the data in Azure Relies on the index document being stores as the metadata for the file - This can help recover lost databases + This can help recover lost databases. """ objects = self.container.list_blobs(name_starts_with=self.sub_dir) @@ -524,7 +524,7 @@ def rebuild_metadata_from_index(self, index_query: Optional[Dict] = None): Read data from the index store and populate the metadata of the Azure Blob. Force all of the keys to be lower case to be Minio compatible Args: - index_query: query on the index store + index_query: query on the index store. """ if self.container is None or self.service is None: raise RuntimeError("The store has not been connected") @@ -545,7 +545,7 @@ def rebuild_metadata_from_index(self, index_query: Optional[Dict] = None): def __eq__(self, other: object) -> bool: """ Check equality for AzureBlobStore - other: other AzureBlobStore to compare with + other: other AzureBlobStore to compare with. """ if not isinstance(other, AzureBlobStore): return False diff --git a/src/maggma/stores/compound_stores.py b/src/maggma/stores/compound_stores.py index b06441eb4..5c8a51dd0 100644 --- a/src/maggma/stores/compound_stores.py +++ b/src/maggma/stores/compound_stores.py @@ -1,4 +1,4 @@ -""" Special stores that combine underlying Stores together """ +""" Special stores that combine underlying Stores together. """ from datetime import datetime from itertools import groupby from typing import Dict, Iterator, List, Optional, Tuple, Union @@ -40,7 +40,7 @@ def __init__( password: Password to connect with main: name for the main collection if not specified this defaults to the first - in collection_names list + in collection_names list. """ self.database = database self.collection_names = collection_names @@ -59,14 +59,14 @@ def __init__( @property def name(self) -> str: """ - Return a string representing this data source + Return a string representing this data source. """ compound_name = ",".join(self.collection_names) return f"Compound[{self.host}/{self.database}][{compound_name}]" def connect(self, force_reset: bool = False): """ - Connects the underlying Mongo database and all collection connections + Connects the underlying Mongo database and all collection connections. Args: force_reset: whether to reset the connection or not when the Store is @@ -90,13 +90,13 @@ def connect(self, force_reset: bool = False): def close(self): """ - Closes underlying database connections + Closes underlying database connections. 
""" self._collection.database.client.close() @property def _collection(self): - """Property referring to the root pymongo collection""" + """Property referring to the root pymongo collection.""" if self._coll is None: raise StoreError("Must connect Mongo-like store before attempting to use it") return self._coll @@ -104,7 +104,7 @@ def _collection(self): @property def nonmain_names(self) -> List: """ - alll non-main collection names + alll non-main collection names. """ return list(set(self.collection_names) - {self.main}) @@ -112,7 +112,7 @@ def nonmain_names(self) -> List: def last_updated(self) -> datetime: """ Special last_updated for this JointStore - that checks all underlying collections + that checks all underlying collections. """ lus = [] for cname in self.collection_names: @@ -126,13 +126,13 @@ def last_updated(self) -> datetime: def update(self, docs, update_lu=True, key=None, **kwargs): """ Update documents into the underlying collections - Not Implemented for JointStore + Not Implemented for JointStore. """ raise NotImplementedError("JointStore is a read-only store") def _get_store_by_name(self, name) -> MongoStore: """ - Gets an underlying collection as a mongoStore + Gets an underlying collection as a mongoStore. """ if name not in self.collection_names: raise ValueError("Asking for collection not referenced in this Store") @@ -140,13 +140,13 @@ def _get_store_by_name(self, name) -> MongoStore: def ensure_index(self, key, unique=False, **kwargs): """ - Can't ensure index for JointStore + Can't ensure index for JointStore. """ raise NotImplementedError("No ensure_index method for JointStore") def _get_pipeline(self, criteria=None, properties=None, skip=0, limit=0): """ - Gets the aggregation pipeline for query and query_one + Gets the aggregation pipeline for query and query_one. Args: properties: properties to be returned @@ -218,7 +218,7 @@ def _get_pipeline(self, criteria=None, properties=None, skip=0, limit=0): def count(self, criteria: Optional[Dict] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -264,7 +264,7 @@ def groupby( def query_one(self, criteria=None, properties=None, **kwargs): """ - Get one document + Get one document. Args: properties: properties to return in query @@ -285,7 +285,7 @@ def query_one(self, criteria=None, properties=None, **kwargs): def remove_docs(self, criteria: Dict): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -296,7 +296,7 @@ def __eq__(self, other: object) -> bool: """ Check equality for JointStore Args: - other: other JointStore to compare with + other: other JointStore to compare with. """ if not isinstance(other, JointStore): return False @@ -313,12 +313,12 @@ def __eq__(self, other: object) -> bool: class ConcatStore(Store): - """Store concatting multiple stores""" + """Store concatting multiple stores.""" def __init__(self, stores: List[Store], **kwargs): """ Initialize a ConcatStore that concatenates multiple stores together - to appear as one store + to appear as one store. Args: stores: list of stores to concatenate together @@ -330,14 +330,14 @@ def __init__(self, stores: List[Store], **kwargs): @property def name(self) -> str: """ - A string representing this data source + A string representing this data source. 
""" compound_name = ",".join([store.name for store in self.stores]) return f"Concat[{compound_name}]" def connect(self, force_reset: bool = False): """ - Connect all stores in this ConcatStore + Connect all stores in this ConcatStore. Args: force_reset: Whether to forcibly reset the connection for all stores @@ -347,7 +347,7 @@ def connect(self, force_reset: bool = False): def close(self): """ - Close all connections in this ConcatStore + Close all connections in this ConcatStore. """ for store in self.stores: store.close() @@ -362,7 +362,7 @@ def last_updated(self) -> datetime: Finds the most recent last_updated across all the stores. This might not be the most useful way to do this for this type of Store since it could very easily over-estimate the last_updated based on what stores - are used + are used. """ lus = [] for store in self.stores: @@ -373,7 +373,7 @@ def last_updated(self) -> datetime: def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None): """ Update documents into the Store - Not implemented in ConcatStore + Not implemented in ConcatStore. Args: docs: the document or list of documents to update @@ -386,7 +386,7 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool = False) -> List: """ - Get all distinct values for a field + Get all distinct values for a field. Args: field: the field(s) to get distinct values for @@ -400,7 +400,7 @@ def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool def ensure_index(self, key: str, unique: bool = False) -> bool: """ - Ensure an index is properly set. Returns whether all stores support this index or not + Ensure an index is properly set. Returns whether all stores support this index or not. Args: key: single key to index @@ -413,7 +413,7 @@ def ensure_index(self, key: str, unique: bool = False) -> bool: def count(self, criteria: Optional[Dict] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -431,7 +431,7 @@ def query( limit: int = 0, ) -> Iterator[Dict]: """ - Queries across all Store for a set of documents + Queries across all Store for a set of documents. Args: criteria: PyMongo filter for documents to search in @@ -490,7 +490,7 @@ def groupby( docs.extend(group) def key_set(d: Dict) -> Tuple: - "index function based on passed in keys" + "index function based on passed in keys." return tuple(d.get(k, None) for k in keys) sorted_docs = sorted(docs, key=key_set) @@ -500,7 +500,7 @@ def key_set(d: Dict) -> Tuple: def remove_docs(self, criteria: Dict): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -509,7 +509,7 @@ def remove_docs(self, criteria: Dict): def __eq__(self, other: object) -> bool: """ - Check equality for ConcatStore + Check equality for ConcatStore. Args: other: other JointStore to compare with diff --git a/src/maggma/stores/file_store.py b/src/maggma/stores/file_store.py index 2eaf2e1de..9a8595c4b 100644 --- a/src/maggma/stores/file_store.py +++ b/src/maggma/stores/file_store.py @@ -119,7 +119,7 @@ def __init__( @property def name(self) -> str: """ - Return a string representing this data source + Return a string representing this data source. 
""" return f"file://{self.path}" @@ -265,7 +265,7 @@ def _create_record_from_file(self, f: Path) -> Dict: def connect(self, force_reset: bool = False): """ - Connect to the source data + Connect to the source data. Read all the files in the directory, create corresponding File items in the internal MemoryStore. @@ -358,7 +358,7 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No def _filter_data(self, d): """ - Remove any protected keys from a dictionary + Remove any protected keys from a dictionary. Args: d: Dictionary whose keys are to be filtered @@ -376,7 +376,7 @@ def query( # type: ignore contents_size_limit: Optional[int] = 0, ) -> Iterator[Dict]: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: PyMongo filter for documents to search in @@ -461,7 +461,7 @@ def query_one( contents_size_limit: Optional[int] = None, ): """ - Queries the Store for a single document + Queries the Store for a single document. Args: criteria: PyMongo filter for documents to search diff --git a/src/maggma/stores/gridfs.py b/src/maggma/stores/gridfs.py index 23fe10a81..99b98d313 100644 --- a/src/maggma/stores/gridfs.py +++ b/src/maggma/stores/gridfs.py @@ -1,7 +1,7 @@ """ Module containing various definitions of Stores. Stores are a default access pattern to data and provide -various utilities +various utilities. """ import copy @@ -39,7 +39,7 @@ class GridFSStore(Store): """ - A Store for GridFS backend. Provides a common access method consistent with other stores + A Store for GridFS backend. Provides a common access method consistent with other stores. """ def __init__( @@ -100,7 +100,7 @@ def __init__( @classmethod def from_launchpad_file(cls, lp_file, collection_name, **kwargs): """ - Convenience method to construct a GridFSStore from a launchpad file + Convenience method to construct a GridFSStore from a launchpad file. Note: A launchpad file is a special formatted yaml file used in fireworks @@ -121,13 +121,13 @@ def from_launchpad_file(cls, lp_file, collection_name, **kwargs): @property def name(self) -> str: """ - Return a string representing this data source + Return a string representing this data source. """ return f"gridfs://{self.host}/{self.database}/{self.collection_name}" def connect(self, force_reset: bool = False): """ - Connect to the source data + Connect to the source data. Args: force_reset: whether to reset the connection or not when the Store is @@ -163,7 +163,7 @@ def connect(self, force_reset: bool = False): @property def _collection(self): - """Property referring to underlying pymongo collection""" + """Property referring to underlying pymongo collection.""" if self._coll is None: raise StoreError("Must connect Mongo-like store before attempting to use it") return self._coll @@ -172,7 +172,7 @@ def _collection(self): def last_updated(self) -> datetime: """ Provides the most recent last_updated date time stamp from - the documents in this Store + the documents in this Store. """ return self._files_store.last_updated @@ -195,7 +195,7 @@ def transform_criteria(cls, criteria: Dict) -> Dict: def count(self, criteria: Optional[Dict] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -274,7 +274,7 @@ def query( def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool = False) -> List: """ Get all distinct values for a field. 
This function only operates - on the metadata in the files collection + on the metadata in the files collection. Args: field: the field(s) to get distinct values for @@ -300,7 +300,7 @@ def groupby( """ Simple grouping function that will group documents by keys. Will only work if the keys are included in the files - collection for GridFS + collection for GridFS. Args: keys: fields to group documents @@ -333,7 +333,7 @@ def ensure_index(self, key: str, unique: Optional[bool] = False) -> bool: Currently operators on the GridFS files collection Args: key: single key to index - unique: Whether or not this index contains only unique keys + unique: Whether or not this index contains only unique keys. Returns: bool indicating if the index exists/was created @@ -351,7 +351,7 @@ def update( additional_metadata: Union[str, List[str], None] = None, ): """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -402,7 +402,7 @@ def update( def remove_docs(self, criteria: Dict): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -423,7 +423,7 @@ def close(self): def __eq__(self, other: object) -> bool: """ Check equality for GridFSStore - other: other GridFSStore to compare with + other: other GridFSStore to compare with. """ if not isinstance(other, GridFSStore): return False @@ -459,7 +459,7 @@ def __init__( collection_name: The collection name compression: compress the data as it goes into GridFS ensure_metadata: ensure returned documents have the metadata fields - searchable_fields: fields to keep in the index store + searchable_fields: fields to keep in the index store. """ self.uri = uri @@ -487,7 +487,7 @@ def __init__( def connect(self, force_reset: bool = False): """ - Connect to the source data + Connect to the source data. Args: force_reset: whether to reset the connection or not when the Store is diff --git a/src/maggma/stores/mongolike.py b/src/maggma/stores/mongolike.py index c03dc8866..b910aaad2 100644 --- a/src/maggma/stores/mongolike.py +++ b/src/maggma/stores/mongolike.py @@ -1,7 +1,7 @@ """ Module containing various definitions of Stores. Stores are a default access pattern to data and provide -various utilities +various utilities. """ import warnings @@ -41,7 +41,7 @@ class MongoStore(Store): """ - A Store that connects to a Mongo collection + A Store that connects to a Mongo collection. """ def __init__( @@ -94,13 +94,13 @@ def __init__( @property def name(self) -> str: """ - Return a string representing this data source + Return a string representing this data source. """ return f"mongo://{self.host}/{self.database}/{self.collection_name}" def connect(self, force_reset: bool = False): """ - Connect to the source data + Connect to the source data. Args: force_reset: whether to reset the connection or not when the Store is @@ -130,14 +130,14 @@ def connect(self, force_reset: bool = False): self._coll = db[self.collection_name] # type: ignore def __hash__(self) -> int: - """Hash for MongoStore""" + """Hash for MongoStore.""" return hash((self.database, self.collection_name, self.last_updated_field)) @classmethod def from_db_file(cls, filename: str, **kwargs): """ Convenience method to construct MongoStore from db_file - from old QueryEngine format + from old QueryEngine format. 
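# Editor's example (a minimal sketch): constructing and connecting a MongoStore;
# the host, database, and collection values are placeholders for a local instance.
from maggma.stores import MongoStore

store = MongoStore(database="maggma_demo", collection_name="tasks", host="localhost", port=27017)
store.connect()
print(store.name)  # "mongo://localhost/maggma_demo/tasks", per the name property above
print(store.distinct("task_id"))
store.close()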
""" kwargs = loadfn(filename) if "collection" in kwargs: @@ -149,7 +149,7 @@ def from_db_file(cls, filename: str, **kwargs): @classmethod def from_launchpad_file(cls, lp_file, collection_name, **kwargs): """ - Convenience method to construct MongoStore from a launchpad file + Convenience method to construct MongoStore from a launchpad file. Note: A launchpad file is a special formatted yaml file used in fireworks @@ -169,7 +169,7 @@ def from_launchpad_file(cls, lp_file, collection_name, **kwargs): def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool = False) -> List: """ - Get all distinct values for a field + Get all distinct values for a field. Args: field: the field(s) to get distinct values for @@ -243,7 +243,7 @@ def from_collection(cls, collection): """ Generates a MongoStore from a pymongo collection object This is not a fully safe operation as it gives dummy information to the MongoStore - As a result, this will not serialize and can not reset its connection + As a result, this will not serialize and can not reset its connection. Args: collection: the PyMongo collection to create a MongoStore around @@ -258,7 +258,7 @@ def from_collection(cls, collection): @property def _collection(self): - """Property referring to underlying pymongo collection""" + """Property referring to underlying pymongo collection.""" if self._coll is None: raise StoreError("Must connect Mongo-like store before attempting to use it") return self._coll @@ -269,7 +269,7 @@ def count( hint: Optional[Dict[str, Union[Sort, int]]] = None, ) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -303,7 +303,7 @@ def query( # type: ignore **kwargs, ) -> Iterator[Dict]: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: PyMongo filter for documents to search in @@ -351,7 +351,7 @@ def ensure_index(self, key: str, unique: Optional[bool] = False) -> bool: Tries to create an index and return true if it succeeded Args: key: single key to index - unique: Whether or not this index contains only unique keys + unique: Whether or not this index contains only unique keys. Returns: bool indicating if the index exists/was created @@ -368,7 +368,7 @@ def ensure_index(self, key: str, unique: Optional[bool] = False) -> bool: def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None): """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -416,7 +416,7 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No def remove_docs(self, criteria: Dict): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -424,7 +424,7 @@ def remove_docs(self, criteria: Dict): self._collection.delete_many(filter=criteria) def close(self): - """Close up all collections""" + """Close up all collections.""" self._collection.database.client.close() self._coll = None if self.ssh_tunnel is not None: @@ -433,7 +433,7 @@ def close(self): def __eq__(self, other: object) -> bool: """ Check equality for MongoStore - other: other mongostore to compare with + other: other mongostore to compare with. 
""" if not isinstance(other, MongoStore): return False @@ -446,7 +446,7 @@ class MongoURIStore(MongoStore): """ A Store that connects to a Mongo collection via a URI This is expected to be a special mongodb+srv:// URIs that include - client parameters via TXT records + client parameters via TXT records. """ def __init__( @@ -489,14 +489,14 @@ def __init__( @property def name(self) -> str: """ - Return a string representing this data source + Return a string representing this data source. """ # TODO: This is not very safe since it exposes the username/password info return self.uri def connect(self, force_reset: bool = False): """ - Connect to the source data + Connect to the source data. Args: force_reset: whether to reset the connection or not when the Store is @@ -511,14 +511,14 @@ def connect(self, force_reset: bool = False): class MemoryStore(MongoStore): """ An in-memory Store that functions similarly - to a MongoStore + to a MongoStore. """ def __init__(self, collection_name: str = "memory_db", **kwargs): """ Initializes the Memory Store Args: - collection_name: name for the collection in memory + collection_name: name for the collection in memory. """ self.collection_name = collection_name self.default_sort = None @@ -528,7 +528,7 @@ def __init__(self, collection_name: str = "memory_db", **kwargs): def connect(self, force_reset: bool = False): """ - Connect to the source data + Connect to the source data. Args: force_reset: whether to reset the connection or not when the Store is @@ -538,16 +538,16 @@ def connect(self, force_reset: bool = False): self._coll = mongomock.MongoClient().db[self.name] # type: ignore def close(self): - """Close up all collections""" + """Close up all collections.""" self._coll.database.client.close() @property def name(self): - """Name for the store""" + """Name for the store.""" return f"mem://{self.collection_name}" def __hash__(self): - """Hash for the store""" + """Hash for the store.""" return hash((self.name, self.last_updated_field)) def groupby( @@ -598,7 +598,7 @@ def grouping_keys(doc): def __eq__(self, other: object) -> bool: """ Check equality for MemoryStore - other: other MemoryStore to compare with + other: other MemoryStore to compare with. """ if not isinstance(other, MemoryStore): return False @@ -609,7 +609,7 @@ def __eq__(self, other: object) -> bool: class JSONStore(MemoryStore): """ - A Store for access to a single or multiple JSON files + A Store for access to a single or multiple JSON files. """ def __init__( @@ -671,7 +671,7 @@ def __init__( def connect(self, force_reset: bool = False): """ - Loads the files into the collection in memory + Loads the files into the collection in memory. Args: force_reset: whether to reset the connection or not. If False (default) and .connect() @@ -776,7 +776,7 @@ def __hash__(self): def __eq__(self, other: object) -> bool: """ - Check equality for JSONStore + Check equality for JSONStore. Args: other: other JSONStore to compare with @@ -885,7 +885,7 @@ def count( hint: Optional[Dict[str, Union[Sort, int]]] = None, ) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. 
Args: criteria: PyMongo filter for documents to count in diff --git a/src/maggma/stores/open_data.py b/src/maggma/stores/open_data.py index 2353c27e6..8fc02548a 100644 --- a/src/maggma/stores/open_data.py +++ b/src/maggma/stores/open_data.py @@ -45,7 +45,7 @@ def __init__( """ Args: key: main key to index on - last_updated_field: field for date/time stamping the data + last_updated_field: field for date/time stamping the data. """ self._data = None self.key = key @@ -82,7 +82,7 @@ def query( criteria_fields: Union[List, None] = None, ) -> pd.DataFrame: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: if there's a `query` key, it's value will be used as the @@ -160,7 +160,7 @@ def _query( def count(self, criteria: Optional[Dict] = None, criteria_fields: Union[List, None] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Returns: int: the number of documents matching the query criteria @@ -175,7 +175,7 @@ def distinct( self, field: str, criteria: Optional[Dict] = None, criteria_fields: Union[List, None] = None ) -> pd.Series: """ - Get all distinct values for a field + Get all distinct values for a field. Args: field: the field(s) to get distinct values for @@ -193,7 +193,7 @@ def distinct( def last_updated(self) -> datetime: """ Provides the most recent last_updated date time stamp from - the documents in this Store + the documents in this Store. """ if self._data is None: return datetime.min @@ -280,7 +280,7 @@ def get_merged_items(self, to_dt: pd.DataFrame, from_dt: pd.DataFrame) -> pd.Dat def update(self, docs: pd.DataFrame) -> pd.DataFrame: """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -300,13 +300,13 @@ def _field_exists(self, key: str) -> bool: return key in self._data def __hash__(self): - """Hash for the store""" + """Hash for the store.""" return hash((self.key, self.last_updated_field)) def __eq__(self, other: object) -> bool: """ Check equality for PandasMemoryStore - other: other PandasMemoryStore to compare with + other: other PandasMemoryStore to compare with. """ if not isinstance(other, PandasMemoryStore): return False @@ -332,7 +332,7 @@ def __init__( manifest_key: str = "manifest.jsonl", **kwargs, ): - """Initializes an S3IndexStore + """Initializes an S3IndexStore. Args: collection_name (str): name of the collection @@ -467,7 +467,7 @@ def __init__( object_grouping: Optional[List[str]] = None, **kwargs, ): - """Initializes an OpenDataStore + """Initializes an OpenDataStore. Args: index (S3IndexStore): The store that'll be used as the index, @@ -547,7 +547,7 @@ def query( criteria_fields: Union[List, None] = None, ) -> pd.DataFrame: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: if there's a `query` key, it's value will be used as the diff --git a/src/maggma/stores/shared_stores.py b/src/maggma/stores/shared_stores.py index 683a4c374..35a2b461f 100644 --- a/src/maggma/stores/shared_stores.py +++ b/src/maggma/stores/shared_stores.py @@ -60,20 +60,20 @@ def __setattr__(self, name: str, value: Any): @property def _collection(self): """ - Returns a handle to the pymongo collection object + Returns a handle to the pymongo collection object. 
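# Editor's example (a rough, hedged sketch of the DataFrame-backed
# PandasMemoryStore described above; exact criteria/return semantics follow the
# docstrings in this patch and may differ between maggma versions).
import pandas as pd

from maggma.stores.open_data import PandasMemoryStore

store = PandasMemoryStore(key="task_id")
store.update(pd.DataFrame([{"task_id": 1, "energy": -1.5}]))
print(store.count())              # 1
print(store.distinct("task_id"))  # returns a pandas Series, per the docstring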
""" return self.multistore.store_collection(self.store) @property def name(self) -> str: """ - Return a string representing this data source + Return a string representing this data source. """ return self.multistore.store_name(self.store) def connect(self, force_reset: bool = False): """ - Connect to the source data + Connect to the source data. Args: force_reset: whether to reset the connection or not when the Store is @@ -83,13 +83,13 @@ def connect(self, force_reset: bool = False): def close(self): """ - Closes any connections + Closes any connections. """ self.multistore.close(self.store) def count(self, criteria: Optional[Dict] = None) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -105,7 +105,7 @@ def query( limit: int = 0, ) -> Iterator[Dict]: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: PyMongo filter for documents to search in @@ -126,7 +126,7 @@ def query( def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None, **kwargs): """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -139,7 +139,7 @@ def update(self, docs: Union[List[Dict], Dict], key: Union[List, str, None] = No def ensure_index(self, key: str, unique: bool = False, **kwargs) -> bool: """ - Tries to create an index and return true if it succeeded + Tries to create an index and return true if it succeeded. Args: key: single key to index @@ -182,7 +182,7 @@ def groupby( def remove_docs(self, criteria: Dict, **kwargs): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -197,7 +197,7 @@ def query_one( **kwargs, ): """ - Queries the Store for a single document + Queries the Store for a single document. Args: criteria: PyMongo filter for documents to search @@ -209,7 +209,7 @@ def query_one( def distinct(self, field: str, criteria: Optional[Dict] = None, all_exist: bool = False, **kwargs) -> List: """ - Get all distinct values for a field + Get all distinct values for a field. Args: field: the field(s) to get distinct values for @@ -257,7 +257,7 @@ class MultiStore: def __init__(self, **kwargs): """ - Initializes a MultiStore + Initializes a MultiStore. """ # Keep a list of stores, since there is no way to hash a store (to use a dict) self._stores = [] @@ -267,7 +267,7 @@ def __init__(self, **kwargs): def get_store_index(self, store: Store) -> Optional[int]: """ Gets the index of the store in the list of stores. - If it doesn't exist, returns None + If it doesn't exist, returns None. Note: this is not a search for an instance of a store, but rather a search for a equivalent store @@ -285,7 +285,7 @@ def get_store_index(self, store: Store) -> Optional[int]: def add_store(self, store: Store): """ - Adds a store to the list of cached stores + Adds a store to the list of cached stores. Args: store: The store to cache @@ -331,7 +331,7 @@ def ensure_store(self, store: Store) -> bool: def count_stores(self) -> int: """ - Returns the number of stores in the multistore + Returns the number of stores in the multistore. 
Returns: int indicating the number of stores @@ -349,7 +349,7 @@ def store_name(self, store) -> str: def connect(self, store, force_reset: bool = False): """ - For a given store, connect to the source data + For a given store, connect to the source data. Args: store: the store to connect to the source data @@ -362,7 +362,7 @@ def connect(self, store, force_reset: bool = False): def close(self, store: Store): """ - For a given store, close any connections + For a given store, close any connections. Args: store: the store to close connections to @@ -373,7 +373,7 @@ def close(self, store: Store): def connect_all(self, force_reset: bool = False): """ - Connects to all stores + Connects to all stores. Args: force_reset: whether to reset the connection or not when the Store is @@ -385,7 +385,7 @@ def connect_all(self, force_reset: bool = False): def close_all(self): """ - Closes all connections + Closes all connections. """ with self._multistore_lock: for store in self._stores: @@ -393,7 +393,7 @@ def close_all(self): def count(self, store: Store, criteria: Optional[Dict] = None, **kwargs) -> int: """ - Counts the number of documents matching the query criteria + Counts the number of documents matching the query criteria. Args: criteria: PyMongo filter for documents to count in @@ -412,7 +412,7 @@ def query( **kwargs, ) -> List[Dict]: """ - Queries the Store for a set of documents + Queries the Store for a set of documents. Args: criteria: PyMongo filter for documents to search in @@ -432,7 +432,7 @@ def query( def update(self, store: Store, docs: Union[List[Dict], Dict], key: Union[List, str, None] = None, **kwargs): """ - Update documents into the Store + Update documents into the Store. Args: docs: the document or list of documents to update @@ -446,7 +446,7 @@ def update(self, store: Store, docs: Union[List[Dict], Dict], key: Union[List, s def ensure_index(self, store: Store, key: str, unique: bool = False, **kwargs) -> bool: """ - Tries to create an index and return true if it succeeded + Tries to create an index and return true if it succeeded. Args: key: single key to index @@ -492,7 +492,7 @@ def groupby( def remove_docs(self, store: Store, criteria: Dict, **kwargs): """ - Remove docs matching the query dictionary + Remove docs matching the query dictionary. Args: criteria: query dictionary to match @@ -509,7 +509,7 @@ def query_one( **kwargs, ): """ - Queries the Store for a single document + Queries the Store for a single document. Args: criteria: PyMongo filter for documents to search @@ -527,7 +527,7 @@ def distinct( self, store: Store, field: str, criteria: Optional[Dict] = None, all_exist: bool = False, **kwargs ) -> List: """ - Get all distinct values for a field + Get all distinct values for a field. Args: field: the field(s) to get distinct values for @@ -538,7 +538,7 @@ def distinct( def set_store_attribute(self, store: Store, name: str, value: Any): """ - A method to set an attribute of a store + A method to set an attribute of a store. Args: name: The name of a function or attribute to access @@ -550,7 +550,7 @@ def set_store_attribute(self, store: Store, name: str, value: Any): def call_attr(self, name: str, store: Store, **kwargs): """ - This class will actually call an attribute/method on the class instance + This class will actually call an attribute/method on the class instance. 
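# Editor's example (a hedged sketch continuing the MultiStore snippet above):
# every Store method on MultiStore takes the target store as an extra first
# argument; StoreFacade (assumed constructor StoreFacade(store, multistore))
# wraps that so the pair behaves like an ordinary Store.
from maggma.stores.shared_stores import StoreFacade

multistore.connect_all()
n_done = multistore.count(tasks, criteria={"state": "done"})

facade = StoreFacade(tasks, multistore)
n_done = facade.count(criteria={"state": "done"})  # same call, Store-like interface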
Args:
             name: The name of a function or attribute to access
@@ -596,7 +596,7 @@ class MultiStoreManager(BaseManager):
     def setup(cls, multistore):
         """
         Args:
-            multistore: A multistore to share between processes
+            multistore: A multistore to share between processes.

         Returns:
             A manager
diff --git a/src/maggma/stores/ssh_tunnel.py b/src/maggma/stores/ssh_tunnel.py
index 0e496803b..1732d9843 100644
--- a/src/maggma/stores/ssh_tunnel.py
+++ b/src/maggma/stores/ssh_tunnel.py
@@ -30,7 +30,7 @@ def __init__(
             password: optional password for the ssh tunnel server; If a private_key is supplied
                 this password is assumed to be the private key password
             private_key: ssh private key to authenticate to the tunnel server
-            kwargs: any extra args passed to the SSHTunnelForwarder
+            kwargs: any extra args passed to the SSHTunnelForwarder.
         """

         self.tunnel_server_address = tunnel_server_address
diff --git a/src/maggma/utils.py b/src/maggma/utils.py
index 106356dad..49786b3c0 100644
--- a/src/maggma/utils.py
+++ b/src/maggma/utils.py
@@ -1,5 +1,5 @@
 """
-Utilities to help with maggma functions
+Utilities to help with maggma functions.
 """
 import itertools
 import logging
@@ -24,7 +24,7 @@

 def primed(iterable: Iterable) -> Iterable:
     """Preprimes an iterator so the first value is calculated immediately
-    but not returned until the first iteration
+    but not returned until the first iteration.
     """
     itr = iter(iterable)
     try:
@@ -36,18 +36,18 @@

 class TqdmLoggingHandler(logging.Handler):
     """
-    Helper to enable routing tqdm progress around logging
+    Helper to enable routing tqdm progress around logging.
     """

     def __init__(self, level=logging.NOTSET):
         """
-        Initialize the Tqdm handler
+        Initialize the Tqdm handler.
         """
         super().__init__(level)

     def emit(self, record):
         """
-        Emit a record via Tqdm screen
+        Emit a record via Tqdm screen.
         """
         try:
             msg = self.format(record)
@@ -60,7 +60,7 @@

 def confirm_field_index(collection: Collection, field: str) -> bool:
-    """Confirm index on store for at least one of fields
+    """Confirm index on store for at least one of the fields.

     One can't simply ensure an index exists via
     `store.collection.create_index` because a Builder must assume
@@ -105,7 +105,7 @@ def to_dt(s: Union[datetime, str]) -> datetime:

 def recursive_update(d: Dict, u: Dict):
     """
-    Recursive updates d with values from u
+    Recursively updates d with values from u.

     Args:
         d (dict): dict to update
@@ -126,7 +126,7 @@ def grouper(iterable: Iterable, n: int) -> Iterable:
     """
     Collect data into fixed-length chunks or blocks.
     >>> list(grouper(3, 'ABCDEFG'))
     [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]

     Updated from:
     https://stackoverflow.com/questions/31164731/python-chunking-csv-file-multiproccessing/31170795#31170795
@@ -137,7 +137,7 @@

 def lazy_substitute(d: Dict, aliases: Dict):
     """
-    Simple top level substitute that doesn't dive into mongo like strings
+    Simple top-level substitute that doesn't dive into mongo-like strings.
     """
     for alias, key in aliases.items():
         if key in d:
@@ -148,7 +148,7 @@

 def substitute(d: Dict, aliases: Dict):
     """
     Substitutes keys in dictionary
-    Accepts multilevel mongo like keys
+    Accepts multilevel mongo-like keys.
     """
     for alias, key in aliases.items():
         if has(d, key):
@@ -158,7 +158,7 @@

 def unset(d: Dict, key: str):
     """
-    Unsets a key
+    Unsets a key.
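# Editor's example (a small sketch): grouper() chunks any iterable into lists of
# length n, with a short final chunk; the arguments follow the signature
# grouper(iterable, n) shown in the def above.
from maggma.utils import grouper

for chunk in grouper("ABCDEFG", 3):
    print(chunk)  # ['A', 'B', 'C'], then ['D', 'E', 'F'], then ['G']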
""" _unset(d, key) path = to_path(key) @@ -170,7 +170,7 @@ def unset(d: Dict, key: str): class Timeout: """ Context manager that provides context. - implementation courtesy of https://stackoverflow.com/a/22348885/637562 + implementation courtesy of https://stackoverflow.com/a/22348885/637562. """ def __init__(self, seconds=14, error_message=""): @@ -187,13 +187,13 @@ def __init__(self, seconds=14, error_message=""): def handle_timeout(self, signum, frame): """ - Raises an error on timeout + Raises an error on timeout. """ raise TimeoutError(self.error_message) def __enter__(self): """ - Enter context with timeout + Enter context with timeout. """ if self.seconds: signal.signal(signal.SIGALRM, self.handle_timeout) @@ -201,7 +201,7 @@ def __enter__(self): def __exit__(self, type, value, traceback): """ - Exit context with timeout + Exit context with timeout. """ if self.seconds: signal.alarm(0) @@ -209,7 +209,7 @@ def __exit__(self, type, value, traceback): def dynamic_import(abs_module_path: str, class_name: Optional[str] = None): """ - Dynamic class importer from: https://www.bnmetrics.com/blog/dynamic-import-in-python3 + Dynamic class importer from: https://www.bnmetrics.com/blog/dynamic-import-in-python3. """ if class_name is None: @@ -223,12 +223,12 @@ def dynamic_import(abs_module_path: str, class_name: Optional[str] = None): class ReportingHandler(logging.Handler): """ Helper to route reporting messages - This uses the NOTSET level to send reporting messages + This uses the NOTSET level to send reporting messages. """ def __init__(self, reporting_store): """ - Initialize the Reporting Logger + Initialize the Reporting Logger. """ super().__init__(logging.NOTSET) self.reporting_store = reporting_store @@ -239,7 +239,7 @@ def __init__(self, reporting_store): def emit(self, record): """ - Emit a record via Tqdm screen + Emit a record via Tqdm screen. """ if "maggma" in record.__dict__: maggma_record = record.maggma diff --git a/src/maggma/validators.py b/src/maggma/validators.py index a7522ea4f..09affcf44 100644 --- a/src/maggma/validators.py +++ b/src/maggma/validators.py @@ -31,7 +31,7 @@ def __init__(self, schema: Dict, strict: bool = False): is found and raise a ValueError, if False will continue build but log an error message. In both cases, invalid documents will not be stored. - schema: A Python dict representation of a JSON + schema: A Python dict representation of a JSON. """ self._schema = schema self._strict = strict @@ -78,7 +78,7 @@ def is_valid(self, doc: Dict) -> bool: def validation_errors(self, doc: Dict) -> List[str]: """ If document is not valid, provides a list of - strings to display for why validation has failed + strings to display for why validation has failed. 
Returns empty list if the document is valid From 60d2622cef44e14ab160b4b81914b40e51dc2f84 Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Sun, 18 Feb 2024 12:16:47 +0000 Subject: [PATCH 3/3] Manually fix some more cases that ruff could not detect --- src/maggma/stores/azure.py | 1 + src/maggma/stores/compound_stores.py | 1 + src/maggma/stores/file_store.py | 55 ++++++++++++++-------------- src/maggma/stores/gridfs.py | 3 +- src/maggma/stores/mongolike.py | 6 ++- src/maggma/stores/shared_stores.py | 1 + tests/api/test_api.py | 1 + tests/api/test_read_resource.py | 3 +- 8 files changed, 40 insertions(+), 31 deletions(-) diff --git a/src/maggma/stores/azure.py b/src/maggma/stores/azure.py index ca7af1154..8136b6daa 100644 --- a/src/maggma/stores/azure.py +++ b/src/maggma/stores/azure.py @@ -523,6 +523,7 @@ def rebuild_metadata_from_index(self, index_query: Optional[Dict] = None): """ Read data from the index store and populate the metadata of the Azure Blob. Force all of the keys to be lower case to be Minio compatible + Args: index_query: query on the index store. """ diff --git a/src/maggma/stores/compound_stores.py b/src/maggma/stores/compound_stores.py index 5c8a51dd0..5a27ca467 100644 --- a/src/maggma/stores/compound_stores.py +++ b/src/maggma/stores/compound_stores.py @@ -295,6 +295,7 @@ def remove_docs(self, criteria: Dict): def __eq__(self, other: object) -> bool: """ Check equality for JointStore + Args: other: other JointStore to compare with. """ diff --git a/src/maggma/stores/file_store.py b/src/maggma/stores/file_store.py index 9a8595c4b..9057ba335 100644 --- a/src/maggma/stores/file_store.py +++ b/src/maggma/stores/file_store.py @@ -57,7 +57,8 @@ def __init__( **kwargs, ): """ - Initializes a FileStore + Initializes a FileStore. + Args: path: parent directory containing all files and subdirectories to process file_filters: List of fnmatch patterns defining the files to be tracked by @@ -177,20 +178,20 @@ def read(self) -> List[Dict]: Iterate through all files in the Store folder and populate the Store with dictionaries containing basic information about each file. - The keys of the documents added to the Store are - - name: str = File name - path: Path = Absolute path of this file - parent: str = Name of the parent directory (if any) - file_id: str = Unique identifier for this file, computed from the hash - of its path relative to the base FileStore directory and - the file creation time. The key of this field is 'file_id' - by default but can be changed via the 'key' kwarg to - FileStore.__init__(). - size: int = Size of this file in bytes - last_updated: datetime = Time this file was last modified - hash: str = Hash of the file contents - orphan: bool = Whether this record is an orphan + The keys of the documents added to the Store are: + + - name: str = File name + - path: Path = Absolute path of this file + - parent: str = Name of the parent directory (if any) + - file_id: str = Unique identifier for this file, computed from the hash + of its path relative to the base FileStore directory and + the file creation time. The key of this field is 'file_id' + by default but can be changed via the 'key' kwarg to + `FileStore.__init__()`. 
+ - size: int = Size of this file in bytes + - last_updated: datetime = Time this file was last modified + - hash: str = Hash of the file contents + - orphan: bool = Whether this record is an orphan """ file_list = [] # generate a list of files in subdirectories @@ -214,18 +215,18 @@ def _create_record_from_file(self, f: Path) -> Dict: basic information about that file. The keys in the returned dict are: - name: str = File name - path: Path = Absolute path of this file - parent: str = Name of the parent directory (if any) - file_id: str = Unique identifier for this file, computed from the hash - of its path relative to the base FileStore directory and - the file creation time. The key of this field is 'file_id' - by default but can be changed via the 'key' kwarg to - FileStore.__init__(). - size: int = Size of this file in bytes - last_updated: datetime = Time this file was last modified - hash: str = Hash of the file contents - orphan: bool = Whether this record is an orphan + - name: str = File name + - path: Path = Absolute path of this file + - parent: str = Name of the parent directory (if any) + - file_id: str = Unique identifier for this file, computed from the hash + of its path relative to the base FileStore directory and + the file creation time. The key of this field is 'file_id' + by default but can be changed via the 'key' kwarg to + FileStore.__init__(). + - size: int = Size of this file in bytes + - last_updated: datetime = Time this file was last modified + - hash: str = Hash of the file contents + - orphan: bool = Whether this record is an orphan """ # compute the file_id from the relative path relative_path = f.relative_to(self.path) diff --git a/src/maggma/stores/gridfs.py b/src/maggma/stores/gridfs.py index 99b98d313..8c5e38a36 100644 --- a/src/maggma/stores/gridfs.py +++ b/src/maggma/stores/gridfs.py @@ -452,7 +452,8 @@ def __init__( **kwargs, ): """ - Initializes a GridFS Store for binary data + Initializes a GridFS Store for binary data. + Args: uri: MongoDB+SRV URI database: database to connect to diff --git a/src/maggma/stores/mongolike.py b/src/maggma/stores/mongolike.py index b910aaad2..0ebb6b747 100644 --- a/src/maggma/stores/mongolike.py +++ b/src/maggma/stores/mongolike.py @@ -348,7 +348,8 @@ def query( # type: ignore def ensure_index(self, key: str, unique: Optional[bool] = False) -> bool: """ - Tries to create an index and return true if it succeeded + Tries to create an index and return true if it succeeded. + Args: key: single key to index unique: Whether or not this index contains only unique keys. @@ -516,7 +517,8 @@ class MemoryStore(MongoStore): def __init__(self, collection_name: str = "memory_db", **kwargs): """ - Initializes the Memory Store + Initializes the Memory Store. + Args: collection_name: name for the collection in memory. 
""" diff --git a/src/maggma/stores/shared_stores.py b/src/maggma/stores/shared_stores.py index 35a2b461f..11babc5bc 100644 --- a/src/maggma/stores/shared_stores.py +++ b/src/maggma/stores/shared_stores.py @@ -271,6 +271,7 @@ def get_store_index(self, store: Store) -> Optional[int]: Note: this is not a search for an instance of a store, but rather a search for a equivalent store + Args: store: The store to find diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 25bcea49c..c92e39483 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -77,6 +77,7 @@ def test_msonable(owner_store, pet_store): def search_helper(payload, base: str = "/?", debug=True) -> Tuple[Response, Any]: """ Helper function to directly query search endpoints + Args: store: store f base: base of the query, default to /query? diff --git a/tests/api/test_read_resource.py b/tests/api/test_read_resource.py index 72badc62f..79ca5cf64 100644 --- a/tests/api/test_read_resource.py +++ b/tests/api/test_read_resource.py @@ -110,7 +110,8 @@ def generate_hints(query): def search_helper(payload, base: str = "/?", debug=True) -> Response: """ - Helper function to directly query search endpoints + Helper function to directly query search endpoints. + Args: store: store f base: base of the query, default to /query?