Skip to content

Commit

Permalink
[python] Add SomaSessionContext to all SOMA objects (#681)
Browse files Browse the repository at this point in the history
* Added the `SOMATileDBContext` class, which is now accepted by all `TileDBObject` class constructors.
* `SOMATileDBContext` includes a `tiledb.Ctx`, so the `ctx` params were replaced with `context` params of type `SOMATileDBContext`.
* Moved the `member_uris_are_relative` attribute from `PlatformConfig` to `SOMATileDBContext`.
* `PlatformConfig` is no longer maintained as an instance variable on `TileDBObject`s and is no longer passed into constructors. Any future config needed by a `TileDBObject` (that is *not* related to create operations) can become an attribute of `SOMATileDBContext`.
  • Loading branch information
atolopko-czi committed Jan 19, 2023
1 parent f331c32 commit 30bcd1e
Show file tree
Hide file tree
Showing 21 changed files with 393 additions and 278 deletions.
2 changes: 0 additions & 2 deletions apis/python/src/tiledbsoma/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from .sparse_nd_array import SparseNDArray
from .tiledb_array import TileDBArray
from .tiledb_object import TileDBObject
from .tiledb_platform_config import TileDBPlatformConfig

__version__ = get_implementation_version()

Expand All @@ -24,7 +23,6 @@
"get_implementation_version",
"get_SOMA_version",
"get_storage_engine",
"TileDBPlatformConfig",
"TileDBObject",
"TileDBArray",
"Collection",
Expand Down
29 changes: 13 additions & 16 deletions apis/python/src/tiledbsoma/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import tiledb

from .exception import DoesNotExistError, SOMAError
from .options import SOMATileDBContext
from .tiledb_object import TileDBObject
from .tiledb_platform_config import TileDBPlatformConfig
from .util import make_relative_path
from .util_tiledb import is_does_not_exist_error, is_duplicate_group_key_error

Expand Down Expand Up @@ -83,19 +83,13 @@ def __init__(
*,
# Non-top-level objects can have a parent to propagate context, depth, etc.
parent: Optional[CollectionBase[Any]] = None,
# Top-level objects should specify these:
tiledb_platform_config: Optional[TileDBPlatformConfig] = None,
ctx: Optional[tiledb.Ctx] = None,
# Top-level objects should specify this:
context: Optional[SOMATileDBContext] = None,
):
"""
Also see the ``TileDBObject`` constructor.
"""
super().__init__(
uri=uri,
parent=parent,
tiledb_platform_config=tiledb_platform_config,
ctx=ctx,
)
super().__init__(uri=uri, parent=parent, context=context)
self._cached_values = None

def create(self) -> "CollectionBase[CollectionElementType]":
Expand All @@ -109,7 +103,7 @@ def _create(self, soma_type: str) -> "CollectionBase[CollectionElementType]":
Helper for `create`. Ensures that the type name of a child class, not
its parent class, is written to object-type metadata in storage.
"""
tiledb.group_create(uri=self._uri, ctx=self._ctx)
tiledb.group_create(uri=self._uri, ctx=self.context.tiledb_ctx)
self._common_create(soma_type) # object-type metadata etc
self._cached_values = {}
return self
Expand Down Expand Up @@ -143,7 +137,10 @@ def __getitem__(self, key: str) -> CollectionElementType:

tdb: tiledb.Object = self._cached_values[key].tdb
soma = _construct_member(
tdb.uri, self, ctx=self._ctx, object_type=tdb.type
tdb.uri,
self,
context=self.context,
object_type=tdb.type,
)
if soma is None:
# if we were unable to create an object, it wasn't actually a SOMA object
Expand Down Expand Up @@ -274,8 +271,8 @@ def _load_tdb_group_cache(self) -> None:

def _determine_default_relative(self, uri: str) -> Optional[bool]:
"""Defaulting for the relative parameter."""
if self._tiledb_platform_config.member_uris_are_relative is not None:
return self._tiledb_platform_config.member_uris_are_relative
if self.context.member_uris_are_relative is not None:
return self.context.member_uris_are_relative
if uri.startswith("tiledb://"):
# TileDB-Cloud does not use relative URIs, ever.
return False
Expand Down Expand Up @@ -358,7 +355,7 @@ def _tiledb_open(self, mode: str = "r") -> tiledb.Group:
"""
assert mode in ("r", "w")
# This works in with-open-as contexts because tiledb.Group has __enter__ and __exit__ methods.
return tiledb.Group(self._uri, mode=mode, ctx=self._ctx)
return tiledb.Group(self._uri, mode=mode, ctx=self.context.tiledb_ctx)

def _show_metadata(self, recursively: bool = True, indent: str = "") -> None:
"""
Expand All @@ -378,7 +375,7 @@ def _show_metadata(self, recursively: bool = True, indent: str = "") -> None:
# However, getting it to work with a recursive data structure and finding the
# required methods, it was simpler to split the logic this way.

soma = _construct_member(obj.uri, self, ctx=self._ctx)
soma = _construct_member(obj.uri, self, context=self.context)
if soma is not None:
soma._show_metadata(recursively, indent=child_indent)
else:
Expand Down
46 changes: 28 additions & 18 deletions apis/python/src/tiledbsoma/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
# This package's pybind11 code
import tiledbsoma.libtiledbsoma as clib

from . import tiledb_platform_config as tdbpc
from . import util, util_arrow
from .collection import CollectionBase
from .constants import SOMA_JOINID
from .options import SOMATileDBContext, TileDBCreateOptions
from .query_condition import QueryCondition
from .tiledb_array import TileDBArray
from .types import NPFloating, NPInteger, PlatformConfig
Expand All @@ -38,12 +38,13 @@ def __init__(
uri: str,
*,
parent: Optional[CollectionBase[Any]] = None,
ctx: Optional[tiledb.Ctx] = None,
# Top-level objects should specify this:
context: Optional[SOMATileDBContext] = None,
):
"""
See also the ``TileDBOject`` constructor.
See also the ``TileDBObject`` constructor.
"""
super().__init__(uri=uri, parent=parent, ctx=ctx)
super().__init__(uri=uri, parent=parent, context=context)
self._index_column_names = ()
self._is_sparse = None

Expand All @@ -54,16 +55,21 @@ def create(
self,
schema: pa.Schema,
index_column_names: Sequence[str] = (SOMA_JOINID,),
platform_config: Optional[PlatformConfig] = None,
platform_config: Optional[somacore.options.PlatformConfig] = None,
) -> "DataFrame":
"""
:param schema: Arrow Schema defining the per-column schema. This schema must define all columns, including columns to be named as index columns. If the schema includes types unsupported by the SOMA implementation, an error will be raised.
:param index_column_names: A list of column names to use as user-defined index columns (e.g., ``['cell_type', 'tissue_type']``). All named columns must exist in the schema, and at least one index column name is required.
:param platform_config: Platform-specific options used to create this DataFrame, provided via "tiledb"->"create" nested keys
"""
schema = _validate_schema(schema, index_column_names)
config_wrapper = tdbpc.from_param(platform_config)
self._create_empty(schema, index_column_names, config_wrapper.create_options())
self._create_empty(
schema,
index_column_names,
TileDBCreateOptions.from_platform_config(platform_config),
)
self._index_column_names = tuple(index_column_names)

self._common_create(self.soma_type) # object-type metadata etc
Expand All @@ -73,14 +79,12 @@ def _create_empty(
self,
schema: pa.Schema,
index_column_names: Sequence[str],
create_options: tdbpc.CreateWrapper,
tiledb_create_options: TileDBCreateOptions,
) -> None:
"""
Create a TileDB 1D sparse array with dimensions and attributes
"""

level = self._tiledb_platform_config.string_dim_zstd_level

dims = []
for index_column_name in index_column_names:
pa_type = schema.field(index_column_name).type
Expand All @@ -98,7 +102,7 @@ def _create_empty(
raise TypeError(f"Unsupported dtype {dtype}")

# Default 2048 mods to 0 for 8-bit types and 0 is an invalid extent
extent = create_options.dim_tile(index_column_name)
extent = tiledb_create_options.dim_tile(index_column_name)
if isinstance(dtype, np.dtype) and dtype.itemsize == 1:
extent = 64

Expand All @@ -107,8 +111,14 @@ def _create_empty(
domain=domain,
tile=extent,
dtype=dtype,
filters=create_options.dim_filters(
index_column_name, [dict(_type="ZstdFilter", level=level)]
filters=tiledb_create_options.dim_filters(
index_column_name,
[
dict(
_type="ZstdFilter",
level=tiledb_create_options.string_dim_zstd_level(),
)
],
),
)
dims.append(dim)
Expand All @@ -124,20 +134,20 @@ def _create_empty(
dtype=util_arrow.tiledb_type_from_arrow_type(
schema.field(attr_name).type
),
filters=create_options.attr_filters(attr_name, ["ZstdFilter"]),
filters=tiledb_create_options.attr_filters(attr_name, ["ZstdFilter"]),
ctx=self._ctx,
)
attrs.append(attr)

cell_order, tile_order = create_options.cell_tile_orders()
cell_order, tile_order = tiledb_create_options.cell_tile_orders()

sch = tiledb.ArraySchema(
domain=dom,
attrs=attrs,
sparse=True,
allows_duplicates=False,
offsets_filters=create_options.offsets_filters(),
capacity=create_options.get("capacity", 100000),
offsets_filters=tiledb_create_options.offsets_filters(),
capacity=tiledb_create_options.get("capacity", 100000),
cell_order=cell_order,
# As of TileDB core 2.8.2, we cannot consolidate string-indexed sparse arrays with
# col-major tile order: so we write ``X`` with row-major tile order.
Expand All @@ -146,7 +156,7 @@ def _create_empty(
)
self._is_sparse = sch.sparse

tiledb.Array.create(self._uri, sch, ctx=self._ctx)
tiledb.Array.create(self._uri, sch)

def keys(self) -> Sequence[str]:
"""
Expand Down
46 changes: 24 additions & 22 deletions apis/python/src/tiledbsoma/dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@
import tiledbsoma.util_arrow as util_arrow
from tiledbsoma.util import dense_indices_to_shape

from . import tiledb_platform_config as tdbpc
from .collection import CollectionBase
from .exception import SOMAError
from .options import SOMATileDBContext, TileDBCreateOptions
from .tiledb_array import TileDBArray
from .tiledb_platform_config import TileDBPlatformConfig
from .types import NTuple, PlatformConfig

_UNBATCHED = options.BatchSize()
Expand All @@ -32,18 +31,12 @@ def __init__(
uri: str,
*,
parent: Optional[CollectionBase[Any]] = None,
tiledb_platform_config: Optional[TileDBPlatformConfig] = None,
ctx: Optional[tiledb.Ctx] = None,
context: Optional[SOMATileDBContext] = None,
):
"""
Also see the ``TileDBObject`` constructor.
"""
super().__init__(
uri=uri,
parent=parent,
tiledb_platform_config=tiledb_platform_config,
ctx=ctx,
)
super().__init__(uri=uri, parent=parent, context=context)

# Inherited from somacore
# soma_type: Final = "SOMADenseNDArray"
Expand All @@ -52,14 +45,16 @@ def create(
self,
type: pa.DataType,
shape: Union[NTuple, List[int]],
platform_config: Optional[PlatformConfig] = None,
platform_config: Optional[somacore.options.PlatformConfig] = None,
) -> "DenseNDArray":
"""
Create a ``DenseNDArray`` named with the URI.
:param type: an Arrow type defining the type of each element in the array. If the type is unsupported, an error will be raised.
:param shape: the length of each domain as a list, e.g., [100, 10]. All lengths must be in the positive int64 range.
:param platform_config: Platform-specific options used to create this Array, provided via "tiledb"->"create" nested keys
"""

# check on shape
Expand All @@ -73,19 +68,26 @@ def create(
"Unsupported type - DenseNDArray only supports primtive Arrow types"
)

level = self._tiledb_platform_config.string_dim_zstd_level
create_options = tdbpc.from_param(platform_config).create_options()
tiledb_create_options = TileDBCreateOptions.from_platform_config(
platform_config
)

dims = []
for n, e in enumerate(shape):
dim_name = f"soma_dim_{n}"
dim = tiledb.Dim(
name=dim_name,
domain=(0, e - 1),
tile=create_options.dim_tile(dim_name, min(e, 2048)),
tile=tiledb_create_options.dim_tile(dim_name, min(e, 2048)),
dtype=np.int64,
filters=create_options.dim_filters(
dim_name, [dict(_type="ZstdFilter", level=level)]
filters=tiledb_create_options.dim_filters(
dim_name,
[
dict(
_type="ZstdFilter",
level=tiledb_create_options.string_dim_zstd_level(),
)
],
),
)
dims.append(dim)
Expand All @@ -95,26 +97,26 @@ def create(
tiledb.Attr(
name="soma_data",
dtype=util_arrow.tiledb_type_from_arrow_type(type),
filters=create_options.attr_filters("soma_data", ["ZstdFilter"]),
filters=tiledb_create_options.attr_filters("soma_data", ["ZstdFilter"]),
ctx=self._ctx,
)
]

cell_order, tile_order = create_options.cell_tile_orders()
cell_order, tile_order = tiledb_create_options.cell_tile_orders()

sch = tiledb.ArraySchema(
domain=dom,
attrs=attrs,
sparse=False,
allows_duplicates=False,
offsets_filters=create_options.offsets_filters(),
capacity=create_options.get("capacity", 100000),
offsets_filters=tiledb_create_options.offsets_filters(),
capacity=tiledb_create_options.get("capacity", 100000),
cell_order=cell_order,
tile_order=tile_order,
ctx=self._ctx,
)

tiledb.Array.create(self._uri, sch, ctx=self._ctx)
tiledb.Array.create(self._uri, sch)

self._common_create(self.soma_type) # object-type metadata etc

Expand Down Expand Up @@ -174,7 +176,7 @@ def read(
self._uri,
name=self.__class__.__name__,
result_order=result_order.value,
platform_config={} if self._ctx is None else self._ctx.config().dict(),
platform_config=self._ctx.config().dict(),
)

if coords is not None:
Expand Down
15 changes: 4 additions & 11 deletions apis/python/src/tiledbsoma/experiment.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from typing import Any, Dict, Optional, Tuple, cast

import somacore
import tiledb
from typing_extensions import Final

from .collection import CollectionBase
from .dataframe import DataFrame
from .measurement import Measurement
from .options import SOMATileDBContext
from .tiledb_object import TileDBObject
from .tiledb_platform_config import TileDBPlatformConfig

_EMPTY_QUERY = somacore.AxisQuery()

Expand All @@ -34,19 +33,13 @@ def __init__(
*,
# Non-top-level objects can have a parent to propagate context, depth, etc.
parent: Optional[CollectionBase[Any]] = None,
# Top-level objects should specify these:
tiledb_platform_config: Optional[TileDBPlatformConfig] = None,
ctx: Optional[tiledb.Ctx] = None,
# Top-level objects should specify this:
context: Optional[SOMATileDBContext] = None,
):
"""
Also see the ``TileDBObject`` constructor.
"""
super().__init__(
uri=uri,
parent=parent,
tiledb_platform_config=tiledb_platform_config,
ctx=ctx,
)
super().__init__(uri=uri, parent=parent, context=context)

# Inherited from somacore
soma_type: Final = "SOMAExperiment"
Expand Down
Loading

0 comments on commit 30bcd1e

Please sign in to comment.