Skip to content

Commit

Permalink
Add labeling APIs to pylibcudf (#16761)
Browse files Browse the repository at this point in the history
Contributes to #15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: #16761
  • Loading branch information
mroeschke committed Sep 9, 2024
1 parent 26a81b6 commit 150f1b1
Show file tree
Hide file tree
Showing 11 changed files with 134 additions and 44 deletions.
17 changes: 9 additions & 8 deletions docs/cudf/source/developer_guide/pylibcudf.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,38 +186,39 @@ Here is an example of appropriate enum usage.


```cython
# cpp/copying.pxd
# pylibcudf/libcudf/copying.pxd
cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
# cpdef here so that we export both a cdef enum class and a Python enum.Enum.
cpdef enum class out_of_bounds_policy(bool):
NULLIFY
DONT_CHECK
# cpp/copying.pyx
# This file is empty, but is required to compile the Python enum in cpp/copying.pxd
# pylibcudf/libcudf/copying.pyx
# This file is empty, but is required to compile the Python enum in pylibcudf/libcudf/copying.pxd
# Ensure this file is included in pylibcudf/libcudf/CMakeLists.txt
# pylibcudf/copying.pxd
# cimport the enum using the exact name
# Once https://github.com/cython/cython/issues/5609 is resolved,
# this import should instead be
# from cudf._lib.cpp.copying cimport out_of_bounds_policy as OutOfBoundsPolicy
from cudf._lib.cpp.copying cimport out_of_bounds_policy
# from pylibcudf.libcudf.copying cimport out_of_bounds_policy as OutOfBoundsPolicy
from pylibcudf.libcudf.copying cimport out_of_bounds_policy
# pylibcudf/copying.pyx
# Access cpp.copying members that aren't part of this module's public API via
# this module alias
from cudf._lib.cpp cimport copying as cpp_copying
from cudf._lib.cpp.copying cimport out_of_bounds_policy
from pylibcudf.libcudf cimport copying as cpp_copying
from pylibcudf.libcudf.copying cimport out_of_bounds_policy
# This import exposes the enum in the public API of this module.
# It requires a no-cython-lint tag because it will be unused: all typing of
# parameters etc will need to use the Cython name `out_of_bounds_policy` until
# the Cython bug is resolved.
from cudf._lib.cpp.copying import \
from pylibcudf.libcudf.copying import \
out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint
```

Expand Down
40 changes: 10 additions & 30 deletions python/cudf/cudf/_lib/labeling.pyx
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from cudf.core.buffer import acquire_spill_lock

from libcpp cimport bool as cbool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.labeling cimport inclusive, label_bins as cpp_label_bins
import pylibcudf as plc

from cudf._lib.column cimport Column
from cudf.core.buffer import acquire_spill_lock


# Note that the parameter input shadows a Python built-in in the local scope,
Expand All @@ -19,26 +14,11 @@ from cudf._lib.column cimport Column
@acquire_spill_lock()
def label_bins(Column input, Column left_edges, cbool left_inclusive,
Column right_edges, cbool right_inclusive):
cdef inclusive c_left_inclusive = \
inclusive.YES if left_inclusive else inclusive.NO
cdef inclusive c_right_inclusive = \
inclusive.YES if right_inclusive else inclusive.NO

cdef column_view input_view = input.view()
cdef column_view left_edges_view = left_edges.view()
cdef column_view right_edges_view = right_edges.view()

cdef unique_ptr[column] c_result

with nogil:
c_result = move(
cpp_label_bins(
input_view,
left_edges_view,
c_left_inclusive,
right_edges_view,
c_right_inclusive,
)
)

return Column.from_unique_ptr(move(c_result))
plc_column = plc.labeling.label_bins(
input.to_pylibcudf(mode="read"),
left_edges.to_pylibcudf(mode="read"),
left_inclusive,
right_edges.to_pylibcudf(mode="read"),
right_inclusive
)
return Column.from_pylibcudf(plc_column)
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(cython_sources
groupby.pyx
interop.pyx
join.pyx
labeling.pyx
lists.pyx
merge.pyx
null_mask.pyx
Expand Down
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from . cimport (
filling,
groupby,
join,
labeling,
lists,
merge,
null_mask,
Expand Down
3 changes: 3 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
interop,
io,
join,
labeling,
lists,
merge,
null_mask,
Expand Down Expand Up @@ -67,7 +68,9 @@
"gpumemoryview",
"groupby",
"interop",
"io",
"join",
"labeling",
"lists",
"merge",
"null_mask",
Expand Down
14 changes: 14 additions & 0 deletions python/pylibcudf/pylibcudf/labeling.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from libcpp cimport bool
from pylibcudf.libcudf.labeling cimport inclusive

from .column cimport Column


# Declaration of the bin-labeling entry point. Because it is `cpdef`, it is
# callable both from Python and, via cimport of this .pxd, from other Cython
# modules. The two `bool` parameters are C++ bools (cimported from libcpp in
# this file) selecting whether the left/right bin edges are inclusive.
cpdef Column label_bins(
Column input,
Column left_edges,
bool left_inclusive,
Column right_edges,
bool right_inclusive
)
65 changes: 65 additions & 0 deletions python/pylibcudf/pylibcudf/labeling.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from pylibcudf.libcudf cimport labeling as cpp_labeling
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.labeling cimport inclusive

from pylibcudf.libcudf.labeling import inclusive as Inclusive # no-cython-lint

from .column cimport Column


cpdef Column label_bins(
    Column input,
    Column left_edges,
    bool left_inclusive,
    Column right_edges,
    bool right_inclusive
):
    """Label each element of ``input`` by the bin it is a member of.

    Parameters
    ----------
    input : Column
        Elements to label according to the specified bins.
    left_edges : Column
        Left edge of each bin.
    left_inclusive : bool
        Whether the left edge of a bin is inclusive.
    right_edges : Column
        Right edge of each bin.
    right_inclusive : bool
        Whether the right edge of a bin is inclusive.

    Returns
    -------
    Column
        Integer labels of the elements in ``input`` according to the
        specified bins.
    """
    # Translate the Python-facing booleans into the libcudf `inclusive`
    # enum: start from NO and switch to YES only when the flag is set.
    cdef inclusive left_policy = inclusive.NO
    cdef inclusive right_policy = inclusive.NO
    cdef unique_ptr[column] labels

    if left_inclusive:
        left_policy = inclusive.YES
    if right_inclusive:
        right_policy = inclusive.YES

    # The libcudf call runs with the GIL released.
    with nogil:
        labels = move(
            cpp_labeling.label_bins(
                input.view(),
                left_edges.view(),
                left_policy,
                right_edges.view(),
                right_policy,
            )
        )

    return Column.from_libcudf(move(labels))
4 changes: 2 additions & 2 deletions python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# the License.
# =============================================================================

set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx reduce.pyx replace.pyx
round.pyx stream_compaction.pyx types.pyx unary.pyx
set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx labeling.pyx reduce.pyx
replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx
)

set(linked_libraries cudf::cudf)
Expand Down
8 changes: 4 additions & 4 deletions python/pylibcudf/pylibcudf/libcudf/labeling.pxd
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from libcpp cimport int
from libcpp.memory cimport unique_ptr
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view


cdef extern from "cudf/labeling/label_bins.hpp" namespace "cudf" nogil:
ctypedef enum inclusive:
YES "cudf::inclusive::YES"
NO "cudf::inclusive::NO"
cpdef enum class inclusive(int):
YES
NO

cdef unique_ptr[column] label_bins (
const column_view &input,
Expand Down
Empty file.
25 changes: 25 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_labeling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pylibcudf as plc
import pytest


@pytest.mark.parametrize("left_inclusive", [True, False])
@pytest.mark.parametrize("right_inclusive", [True, False])
def test_label_bins(left_inclusive, right_inclusive):
    """Check bin labels, including values that sit exactly on a bin edge.

    The bins are 0..4 (label 0) and 5..6 (label 1). The input contains both
    edges of each bin, one interior value, and one value outside every bin,
    so each combination of the two inclusivity flags yields a different
    expected result. Elements that fall in no bin are labeled null.
    """
    in_col = plc.interop.from_arrow(pa.array([0, 1, 4, 5, 6, 7]))
    left_edges = plc.interop.from_arrow(pa.array([0, 5]))
    right_edges = plc.interop.from_arrow(pa.array([4, 6]))
    result = plc.interop.to_arrow(
        plc.labeling.label_bins(
            in_col, left_edges, left_inclusive, right_edges, right_inclusive
        )
    )
    expected = pa.chunked_array(
        [
            [
                0 if left_inclusive else None,  # 0: left edge of bin 0
                0,  # 1: strictly inside bin 0
                0 if right_inclusive else None,  # 4: right edge of bin 0
                1 if left_inclusive else None,  # 5: left edge of bin 1
                1 if right_inclusive else None,  # 6: right edge of bin 1
                None,  # 7: beyond every bin
            ]
        ],
        type=pa.int32(),
    )
    assert result.equals(expected)


def test_inclusive_enum():
    """The Python-visible ``Inclusive`` enum exposes YES as 0 and NO as 1."""
    inclusive_enum = plc.labeling.Inclusive
    assert inclusive_enum.YES == 0
    assert inclusive_enum.NO == 1

0 comments on commit 150f1b1

Please sign in to comment.