diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 316dc57a2..9c7b9fb10 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -87,8 +87,8 @@ jobs: working-directory: examples/${{ matrix.example }} run: | pip install uv - uv venv - source .venv/bin/activate + uv venv $GITHUB_WORKSPACE/.venv + source $GITHUB_WORKSPACE/.venv/bin/activate if [ -f requirements.in ]; then uv pip install -r requirements.in; fi uv pip install "flytekit>=1.12.2" "numpy<2.0.0" pip freeze @@ -126,7 +126,7 @@ jobs: run: | export FLYTE_PUSH_IMAGE_SPEC=${{ github.event_name != 'pull_request' }} default_image=ghcr.io/flyteorg/flytecookbook:${{ matrix.example }}-${{ github.sha }} - source .venv/bin/activate + source $GITHUB_WORKSPACE/.venv/bin/activate pyflyte \ --pkgs ${{ matrix.example }} package \ --image $default_image \ @@ -293,7 +293,7 @@ jobs: pip install uv uv venv source .venv/bin/activate - uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate + uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate pyarrow pip freeze - name: Checkout flytesnacks uses: actions/checkout@v3 diff --git a/examples/data_types_and_io/Dockerfile b/examples/data_types_and_io/Dockerfile index db5734820..d93ea429b 100644 --- a/examples/data_types_and_io/Dockerfile +++ b/examples/data_types_and_io/Dockerfile @@ -1,3 +1,4 @@ +#syntax=docker/dockerfile:1.8 # ###################### # NOTE: For CI/CD only # ######################## @@ -17,8 +18,10 @@ ENV VENV /opt/venv RUN python3 -m venv ${VENV} ENV PATH="${VENV}/bin:$PATH" -RUN pip install flytekit pandas -RUN pip install torch --index-url https://download.pytorch.org/whl/cpu +RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \ + pip install flytekit pandas pyarrow +RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \ + pip install torch --index-url https://download.pytorch.org/whl/cpu # Copy the actual code COPY . /root diff --git a/examples/data_types_and_io/requirements.in b/examples/data_types_and_io/requirements.in index f33049738..79bd303e5 100644 --- a/examples/data_types_and_io/requirements.in +++ b/examples/data_types_and_io/requirements.in @@ -1,3 +1,4 @@ pandas torch tabulate +pyarrow diff --git a/examples/development_lifecycle/Dockerfile b/examples/development_lifecycle/Dockerfile index 834f5dd29..a704a214d 100644 --- a/examples/development_lifecycle/Dockerfile +++ b/examples/development_lifecycle/Dockerfile @@ -1,3 +1,4 @@ +#syntax=docker/dockerfile:1.8 # ###################### # NOTE: For CI/CD only # ######################## @@ -21,7 +22,8 @@ ENV PATH="${VENV}/bin:$PATH" # Install Python dependencies COPY requirements.in /root -RUN pip install -r /root/requirements.in +RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \ + pip install -r /root/requirements.in # Copy the actual code COPY . /root diff --git a/examples/development_lifecycle/requirements.in b/examples/development_lifecycle/requirements.in index 980271fac..8e50db9c6 100644 --- a/examples/development_lifecycle/requirements.in +++ b/examples/development_lifecycle/requirements.in @@ -3,3 +3,4 @@ flytekitplugins-deck-standard plotly scikit-learn tabulate +pyarrow diff --git a/examples/duckdb_plugin/Dockerfile b/examples/duckdb_plugin/Dockerfile index ef4f356dd..91901970a 100644 --- a/examples/duckdb_plugin/Dockerfile +++ b/examples/duckdb_plugin/Dockerfile @@ -1,3 +1,4 @@ +#syntax=docker/dockerfile:1.8 FROM python:3.8-buster WORKDIR /root @@ -25,7 +26,8 @@ ENV PATH="${VENV}/bin:$PATH" # Install Python dependencies COPY requirements.in /root/ -RUN pip install -r /root/requirements.in +RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \ + pip install -r /root/requirements.in # Copy the actual code COPY . /root/ diff --git a/examples/duckdb_plugin/requirements.in b/examples/duckdb_plugin/requirements.in index f4180eb1a..4f8699977 100644 --- a/examples/duckdb_plugin/requirements.in +++ b/examples/duckdb_plugin/requirements.in @@ -3,3 +3,4 @@ wheel matplotlib flytekitplugins-deck-standard flytekitplugins-duckdb +pyarrow diff --git a/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_lightning_mnist_autoencoder.py b/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_lightning_mnist_autoencoder.py index e5e819e6d..ddc4409aa 100644 --- a/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_lightning_mnist_autoencoder.py +++ b/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_lightning_mnist_autoencoder.py @@ -10,17 +10,10 @@ import os import lightning as L -from flytekit import ImageSpec, PodTemplate, Resources, task, workflow +from flytekit import ImageSpec, Resources, task, workflow from flytekit.extras.accelerators import T4 from flytekit.types.directory import FlyteDirectory from flytekitplugins.kfpytorch.task import Elastic -from kubernetes.client.models import ( - V1Container, - V1EmptyDirVolumeSource, - V1PodSpec, - V1Volume, - V1VolumeMount, -) from torch import nn, optim from torch.utils.data import DataLoader from torchvision.datasets import MNIST @@ -69,19 +62,6 @@ # ``` # ::: -# %% [markdown] -# We're also going to define a custom pod template that mounts a shared memory -# volume to `/dev/shm`. This is necessary for distributed data parallel (DDP) -# training so that state can be shared across workers. - -# %% -container = V1Container(name=custom_image.name, volume_mounts=[V1VolumeMount(mount_path="/dev/shm", name="dshm")]) -volume = V1Volume(name="dshm", empty_dir=V1EmptyDirVolumeSource(medium="Memory")) -custom_pod_template = PodTemplate( - primary_container_name=custom_image.name, - pod_spec=V1PodSpec(containers=[container], volumes=[volume]), -) - # %% [markdown] # ## Define a `LightningModule` # @@ -175,7 +155,6 @@ def train_dataloader(self): ), accelerator=T4, requests=Resources(mem="32Gi", cpu="48", gpu="8", ephemeral_storage="100Gi"), - pod_template=custom_pod_template, ) def train_model(dataloader_num_workers: int) -> FlyteDirectory: """Train an autoencoder model on the MNIST.""" diff --git a/examples/productionizing/productionizing/reference_task.py b/examples/productionizing/productionizing/reference_task.py index 29c6efc46..a64d46468 100644 --- a/examples/productionizing/productionizing/reference_task.py +++ b/examples/productionizing/productionizing/reference_task.py @@ -1,6 +1,6 @@ -from typing import List, NamedTuple +from typing import List -from flytekit import StructuredDataset, reference_task, workflow +from flytekit import reference_task, workflow from flytekit.types.file import FlyteFile # A `flytekit.reference_task` references the Flyte tasks that have already been defined, serialized, and registered. @@ -26,22 +26,19 @@ def normalize_columns( ... -outputs = NamedTuple("Outputs", results=StructuredDataset) - - @reference_task( project="flytesnacks", domain="development", - name="bigquery", + name="sql.bigquery.no_io", version="{{ registration.version }}", ) -def bigquery_task(version: int) -> outputs: +def bigquery_task(): ... @workflow def wf() -> FlyteFile: - bigquery_task(version=1) + bigquery_task() return normalize_columns( csv_url="https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv", column_names=["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"],