diff --git a/.dockerignore b/.dockerignore index 5aff47e60..28cc3265b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,4 +3,5 @@ icon.png LICENSE README.md target/bin/bento +target/bin/huggingbento target/dist diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 52542f7d5..ba335772f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -115,3 +115,13 @@ jobs: platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.docker_meta.outputs.tags }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: ./ + file: ./resources/huggingbento/Dockerfile + builder: ${{ steps.buildx.outputs.name }} + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.docker_meta.outputs.tags }} \ No newline at end of file diff --git a/Makefile b/Makefile index ba2e79d18..682330e53 100644 --- a/Makefile +++ b/Makefile @@ -22,9 +22,10 @@ DATE := $(shell date +"%Y-%m-%dT%H:%M:%SZ") VER_FLAGS = -X main.Version=$(VERSION) -X main.DateBuilt=$(DATE) -LD_FLAGS ?= -w -s -GO_FLAGS ?= -DOCS_FLAGS ?= +LD_FLAGS ?= -w -s +CGO_LDFLAGS ?= +GO_FLAGS ?= +DOCS_FLAGS ?= APPS = bento all: $(APPS) @@ -63,6 +64,15 @@ $(PATHINSTSERVERLESS)/%: $(SOURCE_FILES) $(SERVERLESS): %: $(PATHINSTSERVERLESS)/% +HUGGINGBENTO = huggingbento +hugging-bento: $(HUGGINGBENTO) + +$(PATHINSTBIN)/$(HUGGINGBENTO): $(SOURCE_FILES) + @CGO_ENABLED=1 \ + go build $(GO_FLAGS) -tags "$(TAGS) huggingbento" -ldflags "$(LD_FLAGS) $(VER_FLAGS) -X main.BinaryName=huggingbento -X main.ProductName=huggingbento" -o $@ ./cmd/bento + +$(HUGGINGBENTO): %: $(PATHINSTBIN)/% + docker-tags: @echo "latest,$(VER_CUT),$(VER_MAJOR).$(VER_MINOR),$(VER_MAJOR)" > .tags @@ -80,6 +90,10 @@ docker-cgo: @docker build -f ./resources/docker/Dockerfile.cgo . -t $(DOCKER_IMAGE):$(VER_CUT)-cgo @docker tag $(DOCKER_IMAGE):$(VER_CUT)-cgo $(DOCKER_IMAGE):latest-cgo +docker-huggingbento: + @docker build -f ./resources/huggingbento/Dockerfile . 
-t ghcr.io/warpstreamlabs/huggingbento:$(VER_CUT) + @docker tag ghcr.io/warpstreamlabs/huggingbento:$(VER_CUT) ghcr.io/warpstreamlabs/huggingbento:latest + fmt: @go list -f {{.Dir}} ./... | xargs -I{} gofmt -w -s {} @go list -f {{.Dir}} ./... | xargs -I{} goimports -w -local github.com/warpstreamlabs/bento {} diff --git a/README.md b/README.md index 814eee04d..578952267 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,24 @@ go install -tags "x_bento_extra" github.com/warpstreamlabs/bento/cmd/bento@lates make TAGS=x_bento_extra ``` -Note that this tag may change or be broken out into granular tags for individual components outside of major version releases. If you attempt a build and these dependencies are not present you'll see error messages such as `ld: library not found for -lzmq`. +### hugging-bento + +`hugging-bento` is a Bento distribution that supports running [ONNX models](https://onnxruntime.ai/). It leverages the [`knights-analytics/hugot`](https://github.com/knights-analytics/hugot) package, which in turn has two external dependencies: +- An `onnxruntime.*` ONNX Runtime dynamic library file. This can be obtained from the [onnxruntime project releases page](https://github.com/microsoft/onnxruntime/releases). This is dynamically linked by hugot and used by the onnxruntime inference library `onnxruntime_go`. This can be set with the `onnx_library_path` flag when loading your `config.yaml`. +- The `tokenizers.a` file. This should be at `/usr/lib/tokenizers.a` by default; otherwise, point the linker at the directory that contains it with `CGO_LDFLAGS=-L/path/to/dir` (`-L` takes a directory, not the file itself) so that hugot can load it. + +There are instructions for configuring these external libraries at [`knights-analytics/hugot#use-it-as-a-library`](https://github.com/knights-analytics/hugot?tab=readme-ov-file#use-it-as-a-library). Alternatively, you can use the `hugging-bento` [Docker image](resources/huggingbento/Dockerfile) which has all of these dependencies baked in. 
+ +```shell +# The location of the tokenizers.a file +export CGO_LDFLAGS="-L/usr/lib" + +# With go +go install -tags "huggingbento" github.com/warpstreamlabs/bento/cmd/bento@latest + +# Using make +make TAGS=huggingbento NODOWNLOAD +``` ## Docker Builds diff --git a/cmd/bento/main.go b/cmd/bento/main.go index 0c773d93b..ba61dbcad 100644 --- a/cmd/bento/main.go +++ b/cmd/bento/main.go @@ -16,6 +16,8 @@ var ( DateBuilt string // BinaryName binary name. BinaryName string = "bento" + // ProductName name of product for CLI. + ProductName string = "Bento" ) func main() { @@ -23,7 +25,7 @@ func main() { context.Background(), service.CLIOptSetVersion(Version, DateBuilt), service.CLIOptSetBinaryName(BinaryName), - service.CLIOptSetProductName("Bento"), + service.CLIOptSetProductName(ProductName), service.CLIOptSetDocumentationURL("https://warpstreamlabs.github.io/bento/docs"), service.CLIOptSetShowRunCommand(true), ) diff --git a/go.mod b/go.mod index e15b12142..58b5aacb6 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,9 @@ module github.com/warpstreamlabs/bento replace github.com/99designs/keyring => github.com/Jeffail/keyring v1.2.3 require ( - cloud.google.com/go/bigquery v1.59.1 - cloud.google.com/go/pubsub v1.36.1 - cloud.google.com/go/storage v1.37.0 + cloud.google.com/go/bigquery v1.61.0 + cloud.google.com/go/pubsub v1.38.0 + cloud.google.com/go/storage v1.40.0 cuelang.org/go v0.7.0 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.2 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.1 @@ -50,13 +50,14 @@ require ( github.com/clbanning/mxj/v2 v2.7.0 github.com/colinmarc/hdfs v1.1.3 github.com/couchbase/gocb/v2 v2.9.1 + github.com/daulet/tokenizers v0.9.0 github.com/denisenkom/go-mssqldb v0.12.3 github.com/dgraph-io/ristretto v0.1.1 github.com/dop251/goja v0.0.0-20231014103939-873a1496dc8e github.com/dop251/goja_nodejs v0.0.0-20231122114759-e84d9a924c5c github.com/dustin/go-humanize v1.0.1 github.com/eclipse/paho.mqtt.golang v1.4.3 - github.com/fatih/color v1.16.0 + 
github.com/fatih/color v1.17.0 github.com/fsnotify/fsnotify v1.7.0 github.com/generikvault/gvalstrings v0.0.0-20180926130504-471f38f0112a github.com/getsentry/sentry-go v0.27.0 @@ -80,6 +81,7 @@ require ( github.com/jmespath/go-jmespath v0.4.0 github.com/klauspost/compress v1.17.9 github.com/klauspost/pgzip v1.2.6 + github.com/knights-analytics/hugot v0.1.7-0.20240823085553-7da587ad260a github.com/lib/pq v1.10.9 github.com/linkedin/goavro/v2 v2.12.0 github.com/matoous/go-nanoid/v2 v2.0.0 @@ -122,12 +124,13 @@ require ( github.com/trinodb/trino-go-client v0.313.0 github.com/twmb/franz-go v1.16.1 github.com/twmb/franz-go/pkg/kmsg v1.7.0 - github.com/urfave/cli/v2 v2.27.1 + github.com/urfave/cli/v2 v2.27.4 github.com/vmihailenco/msgpack/v5 v5.4.1 github.com/xdg-go/scram v1.1.2 github.com/xeipuuv/gojsonschema v1.2.0 github.com/xitongsys/parquet-go v1.6.2 github.com/xitongsys/parquet-go-source v0.0.0-20211228015320-b4f792c43cd0 + github.com/yalue/onnxruntime_go v1.11.0 github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a go.etcd.io/etcd/api/v3 v3.5.14 go.etcd.io/etcd/client/v3 v3.5.14 @@ -141,13 +144,14 @@ require ( go.opentelemetry.io/otel/sdk v1.24.0 go.opentelemetry.io/otel/trace v1.24.0 go.uber.org/multierr v1.11.0 - golang.org/x/crypto v0.25.0 - golang.org/x/exp v0.0.0-20231006140011-7918f672742d - golang.org/x/net v0.27.0 - golang.org/x/oauth2 v0.17.0 - golang.org/x/sync v0.7.0 - golang.org/x/text v0.16.0 - google.golang.org/api v0.162.0 + golang.org/x/crypto v0.26.0 + golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 + golang.org/x/net v0.28.0 + golang.org/x/oauth2 v0.22.0 + golang.org/x/sync v0.8.0 + golang.org/x/text v0.17.0 + google.golang.org/api v0.184.0 + google.golang.org/grpc v1.64.0 google.golang.org/protobuf v1.34.2 gopkg.in/natefinch/lumberjack.v2 v2.2.1 gopkg.in/yaml.v3 v3.0.1 @@ -155,11 +159,12 @@ require ( ) require ( - cloud.google.com/go v0.112.0 // indirect - cloud.google.com/go/compute v1.24.0 // indirect - 
cloud.google.com/go/compute/metadata v0.2.3 // indirect - cloud.google.com/go/iam v1.1.6 // indirect - cloud.google.com/go/trace v1.10.5 // indirect + cloud.google.com/go v0.114.0 // indirect + cloud.google.com/go/auth v0.5.1 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect + cloud.google.com/go/compute/metadata v0.3.0 // indirect + cloud.google.com/go/iam v1.1.8 // indirect + cloud.google.com/go/trace v1.10.7 // indirect dario.cat/mergo v1.0.0 // indirect github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect github.com/99designs/keyring v1.2.2 // indirect @@ -177,9 +182,11 @@ require ( github.com/andybalholm/brotli v1.1.0 // indirect github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect github.com/apache/arrow/go/v14 v14.0.2 // indirect + github.com/apache/arrow/go/v15 v15.0.2 // indirect github.com/apache/thrift v0.18.1 // indirect github.com/ardielle/ardielle-go v1.5.2 // indirect github.com/armon/go-metrics v0.3.4 // indirect + github.com/aws/aws-sdk-go v1.55.5 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.0 // indirect github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.12.16 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 // indirect @@ -206,7 +213,7 @@ require ( github.com/btnguyen2k/consu/reddo v0.1.8 // indirect github.com/btnguyen2k/consu/semita v0.1.5 // indirect github.com/bufbuild/protocompile v0.8.0 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cockroachdb/apd/v3 v3.2.1 // indirect github.com/containerd/continuity v0.3.0 // indirect github.com/coreos/go-semver v0.3.0 // indirect @@ -215,7 +222,7 @@ require ( github.com/couchbase/gocbcoreps v0.1.3 // indirect github.com/couchbase/goprotostellar v1.0.2 // indirect github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20240607131231-fb385523de28 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.2 // 
indirect + github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/danieljoos/wincred v1.2.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect @@ -232,9 +239,10 @@ require ( github.com/form3tech-oss/jwt-go v3.2.5+incompatible // indirect github.com/frankban/quicktest v1.14.6 // indirect github.com/gabriel-vasile/mimetype v1.4.2 // indirect + github.com/go-errors/errors v1.5.1 // indirect github.com/go-faster/city v1.0.1 // indirect github.com/go-faster/errors v0.7.1 // indirect - github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect github.com/goccy/go-json v0.10.2 // indirect @@ -242,7 +250,7 @@ require ( github.com/golang-jwt/jwt v3.2.2+incompatible // indirect github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 // indirect github.com/golang-sql/sqlexp v0.1.0 // indirect - github.com/golang/glog v1.2.0 // indirect + github.com/golang/glog v1.2.1 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.4 // indirect @@ -252,7 +260,7 @@ require ( github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect - github.com/googleapis/gax-go/v2 v2.12.0 // indirect + github.com/googleapis/gax-go/v2 v2.12.5 // indirect github.com/gorilla/css v1.0.0 // indirect github.com/gosimple/unidecode v1.0.1 // indirect github.com/govalues/decimal v0.1.29 // indirect @@ -278,8 +286,10 @@ require ( github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect github.com/jcmturner/rpc/v2 v2.0.3 // indirect github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect 
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect github.com/klauspost/cpuid/v2 v2.2.5 // indirect + github.com/knights-analytics/HuggingFaceModelDownloader v1.3.5 // indirect github.com/kr/fs v0.1.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect @@ -289,6 +299,8 @@ require ( github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect github.com/moby/term v0.5.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/montanaflynn/stats v0.7.0 // indirect github.com/mpvl/unique v0.0.0-20150818121801-cbe035fff7de // indirect github.com/mtibben/percent v0.2.1 // indirect @@ -316,33 +328,33 @@ require ( github.com/shopspring/decimal v1.3.1 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/stretchr/objx v0.5.2 // indirect + github.com/viant/afs v1.25.1 // indirect + github.com/viant/afsc v1.9.3 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect github.com/xdg-go/stringprep v1.0.4 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect - github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect + github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect go.etcd.io/bbolt v1.3.10 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.14 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect 
go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/proto/otlp v1.1.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/mod v0.17.0 // indirect + golang.org/x/mod v0.20.0 // indirect golang.org/x/sys v0.24.0 // indirect - golang.org/x/term v0.22.0 // indirect + golang.org/x/term v0.23.0 // indirect golang.org/x/time v0.5.0 // indirect - golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect + golang.org/x/tools v0.24.0 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect - google.golang.org/appengine v1.6.8 // indirect - google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240227224415-6ceb2ff114de // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect - google.golang.org/grpc v1.63.2 // indirect + google.golang.org/genproto v0.0.0-20240604185151-ef581f913117 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240610135401-a8a62080eff3 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/jcmturner/aescts.v1 v1.0.1 // indirect gopkg.in/jcmturner/dnsutils.v1 v1.0.1 // indirect @@ -360,4 +372,4 @@ require ( modernc.org/token v1.1.0 // indirect ) -go 1.21 +go 1.22.0 diff --git a/go.sum b/go.sum index f5c1513f4..2f0045843 100644 --- a/go.sum +++ b/go.sum @@ -18,49 +18,51 @@ cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmW cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= -cloud.google.com/go v0.112.0 h1:tpFCD7hpHFlQ8yPwT3x+QeXqc2T6+n6T+hmABHfDUSM= -cloud.google.com/go v0.112.0/go.mod 
h1:3jEEVwZ/MHU4djK5t5RHuKOA/GbLddgTdVubX1qnPD4= +cloud.google.com/go v0.114.0 h1:OIPFAdfrFDFO2ve2U7r/H5SwSbBzEdrBdE7xkgwc+kY= +cloud.google.com/go v0.114.0/go.mod h1:ZV9La5YYxctro1HTPug5lXH/GefROyW8PPD4T8n9J8E= +cloud.google.com/go/auth v0.5.1 h1:0QNO7VThG54LUzKiQxv8C6x1YX7lUrzlAa1nVLF8CIw= +cloud.google.com/go/auth v0.5.1/go.mod h1:vbZT8GjzDf3AVqCcQmqeeM32U9HBFc32vVVAbwDsa6s= +cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKFt//oWu7HX4= +cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/bigquery v1.59.1 h1:CpT+/njKuKT3CEmswm6IbhNu9u35zt5dO4yPDLW+nG4= -cloud.google.com/go/bigquery v1.59.1/go.mod h1:VP1UJYgevyTwsV7desjzNzDND5p6hZB+Z8gZJN1GQUc= -cloud.google.com/go/compute v1.24.0 h1:phWcR2eWzRJaL/kOiJwfFsPs4BaKq1j6vnpZrc1YlVg= -cloud.google.com/go/compute v1.24.0/go.mod h1:kw1/T+h/+tK2LJK0wiPPx1intgdAM3j/g3hFDlscY40= -cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= -cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= -cloud.google.com/go/datacatalog v1.19.3 h1:A0vKYCQdxQuV4Pi0LL9p39Vwvg4jH5yYveMv50gU5Tw= -cloud.google.com/go/datacatalog v1.19.3/go.mod h1:ra8V3UAsciBpJKQ+z9Whkxzxv7jmQg1hfODr3N3YPJ4= +cloud.google.com/go/bigquery v1.61.0 h1:w2Goy9n6gh91LVi6B2Sc+HpBl8WbWhIyzdvVvrAuEIw= +cloud.google.com/go/bigquery v1.61.0/go.mod 
h1:PjZUje0IocbuTOdq4DBOJLNYB0WF3pAKBHzAYyxCwFo= +cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc= +cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= +cloud.google.com/go/datacatalog v1.20.1 h1:czcba5mxwRM5V//jSadyig0y+8aOHmN7gUl9GbHu59E= +cloud.google.com/go/datacatalog v1.20.1/go.mod h1:Jzc2CoHudhuZhpv78UBAjMEg3w7I9jHA11SbRshWUjk= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/iam v1.1.6 h1:bEa06k05IO4f4uJonbB5iAgKTPpABy1ayxaIZV/GHVc= -cloud.google.com/go/iam v1.1.6/go.mod h1:O0zxdPeGBoFdWW3HWmBxJsk0pfvNM/p/qa82rWOGTwI= -cloud.google.com/go/kms v1.15.7 h1:7caV9K3yIxvlQPAcaFffhlT7d1qpxjB1wHBtjWa13SM= -cloud.google.com/go/kms v1.15.7/go.mod h1:ub54lbsa6tDkUwnu4W7Yt1aAIFLnspgh0kPGToDukeI= -cloud.google.com/go/logging v1.9.0 h1:iEIOXFO9EmSiTjDmfpbRjOxECO7R8C7b8IXUGOj7xZw= -cloud.google.com/go/logging v1.9.0/go.mod h1:1Io0vnZv4onoUnsVUQY3HZ3Igb1nBchky0A0y7BBBhE= -cloud.google.com/go/longrunning v0.5.5 h1:GOE6pZFdSrTb4KAiKnXsJBtlE6mEyaW44oKyMILWnOg= -cloud.google.com/go/longrunning v0.5.5/go.mod h1:WV2LAxD8/rg5Z1cNW6FJ/ZpX4E4VnDnoTk0yawPBB7s= -cloud.google.com/go/monitoring v1.18.0 h1:NfkDLQDG2UR3WYZVQE8kwSbUIEyIqJUPl+aOQdFH1T4= -cloud.google.com/go/monitoring v1.18.0/go.mod h1:c92vVBCeq/OB4Ioyo+NbN2U7tlg5ZH41PZcdvfc+Lcg= +cloud.google.com/go/iam v1.1.8 h1:r7umDwhj+BQyz0ScZMp4QrGXjSTI3ZINnpgU2nlB/K0= +cloud.google.com/go/iam v1.1.8/go.mod h1:GvE6lyMmfxXauzNq8NbgJbeVQNspG+tcdL/W8QO1+zE= +cloud.google.com/go/kms v1.17.1 h1:5k0wXqkxL+YcXd4viQzTqCgzzVKKxzgrK+rCZJytEQs= +cloud.google.com/go/kms v1.17.1/go.mod h1:DCMnCF/apA6fZk5Cj4XsD979OyHAqFasPuA5Sd0kGlQ= +cloud.google.com/go/logging v1.10.0 h1:f+ZXMqyrSJ5vZ5pE/zr0xC8y/M9BLNzQeLBwfeZ+wY4= +cloud.google.com/go/logging v1.10.0/go.mod h1:EHOwcxlltJrYGqMGfghSet736KR3hX1MAj614mrMk9I= 
+cloud.google.com/go/longrunning v0.5.7 h1:WLbHekDbjK1fVFD3ibpFFVoyizlLRl73I7YKuAKilhU= +cloud.google.com/go/longrunning v0.5.7/go.mod h1:8GClkudohy1Fxm3owmBGid8W0pSgodEMwEAztp38Xng= +cloud.google.com/go/monitoring v1.19.0 h1:NCXf8hfQi+Kmr56QJezXRZ6GPb80ZI7El1XztyUuLQI= +cloud.google.com/go/monitoring v1.19.0/go.mod h1:25IeMR5cQ5BoZ8j1eogHE5VPJLlReQ7zFp5OiLgiGZw= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= -cloud.google.com/go/pubsub v1.36.1 h1:dfEPuGCHGbWUhaMCTHUFjfroILEkx55iUmKBZTP5f+Y= -cloud.google.com/go/pubsub v1.36.1/go.mod h1:iYjCa9EzWOoBiTdd4ps7QoMtMln5NwaZQpK1hbRfBDE= +cloud.google.com/go/pubsub v1.38.0 h1:J1OT7h51ifATIedjqk/uBNPh+1hkvUaH4VKbz4UuAsc= +cloud.google.com/go/pubsub v1.38.0/go.mod h1:IPMJSWSus/cu57UyR01Jqa/bNOQA+XnPF6Z4dKW4fAA= cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= -cloud.google.com/go/storage v1.37.0 h1:WI8CsaFO8Q9KjPVtsZ5Cmi0dXV25zMoX0FklT7c3Jm4= -cloud.google.com/go/storage v1.37.0/go.mod h1:i34TiT2IhiNDmcj65PqwCjcoUX7Z5pLzS8DEmoiFq1k= -cloud.google.com/go/trace v1.10.5 h1:0pr4lIKJ5XZFYD9GtxXEWr0KkVeigc3wlGpZco0X1oA= -cloud.google.com/go/trace v1.10.5/go.mod h1:9hjCV1nGBCtXbAE4YK7OqJ8pmPYSxPA0I67JwRd5s3M= +cloud.google.com/go/storage v1.40.0 h1:VEpDQV5CJxFmJ6ueWNsKxcr1QAYOXEgxDa+sBbJahPw= +cloud.google.com/go/storage 
v1.40.0/go.mod h1:Rrj7/hKlG87BLqDJYtwR0fbPld8uJPbQ2ucUMY7Ir0g= +cloud.google.com/go/trace v1.10.7 h1:gK8z2BIJQ3KIYGddw9RJLne5Fx0FEXkrEQzPaeEYVvk= +cloud.google.com/go/trace v1.10.7/go.mod h1:qk3eiKmZX0ar2dzIJN/3QhY2PIFh1eqcIdaN5uEjQPM= cuelabs.dev/go/oci/ociregistry v0.0.0-20231103182354-93e78c079a13 h1:zkiIe8AxZ/kDjqQN+mDKc5BxoVJOqioSdqApjc+eB1I= cuelabs.dev/go/oci/ociregistry v0.0.0-20231103182354-93e78c079a13/go.mod h1:XGKYSMtsJWfqQYPwq51ZygxAPqpEUj/9bdg16iDPTAA= cuelang.org/go v0.7.0 h1:gMztinxuKfJwMIxtboFsNc6s8AxwJGgsJV+3CuLffHI= @@ -172,6 +174,8 @@ github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6IC github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs= github.com/apache/arrow/go/v14 v14.0.2 h1:N8OkaJEOfI3mEZt07BIkvo4sC6XDbL+48MBPWO5IONw= github.com/apache/arrow/go/v14 v14.0.2/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= +github.com/apache/arrow/go/v15 v15.0.2 h1:60IliRbiyTWCWjERBCkO1W4Qun9svcYoZrSLcyOsMLE= +github.com/apache/arrow/go/v15 v15.0.2/go.mod h1:DGXsR3ajT524njufqf95822i+KTh+yea1jass9YXgjA= github.com/apache/pulsar-client-go v0.12.0 h1:rrMlwpr6IgLRPXLRRh2vSlcw5tGV2PUSjZwmqgh2B2I= github.com/apache/pulsar-client-go v0.12.0/go.mod h1:dkutuH4oS2pXiGm+Ti7fQZ4MRjrMPZ8IJeEGAWMeckk= github.com/apache/thrift v0.0.0-20181112125854-24918abba929/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= @@ -188,6 +192,8 @@ github.com/aws/aws-lambda-go v1.46.0 h1:UWVnvh2h2gecOlFhHQfIPQcD8pL/f7pVCutmFl+o github.com/aws/aws-lambda-go v1.46.0/go.mod h1:dpMpZgvWx5vuQJfBt0zqBha60q7Dd7RfgJv23DymV8A= github.com/aws/aws-sdk-go v1.30.19/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/aws/aws-sdk-go v1.42.37/go.mod h1:OGr6lGMAKGlG9CVrYnWYDKIyb829c6EVBRjxqjmPepc= +github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= +github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= 
github.com/aws/aws-sdk-go-v2 v1.7.1/go.mod h1:L5LuPC1ZgDr2xQS7AmIec/Jlc7O/Y1u2KxJyNVab250= github.com/aws/aws-sdk-go-v2 v1.25.0 h1:sv7+1JVJxOu/dD/sz/csHX7jFqmP001TIY7aytBWDSQ= github.com/aws/aws-sdk-go-v2 v1.25.0/go.mod h1:G104G1Aho5WqF+SR3mDIobTABQzpYV0WxMsKxlMggOA= @@ -310,8 +316,8 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/logex v1.2.0/go.mod h1:9+9sk7u7pGNWYMkh0hdiL++6OeibzJccyQU4p4MedaY= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= @@ -360,14 +366,16 @@ github.com/couchbaselabs/gocaves/client v0.0.0-20230404095311-05e3ba4f0259 h1:2T github.com/couchbaselabs/gocaves/client v0.0.0-20230404095311-05e3ba4f0259/go.mod h1:AVekAZwIY2stsJOMWLAS/0uA/+qdp7pjO8EHnl61QkY= github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20240607131231-fb385523de28 h1:lhGOw8rNG6RAadmmaJAF3PJ7MNt7rFuWG7BHCYMgnGE= github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20240607131231-fb385523de28/go.mod h1:o7T431UOfFVHDNvMBUmUxpHnhivwv7BziUao/nMl81E= -github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 
+github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/danieljoos/wincred v1.2.0 h1:ozqKHaLK0W/ii4KVbbvluM91W2H3Sh0BncbUNPS7jLE= github.com/danieljoos/wincred v1.2.0/go.mod h1:FzQLLMKBFdvu+osBrnFODiv32YGwCfx0SkRa/eYHgec= +github.com/daulet/tokenizers v0.9.0 h1:PSjFUGeuhqb3C0GKP9hdvtHvJ6L1AZceV+0nYGACtCk= +github.com/daulet/tokenizers v0.9.0/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -432,8 +440,8 @@ github.com/envoyproxy/go-control-plane v0.10.1/go.mod h1:AY7fTTXNdv/aJ2O5jwpxAPO github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= -github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= +github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= +github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod 
h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= @@ -459,8 +467,8 @@ github.com/getkin/kin-openapi v0.76.0/go.mod h1:660oXbgy5JFMKreazJaQTw7o+X00qeSy github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps= github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= -github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= +github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk= +github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-faker/faker/v4 v4.3.0 h1:UXOW7kn/Mwd0u6MR30JjUKVzguT20EB/hBOddAAO+DY= github.com/go-faker/faker/v4 v4.3.0/go.mod h1:F/bBy8GH9NxOxMInug5Gx4WYeG6fHJZ8Ol/dhcpRub4= github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw= @@ -485,8 +493,8 @@ github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7 github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= -github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= @@ -524,8 +532,8 @@ github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.2.0 h1:uCdmnmatrKCgMBlM4rMuJZWOkPDqdbZPnrMXDY4gI68= -github.com/golang/glog v1.2.0/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= +github.com/golang/glog v1.2.1 h1:OptwRhECazUx5ix5TTWC3EZhsZEHWcYWY4FQHTIubm4= +github.com/golang/glog v1.2.1/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -594,8 +602,8 @@ github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPg github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= -github.com/google/martian/v3 v3.3.2 h1:IqNFLAmvJOgVlpdEBiQbDc2EwKW77amAycfTuWKdfvw= -github.com/google/martian/v3 v3.3.2/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= +github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= +github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= github.com/google/pprof 
v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= @@ -624,8 +632,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfF github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/googleapis/gax-go/v2 v2.12.0 h1:A+gCJKdRfqXkr+BIRGtZLibNXf0m1f9E4HG56etFpas= -github.com/googleapis/gax-go/v2 v2.12.0/go.mod h1:y+aIqrI5eb1YGMVJfuV3185Ts/D7qKpsEkdD5+I6QGU= +github.com/googleapis/gax-go/v2 v2.12.5 h1:8gw9KZK8TiVKB6q3zHY3SBzLnrGp6HQjyfYBYGmXdxA= +github.com/googleapis/gax-go/v2 v2.12.5/go.mod h1:BUDKcWo+RaKq5SC9vVYL0wLADa3VcfswbOMMRmB9H3E= github.com/googleapis/gnostic v0.5.1/go.mod h1:6U4PtQXGIEt/Z3h5MAT7FNofLnw9vXk2cUuW7uA/OeU= github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97DwqyJO1AENw9kA= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= @@ -776,6 +784,7 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod 
h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= @@ -796,6 +805,10 @@ github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/q github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= +github.com/knights-analytics/HuggingFaceModelDownloader v1.3.5 h1:AxngS8+MHZXp88Xyc1C9tF6UDTHchyMZgfGDz+g7gQU= +github.com/knights-analytics/HuggingFaceModelDownloader v1.3.5/go.mod h1:MM0048ktg6UVHmw5NGebHkLqfTPtGIyomnxBJNzlESk= +github.com/knights-analytics/hugot v0.1.7-0.20240823085553-7da587ad260a h1:vO4rw1coQI2l/Xvsimr2EHQHrNoir+QQGf8gKs0Mseg= +github.com/knights-analytics/hugot v0.1.7-0.20240823085553-7da587ad260a/go.mod h1:ks+WSLmlYgimSvA7gWE6xc/DR3G8TgbM9RUZG6T2Ccs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8= @@ -870,9 +883,11 @@ github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0Gq github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod 
h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3PzxT8aQXRPkAt8xlV/e7d7w8GM5g0fa5F0D8= github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= @@ -1107,8 +1122,12 @@ github.com/twmb/franz-go v1.16.1 h1:rpWc7fB9jd7TgmCyfxzenBI+QbgS8ZfJOUQE+tzPtbE= github.com/twmb/franz-go v1.16.1/go.mod h1:/pER254UPPGp/4WfGqRi+SIRGE50RSQzVubQp6+N4FA= github.com/twmb/franz-go/pkg/kmsg v1.7.0 h1:a457IbvezYfA5UkiBvyV3zj0Is3y1i8EJgqjJYoij2E= github.com/twmb/franz-go/pkg/kmsg v1.7.0/go.mod h1:se9Mjdt0Nwzc9lnjJ0HyDtLyBnaBDAd7pCje47OhSyw= -github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho= -github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= +github.com/urfave/cli/v2 v2.27.4 h1:o1owoI+02Eb+K107p27wEX9Bb8eqIoZCfLXloLUSWJ8= +github.com/urfave/cli/v2 v2.27.4/go.mod h1:m4QzxcD2qpra4z7WhzEGn74WZLViBnMpb1ToCAKdGRQ= +github.com/viant/afs v1.25.1 h1:IPcqwzsPUaWqsSkQXoM1vXwQuRI6u7ZgqQHKQZ8Wxyg= +github.com/viant/afs v1.25.1/go.mod h1:rScbFd9LJPGTM8HOI8Kjwee0AZ+MZMupAvFpPg+Qdj4= +github.com/viant/afsc v1.9.3 h1:E/nt0fCYEqUxiCxexkMGIy2I/1To+Z1PvQhvrjUMjQY= +github.com/viant/afsc v1.9.3/go.mod h1:FA/xVjaMM10qGByabP8anTVMH6N4eUsAeWm5xcEZJJA= github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= @@ -1138,8 +1157,10 @@ github.com/xitongsys/parquet-go-source v0.0.0-20190524061010-2b72cbee77d5/go.mod 
github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0/go.mod h1:HYhIKsdns7xz80OgkbgJYrtQY7FjHWHKH6cvN7+czGE= github.com/xitongsys/parquet-go-source v0.0.0-20211228015320-b4f792c43cd0 h1:ti/bIIF7mKX56sp90ByfAsJRkkmEkY71PWavIG+BGL4= github.com/xitongsys/parquet-go-source v0.0.0-20211228015320-b4f792c43cd0/go.mod h1:qLb2Itmdcp7KPa5KZKvhE9U1q5bYSOmgeOckF/H2rQA= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= +github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= +github.com/yalue/onnxruntime_go v1.11.0 h1:aKH4yPIbqfcB3SfnQWq/WxzLelkyolntHnffL3eMBHY= +github.com/yalue/onnxruntime_go v1.11.0/go.mod h1:b4X26A8pekNb1ACJ58wAXgNKeUCGEAQ9dmACut9Sm/4= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a h1:fZHgsYlfvtyqToslyjUt3VOPF4J7aK/3MPcK7xp3PDk= github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a/go.mod h1:ul22v+Nro/R083muKhosV54bj5niojjWZvU8xrevuH4= @@ -1154,8 +1175,8 @@ github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= -go.einride.tech/aip v0.66.0 h1:XfV+NQX6L7EOYK11yoHHFtndeaWh3KbD9/cN/6iWEt8= -go.einride.tech/aip v0.66.0/go.mod h1:qAhMsfT7plxBX+Oy7Huol6YUvZ0ZzdUz26yZsQwfl1M= +go.einride.tech/aip v0.67.1 h1:d/4TW92OxXBngkSOwWS2CH5rez869KpKMaN44mdxkFI= +go.einride.tech/aip v0.67.1/go.mod 
h1:ZGX4/zKw8dcgzdLsrvpOOGxfxI2QSk12SlP7d6c0/XI= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.etcd.io/bbolt v1.3.10 h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0= go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ= @@ -1181,8 +1202,8 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 h1:4Pp6oUg3+e/6M4C0A/3kJ2VYa++dsWVTtGgLVj5xtHg= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0/go.mod h1:Mjt1i1INqiaoZOMGR1RIUJN+i3ChKoFRqzrRQhlkbs0= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0 h1:sv9kVfal0MK0wBMCOGr+HeJm9v803BkJxGrk2au7j08= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0/go.mod h1:SK2UL73Zy1quvRPonmOmRDiWk1KBV3LyIeeIxcEApWw= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= go.opentelemetry.io/otel/exporters/jaeger v1.17.0 h1:D7UpUy2Xc2wsi1Ras6V40q806WM07rqoCWzXu7Sqy+4= @@ -1254,8 +1275,8 @@ golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.20.0/go.mod h1:Xwo95rrVNIoSMx9wa1JroENMToLWn3RNVrTBpLHgZPQ= -golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= -golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= 
+golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -1270,8 +1291,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= +golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 h1:kx6Ds3MlpiUHKj7syVnbp57++8WpuKPcR5yjLBjvLEA= +golang.org/x/exp v0.0.0-20240823005443-9b4947da3948/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= @@ -1306,8 +1327,8 @@ golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod 
h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= -golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1363,8 +1384,8 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= -golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= -golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1377,8 +1398,8 @@ golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= 
golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.17.0 h1:6m3ZPmLEFdVxKKWnKq4VqZ60gutO35zm+zrAHVmHyDQ= -golang.org/x/oauth2 v0.17.0/go.mod h1:OzPDGQiuQMguemayvdylqddI7qcD9lnSDb+1FiwQ5HA= +golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= +golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1392,8 +1413,8 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1483,8 +1504,8 @@ golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod 
h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= +golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1500,8 +1521,8 @@ golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1573,8 +1594,8 @@ golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= +golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -1612,8 +1633,8 @@ google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34q google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= -google.golang.org/api v0.162.0 h1:Vhs54HkaEpkMBdgGdOT2P6F0csGG/vxDS0hWHJzmmps= -google.golang.org/api v0.162.0/go.mod h1:6SulDkfoBIg4NFmCuZ39XeeAgSHCPecfSUuDyYlAHs0= +google.golang.org/api v0.184.0 h1:dmEdk6ZkJNXy1JcDhn/ou0ZUq7n9zropG2/tR4z+RDg= +google.golang.org/api v0.184.0/go.mod h1:CeDTtUEiYENAf8PPG5VZW2yNp2VM3VWbCeTioAZBTBA= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1621,8 +1642,6 @@ google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod 
h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= -google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -1666,12 +1685,12 @@ google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79/go.mod h1:yiaVoXHpRzHGyxV3o4DktVWY4mSUErTKaeEOq6C3t3U= -google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de h1:F6qOa9AZTYJXOUEr4jDysRDLrm4PHePlge4v4TGAlxY= -google.golang.org/genproto v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:VUhTRKeHn9wwcdrk73nvdC9gF178Tzhmt/qyaFcPLSo= -google.golang.org/genproto/googleapis/api v0.0.0-20240227224415-6ceb2ff114de h1:jFNzHPIeuzhdRwVhbZdiym9q0ory/xY3sA+v2wPg8I0= -google.golang.org/genproto/googleapis/api v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:5iCWqnniDlqZHrd3neWVTOwvh/v6s3232omMecelax8= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda h1:LI5DOvAxUPMv/50agcLLoo+AdWc1irS9Rzz4vPuD1V4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= +google.golang.org/genproto v0.0.0-20240604185151-ef581f913117 h1:HCZ6DlkKtCDAtD8ForECsY3tKuaR+p4R3grlK80uCCc= 
+google.golang.org/genproto v0.0.0-20240604185151-ef581f913117/go.mod h1:lesfX/+9iA+3OdqeCpoDddJaNxVB1AB6tD7EfqMmprc= +google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3 h1:QW9+G6Fir4VcRXVH8x3LilNAb6cxBGLa6+GM4hRwexE= +google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3/go.mod h1:kdrSS/OiLkPrNUpzD4aHgCq2rVuC/YRxok32HXZ4vRE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240610135401-a8a62080eff3 h1:9Xyg6I9IWQZhRVfCWjKK+l6kI0jHcPesVlMnT//aHNo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240610135401-a8a62080eff3/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -1694,8 +1713,8 @@ google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAG google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.43.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= -google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= -google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= diff --git a/internal/impl/huggingface/integration_test.go 
b/internal/impl/huggingface/integration_test.go new file mode 100644 index 000000000..8d8b0a9cb --- /dev/null +++ b/internal/impl/huggingface/integration_test.go @@ -0,0 +1,414 @@ +//go:build huggingbento + +package huggingface + +import ( + "context" + _ "embed" + "encoding/json" + "fmt" + "os" + "strings" + "sync" + "testing" + "time" + + "github.com/knights-analytics/hugot" + "github.com/knights-analytics/hugot/pipelines" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + _ "github.com/warpstreamlabs/bento/public/components/io" + _ "github.com/warpstreamlabs/bento/public/components/pure" + "github.com/warpstreamlabs/bento/public/service" + ort "github.com/yalue/onnxruntime_go" +) + +//go:embed testdata/expected_token_classification.json +var tokenClassificationExpectedByte []byte + +//go:embed testdata/expected_feature_extraction.json +var featureExtractionExpectedByte []byte + +var ( + onnxRuntimeSession *hugot.Session + onnxLibPath string +) + +func setup(t *testing.T) { + var ( + ok bool + err error + ) + + onnxLibPath, ok = os.LookupEnv("ONNXRUNTIME_SHARED_LIB_PATH") + if !ok { + t.Error("Required environment variable 'ONNXRUNTIME_SHARED_LIB_PATH' was not set.") + } + + assert.Eventually(t, func() bool { + return !ort.IsInitialized() + }, time.Second*10, time.Millisecond*100) + + onnxRuntimeSession, err = globalSession.NewSession(onnxLibPath) + require.NoError(t, err) + require.NotNil(t, onnxRuntimeSession) +} + +func TestIntegration_TextClassifier(t *testing.T) { + setup(t) + + tmpDir := t.TempDir() + + modelName := "KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english" + modelPath, err := onnxRuntimeSession.DownloadModel(modelName, tmpDir, hugot.NewDownloadOptions()) + require.NoError(t, err) + t.Logf("downloading to %v", modelPath) + defer t.Cleanup(func() { + assert.NoError(t, os.RemoveAll(tmpDir)) + }) + + template := fmt.Sprintf(` +nlp_classify_text: + pipeline_name: classify-incoming-data-1 + 
onnx_library_path: %s + model_path: %s +`, onnxLibPath, modelPath) + + b := service.NewStreamBuilder() + require.NoError(t, b.SetLoggerYAML("level: INFO")) + require.NoError(t, b.AddProcessorYAML(template)) + + outBatches := map[string]struct{}{} + var outMut sync.Mutex + handler := func(_ context.Context, mb service.MessageBatch) error { + outMut.Lock() + defer outMut.Unlock() + + outMsgs := []string{} + for _, m := range mb { + b, err := m.AsBytes() + assert.NoError(t, err) + outMsgs = append(outMsgs, string(b)) + } + + outBatches[strings.Join(outMsgs, ",")] = struct{}{} + return nil + } + require.NoError(t, b.AddBatchConsumerFunc(handler)) + + pushFn, err := b.AddBatchProducerFunc() + + strm, err := b.Build() + require.NoError(t, err) + + promptsBatch := [][]string{ + {"Bento boxes taste amazing!", "Meow meow meow... meow meow."}, + {"Why does the blobfish look so sad? :(", "Sir, are you aware of the magnificent octopus on your head?"}, + {"Streaming data is my favourite pastime.", "You are wearing a silly hat."}, + } + + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + + ctx, done := context.WithTimeout(context.Background(), time.Second*60) + defer done() + + for _, prompts := range promptsBatch { + batch := make(service.MessageBatch, len(prompts)) + for i, prompt := range prompts { + batch[i] = service.NewMessage([]byte(prompt)) + } + require.NoError(t, pushFn(ctx, batch)) + } + + require.NoError(t, strm.StopWithin(time.Second*30)) + }() + + require.NoError(t, strm.Run(context.Background())) + wg.Wait() + + outMut.Lock() + assert.Equal(t, map[string]struct{}{ + `[{"Label":"POSITIVE","Score":0.999869}],[{"Label":"POSITIVE","Score":0.9992634}]`: {}, + `[{"Label":"NEGATIVE","Score":0.9996588}],[{"Label":"POSITIVE","Score":0.9908547}]`: {}, + `[{"Label":"POSITIVE","Score":0.9811118}],[{"Label":"NEGATIVE","Score":0.9700846}]`: {}, + }, outBatches) + outMut.Unlock() + +} + +func TestIntegration_TokenClassifier(t *testing.T) { + setup(t) + + 
tmpDir := t.TempDir() + + modelName := "KnightsAnalytics/distilbert-NER" + modelPath, err := onnxRuntimeSession.DownloadModel(modelName, tmpDir, hugot.NewDownloadOptions()) + require.NoError(t, err) + + defer t.Cleanup(func() { + assert.NoError(t, os.RemoveAll(tmpDir)) + }) + + template := fmt.Sprintf(` +nlp_classify_tokens: + pipeline_name: classify-tokens + onnx_library_path: %s + model_path: %s +`, onnxLibPath, modelPath) + + b := service.NewStreamBuilder() + require.NoError(t, b.SetLoggerYAML("level: INFO")) + require.NoError(t, b.AddProcessorYAML(template)) + + var outBatches [][]string + var outMut sync.Mutex + handler := func(_ context.Context, mb service.MessageBatch) error { + outMut.Lock() + defer outMut.Unlock() + + outMsgs := []string{} + for _, m := range mb { + b, err := m.AsBytes() + assert.NoError(t, err) + outMsgs = append(outMsgs, string(b)) + } + outBatches = append(outBatches, outMsgs) + return nil + } + require.NoError(t, b.AddBatchConsumerFunc(handler)) + + pushFn, err := b.AddBatchProducerFunc() + + strm, err := b.Build() + require.NoError(t, err) + + promptsBatch := [][]string{ + {"Japanese Bento boxes taste amazing!", "My name is Wolfgang and I live in Berlin."}, + {"WarpStream Labs have a great offering!", "An Italian man went to Malta..."}, + {"NVIDIA corporation was valued higher than Apple!?", "My silly hat is from Hatfield."}, + } + + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + + ctx, done := context.WithTimeout(context.Background(), time.Second*60) + defer done() + + for _, prompts := range promptsBatch { + batch := make(service.MessageBatch, len(prompts)) + for i, prompt := range prompts { + batch[i] = service.NewMessage([]byte(prompt)) + } + require.NoError(t, pushFn(ctx, batch)) + } + + require.NoError(t, strm.StopWithin(time.Second*30)) + }() + + require.NoError(t, strm.Run(context.Background())) + wg.Wait() + + type Tokens struct { + Entities []pipelines.Entity `json:"entities"` + } + + var 
expectedResults map[string]Tokens + err = json.Unmarshal(tokenClassificationExpectedByte, &expectedResults) + require.NoError(t, err) + + outMut.Lock() + for batch, prompts := range promptsBatch { + for msg, prompt := range prompts { + var actualEntity []pipelines.Entity + err = json.Unmarshal([]byte(outBatches[batch][msg]), &actualEntity) + require.NoError(t, err) + assert.Equal(t, expectedResults[prompt].Entities, actualEntity) + } + + } + outMut.Unlock() +} + +func TestIntegration_FeatureExtractor(t *testing.T) { + setup(t) + + tmpDir := t.TempDir() + + modelName := "sentence-transformers/all-MiniLM-L6-v2" + + modelPath, err := onnxRuntimeSession.DownloadModel(modelName, tmpDir, hugot.NewDownloadOptions()) + require.NoError(t, err) + + defer t.Cleanup(func() { + assert.NoError(t, os.RemoveAll(tmpDir)) + }) + + template := fmt.Sprintf(` +nlp_extract_features: + pipeline_name: classify-incoming-data-1 + onnx_library_path: %s + model_path: %s +`, onnxLibPath, modelPath) + + b := service.NewStreamBuilder() + require.NoError(t, b.SetLoggerYAML("level: INFO")) + require.NoError(t, b.AddProcessorYAML(template)) + + var outBatches [][]string + var outMut sync.Mutex + handler := func(_ context.Context, mb service.MessageBatch) error { + outMut.Lock() + defer outMut.Unlock() + + outMsgs := []string{} + for _, m := range mb { + b, err := m.AsBytes() + assert.NoError(t, err) + outMsgs = append(outMsgs, string(b)) + } + outBatches = append(outBatches, outMsgs) + return nil + } + require.NoError(t, b.AddBatchConsumerFunc(handler)) + + pushFn, err := b.AddBatchProducerFunc() + + strm, err := b.Build() + require.NoError(t, err) + + promptsBatch := [][]string{ + {"Bento boxes taste amazing!", "Meow meow meow... 
meow meow."}, + {"Streaming data is my favourite pastime.", "You are wearing a silly hat."}, + } + + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + + ctx, done := context.WithTimeout(context.Background(), time.Second*60) + defer done() + + for _, prompts := range promptsBatch { + batch := make(service.MessageBatch, len(prompts)) + for i, prompt := range prompts { + batch[i] = service.NewMessage([]byte(prompt)) + } + require.NoError(t, pushFn(ctx, batch)) + } + + require.NoError(t, strm.StopWithin(time.Second*30)) + }() + + require.NoError(t, strm.Run(context.Background())) + wg.Wait() + + var expectedResults map[string][]float64 + err = json.Unmarshal(featureExtractionExpectedByte, &expectedResults) + require.NoError(t, err) + + outMut.Lock() + for batch, prompts := range promptsBatch { + for msg, prompt := range prompts { + var actualEntity []float64 + err = json.Unmarshal([]byte(outBatches[batch][msg]), &actualEntity) + require.NoError(t, err) + assert.Equal(t, expectedResults[prompt], actualEntity) + } + + } + outMut.Unlock() + +} + +func TestIntegration_TextClassifier_Download(t *testing.T) { + setup(t) + + tmpDir := t.TempDir() + + modelName := "KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english" + + defer t.Cleanup(func() { + assert.NoError(t, os.RemoveAll(tmpDir)) + }) + + template := fmt.Sprintf(` +nlp_classify_text: + pipeline_name: classify-incoming-data-1 + onnx_library_path: %s + model_path: %s + enable_model_download: true + model_download_options: + model_repository: %s +`, onnxLibPath, tmpDir, modelName) + + b := service.NewStreamBuilder() + require.NoError(t, b.SetLoggerYAML("level: DEBUG")) + require.NoError(t, b.AddProcessorYAML(template)) + + outBatches := map[string]struct{}{} + var outMut sync.Mutex + handler := func(_ context.Context, mb service.MessageBatch) error { + outMut.Lock() + defer outMut.Unlock() + + outMsgs := []string{} + for _, m := range mb { + b, err := m.AsBytes() + assert.NoError(t, err) + 
outMsgs = append(outMsgs, string(b)) + } + + outBatches[strings.Join(outMsgs, ",")] = struct{}{} + return nil + } + require.NoError(t, b.AddBatchConsumerFunc(handler)) + + pushFn, err := b.AddBatchProducerFunc() + + strm, err := b.Build() + require.NoError(t, err) + + promptsBatch := [][]string{ + {"Bento boxes taste amazing!", "Meow meow meow... meow meow."}, + {"Why does the blobfish look so sad? :(", "Sir, are you aware of the magnificent octopus on your head?"}, + {"Streaming data is my favourite pastime.", "You are wearing a silly hat."}, + } + + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + time.Sleep(60 * time.Second) + ctx, done := context.WithTimeout(context.Background(), time.Second*60) + defer done() + + for _, prompts := range promptsBatch { + batch := make(service.MessageBatch, len(prompts)) + for i, prompt := range prompts { + batch[i] = service.NewMessage([]byte(prompt)) + } + require.NoError(t, pushFn(ctx, batch)) + } + + require.NoError(t, strm.StopWithin(time.Second*30)) + }() + + require.NoError(t, strm.Run(context.Background())) + + wg.Wait() + + outMut.Lock() + assert.Equal(t, map[string]struct{}{ + `[{"Label":"POSITIVE","Score":0.999869}],[{"Label":"POSITIVE","Score":0.9992634}]`: {}, + `[{"Label":"NEGATIVE","Score":0.9996588}],[{"Label":"POSITIVE","Score":0.9908547}]`: {}, + `[{"Label":"POSITIVE","Score":0.9811118}],[{"Label":"NEGATIVE","Score":0.9700846}]`: {}, + }, outBatches) + outMut.Unlock() + +} diff --git a/internal/impl/huggingface/processor.go b/internal/impl/huggingface/processor.go new file mode 100644 index 000000000..f90bfcdad --- /dev/null +++ b/internal/impl/huggingface/processor.go @@ -0,0 +1,199 @@ +//go:build huggingbento + +package huggingface + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/knights-analytics/hugot" + "github.com/knights-analytics/hugot/pipelines" + + "github.com/warpstreamlabs/bento/public/service" +) + +var description = `This component uses 
[Hugot](https://github.com/knights-analytics/hugot), a library that provides an interface for running [Open Neural Network Exchange (ONNX) models](https://onnx.ai/onnx/intro/) and transformer pipelines, with a focus on NLP tasks. + +Currently, [HuggingBento only implements](https://github.com/knights-analytics/hugot/tree/main?tab=readme-ov-file#implemented-pipelines): + +- [featureExtraction](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.FeatureExtractionPipeline) +- [textClassification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TextClassificationPipeline) +- [tokenClassification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TokenClassificationPipeline) + +### What is a pipeline? +From [HuggingFace docs](https://huggingface.co/docs/transformers/en/main_classes/pipelines): +> A pipeline in 🤗 Transformers is an abstraction referring to a series of steps that are executed in a specific order to preprocess and transform data and return a prediction from a model. Some example stages found in a pipeline might be data preprocessing, feature extraction, and normalization. + +:::warning +While, only models in [ONNX](https://onnx.ai/) format are supported, exporting existing formats to ONNX is both possible and straightforward in most standard ML libraries. For more on this, check out the [ONNX conversion docs](https://onnx.ai/onnx/intro/converters.html). +Otherwise, check out using [HuggingFace Optimum](https://huggingface.co/docs/optimum/en/exporters/onnx/usage_guides/export_a_model) for easy model conversion. +::: +` + +func hugotConfigSpec() *service.ConfigSpec { + spec := service.NewConfigSpec(). + Beta(). + Categories("Machine Learning", "NLP"). + Version("v1.3.0 (huggingbento)"). + Fields(hugotConfigFields()...) + + return spec +} + +func hugotConfigFields() []*service.ConfigField { + return []*service.ConfigField{ + service.NewStringField("pipeline_name"). 
+ Description("Name of the pipeline. Defaults to uuid_v4() if not set"). + Optional(), + service.NewStringField("model_path"). + Description("Path to the ONNX model directory. If `enable_model_download` is `true`, the model will be downloaded here."). + Example("/path/to/models/my_model.onnx"). + Default("/model_repository"), + service.NewStringField("onnx_library_path"). + Description("The location of the ONNX Runtime dynamic library."). + Default("/usr/lib/onnxruntime.so"). + Advanced(), + service.NewStringField("onnx_filename"). + Description("The filename of the model to run. Only necessary to specify when multiple .onnx files are present."). + Example("model.onnx"). + Default(""). + Advanced(), + service.NewBoolField("enable_model_download"). + Description("If enabled, attempts to download an ONNX Runtime compatible model from HuggingFace specified in `model_name`."). + Default(false). + Advanced(), + service.NewObjectField("model_download_options", + service.NewStringField("model_repository"). + Description("The name of the huggingface model repository."). + Examples( + "KnightsAnalytics/distilbert-NER", + "KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english", + "sentence-transformers/all-MiniLM-L6-v2", + ). + Default(""). 
+ Advanced(), + ), + } +} + +//------------------------------------------------------------------------------ + +type pipelineProcessor struct { + log *service.Logger + + session *hugot.Session + pipeline pipelines.Pipeline + + pipelineName string + modelPath string + modelName string + onnxFilename string + + metaOutputType string + metaOutputLayerDim []int64 + + closeOnce sync.Once +} + +func newPipelineProcessor(conf *service.ParsedConfig, mgr *service.Resources) (*pipelineProcessor, error) { + p := &pipelineProcessor{log: mgr.Logger()} + + var modelRepo, modelPath, pipelineName string + var onnxLibraryPath, onnxFileName string + var shouldDownload bool + + var err error + + if onnxLibraryPath, err = conf.FieldString("onnx_library_path"); err != nil { + return nil, err + } + + if p.session, err = globalSession.NewSession(onnxLibraryPath); err != nil { + return nil, err + } + + if modelPath, err = conf.FieldString("model_path"); err != nil { + return nil, err + } + + if pipelineName, err = conf.FieldString("pipeline_name"); err != nil { + return nil, err + } + + if onnxFileName, err = conf.FieldString("onnx_filename"); err != nil { + return nil, err + } + + if shouldDownload, err = conf.FieldBool("enable_model_download"); err != nil { + return nil, err + } + + if shouldDownload { + opts := hugot.NewDownloadOptions() + opts.Verbose = false + + if modelRepo, err = conf.FieldString("model_download_options", "model_repository"); err != nil { + return nil, err + } + + start := time.Now() + if path, err := p.session.DownloadModel(modelRepo, modelPath, opts); err != nil { + return nil, fmt.Errorf("failed to download model %s from HuggingFace to %s: %w", modelRepo, modelPath, err) + } else { + modelPath = path + } + p.log.With("model_repository", modelRepo).Infof("Completed download (took %d ms)", time.Since(start).Milliseconds()) + } + + p.onnxFilename = onnxFileName + p.modelPath = modelPath + p.pipelineName = pipelineName + p.modelName = modelRepo + + return p, nil +} 
+ +//------------------------------------------------------------------------------ + +func (p *pipelineProcessor) ProcessBatch(ctx context.Context, batch service.MessageBatch) ([]service.MessageBatch, error) { + messages := make([]string, len(batch)) + + batch = batch.Copy() + for i, msg := range batch { + msgBytes, err := msg.AsBytes() + if err != nil { + msg.SetError(err) + continue + } + messages[i] = string(msgBytes) + p.log.Debug(string(msgBytes)) + } + + results, err := p.pipeline.Run(messages) + if err != nil { + return nil, err + } + + resultsOut := results.GetOutput() + for i, msg := range batch { + if msg.GetError() != nil { + continue + } + + msg.SetStructuredMut(resultsOut[i]) + msg.MetaSetMut("pipeline_name", p.pipelineName) + msg.MetaSetMut("output_type", p.metaOutputType) + msg.MetaSetMut("output_shape", p.metaOutputLayerDim) + } + + return []service.MessageBatch{batch}, nil +} + +func (p *pipelineProcessor) Close(context.Context) error { + p.closeOnce.Do(func() { + globalSession.Destroy() + }) + return nil +} diff --git a/internal/impl/huggingface/processor_feature_extraction.go b/internal/impl/huggingface/processor_feature_extraction.go new file mode 100644 index 000000000..e00601256 --- /dev/null +++ b/internal/impl/huggingface/processor_feature_extraction.go @@ -0,0 +1,85 @@ +//go:build huggingbento + +package huggingface + +import ( + "github.com/knights-analytics/hugot" + "github.com/knights-analytics/hugot/pipelines" + + "github.com/warpstreamlabs/bento/public/service" +) + +func hugotFeatureExtractionConfigSpec() *service.ConfigSpec { + featureExtractionDescription := "### Feature Extraction" + "\n" + + "Feature extraction is the task of extracting features learnt in a model." + + "This processor runs a feature extraction model against batches of text data, returning a model's multidimensional representation of said features" + + "in tensor/float64 format." + "\n" + description + + spec := hugotConfigSpec(). 
+ Summary("Performs feature extraction using a Hugging Face 🤗 NLP pipeline with an ONNX Runtime model."). + Description(featureExtractionDescription). + Field(service.NewBoolField("normalization"). + Description("Whether to apply normalization in the feature extraction pipeline."). + Default(false)) + + return spec +} + +func init() { + err := service.RegisterBatchProcessor("nlp_extract_features", hugotFeatureExtractionConfigSpec(), newFeatureExtractionPipeline) + if err != nil { + panic(err) + } +} + +//------------------------------------------------------------------------------ + +func getFeatureExtractionOptions(conf *service.ParsedConfig) ([]pipelines.PipelineOption[*pipelines.FeatureExtractionPipeline], error) { + var options []pipelines.PipelineOption[*pipelines.FeatureExtractionPipeline] + + normalization, err := conf.FieldBool("normalization") + if err != nil { + return nil, err + } + if normalization { + options = append(options, pipelines.WithNormalization()) + } + + return options, nil +} + +//------------------------------------------------------------------------------ + +func newFeatureExtractionPipeline(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + p, err := newPipelineProcessor(conf, mgr) + if err != nil { + return nil, err + } + + opts, err := getFeatureExtractionOptions(conf) + if err != nil { + return nil, err + } + + cfg := hugot.FeatureExtractionConfig{ + Name: p.pipelineName, + OnnxFilename: p.onnxFilename, + ModelPath: p.modelPath, + Options: opts, + } + + if p.pipeline, err = hugot.NewPipeline(p.session, cfg); err != nil { + return nil, err + } + + if err := p.pipeline.Validate(); err != nil { + return nil, err + } + + if md := p.pipeline.GetMetadata().OutputsInfo; len(md) > 0 { + p.metaOutputType = md[0].Name + p.metaOutputLayerDim = md[0].Dimensions + } + + return p, nil +} diff --git a/internal/impl/huggingface/processor_text_classification.go 
b/internal/impl/huggingface/processor_text_classification.go new file mode 100644 index 000000000..f1c7e7720 --- /dev/null +++ b/internal/impl/huggingface/processor_text_classification.go @@ -0,0 +1,141 @@ +//go:build huggingbento + +package huggingface + +import ( + "github.com/daulet/tokenizers" + "github.com/knights-analytics/hugot" + "github.com/knights-analytics/hugot/pipelines" + + "github.com/warpstreamlabs/bento/public/service" +) + +func hugotTextClassificationConfigSpec() *service.ConfigSpec { + textClassificationDescription := "### Text Classification" + "\n" + + "Text Classification is the task of assigning a label or class to a given text." + + "Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness." + "\n" + + "This processor runs text-classification inference against batches of text data, returning labelled classification corresponding to each input." + "\n" + + description + + spec := hugotConfigSpec(). + Summary("Performs text classification using a Hugging Face 🤗 NLP pipeline with an ONNX Runtime model."). + Description(textClassificationDescription). + Field(service.NewStringEnumField("aggregation_function", + "SOFTMAX", + "SIGMOID", + ).Description("The aggregation function to use for the text classification pipeline.").Default("SOFTMAX")). + Field(service.NewStringEnumField("problem_type", + "singleLabel", + "multiLabel", + ).Description("The problem type for the text classification pipeline.").Default("singleLabel")). + Example("Emotion Scoring (Local Model)", "Here, we load the [Cohee/distilbert-base-uncased-go-emotions-onnx](https://huggingface.co/Cohee/distilbert-base-uncased-go-emotions-onnx) model from the local directory at `models/coheedistilbert_base_uncased_go_emotions_onnx`."+ + "The processor returns a single-label output with the highest emotion score for the text. 
", + ` +pipeline: + processors: + - nlp_classify_text: + pipeline_name: classify-incoming-data + model_path: "models/coheedistilbert_base_uncased_go_emotions_onnx" + +# In: "I'm super excited for my Bento box!" +# Out: [{"Label":"excitement","Score":0.34134513}] +`).Example("Sentiment Analysis (Downloaded Model)", "Here, we retrieve the [KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english(https://huggingface.co/KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english) model from HuggingFace and store it in a `./models` directory."+ + "The processor returns a multi-label output indicating showing a `POSITIVE` and `NEGATIVE` score some input text-data.", + ` +pipeline: + processors: + - nlp_classify_text: + pipeline_name: classify-multi-label + model_path: "./models" + enable_model_download: true + model_download_options: + model_repository: "KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english" + + +# In: "This meal tastes like old boots." +# Out: [{"Label":"NEGATIVE","Score":0.9977291},{"Label":"POSITIVE","Score":0.0022708932}] +`) + + return spec +} + +func init() { + err := service.RegisterBatchProcessor("nlp_classify_text", hugotTextClassificationConfigSpec(), newTextClassificationPipeline) + if err != nil { + panic(err) + } +} + +//------------------------------------------------------------------------------ + +func getTextClassificationOptions(conf *service.ParsedConfig) ([]pipelines.PipelineOption[*pipelines.TextClassificationPipeline], error) { + var options []pipelines.PipelineOption[*pipelines.TextClassificationPipeline] + + aggregationFunction, err := conf.FieldString("aggregation_function") + if err != nil { + return nil, err + } + switch aggregationFunction { + case "SOFTMAX": + options = append(options, pipelines.WithSoftmax()) + case "SIGMOID": + options = append(options, pipelines.WithSigmoid()) + } + + problemType, err := conf.FieldString("problem_type") + if err != nil { + return nil, err + } + switch problemType { 
+ case "singleLabel": + options = append(options, pipelines.WithSingleLabel()) + case "multiLabel": + options = append(options, pipelines.WithMultiLabel()) + } + + return options, nil +} + +//------------------------------------------------------------------------------ + +func newTextClassificationPipeline(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) { + p, err := newPipelineProcessor(conf, mgr) + if err != nil { + return nil, err + } + + opts, err := getTextClassificationOptions(conf) + if err != nil { + return nil, err + } + + cfg := hugot.TextClassificationConfig{ + Name: p.pipelineName, + OnnxFilename: p.onnxFilename, + ModelPath: p.modelPath, + Options: opts, + } + + pipeline, err := hugot.NewPipeline(p.session, cfg) + if err != nil { + return nil, err + } + + pipeline.TokenizerOptions = []tokenizers.EncodeOption{ + tokenizers.WithReturnAttentionMask(), + tokenizers.WithReturnTypeIDs(), + } + + p.pipeline = pipeline + + if err := p.pipeline.Validate(); err != nil { + return nil, err + } + + if md := p.pipeline.GetMetadata().OutputsInfo; len(md) > 0 { + p.metaOutputType = md[0].Name + p.metaOutputLayerDim = md[0].Dimensions + } + + return p, nil +} diff --git a/internal/impl/huggingface/processor_token_classifier.go b/internal/impl/huggingface/processor_token_classifier.go new file mode 100644 index 000000000..c62d18a74 --- /dev/null +++ b/internal/impl/huggingface/processor_token_classifier.go @@ -0,0 +1,101 @@ +//go:build huggingbento + +package huggingface + +import ( + "github.com/knights-analytics/hugot" + "github.com/knights-analytics/hugot/pipelines" + + "github.com/warpstreamlabs/bento/public/service" +) + +func hugotTokenClassificationConfigSpec() *service.ConfigSpec { + tokenClassificaitionDescription := "### Token Classification" + "\n" + + "Token classification assigns a label to individual tokens in a sentence." 
+ + "This processor runs token classification inference against batches of text data, returning a set of Entities classification corresponding to each input." + "\n" + + description + + spec := hugotConfigSpec(). + Summary("Performs token classification using a Hugging Face 🤗 NLP pipeline with an ONNX Runtime model."). + Description(tokenClassificaitionDescription). + Field(service.NewStringEnumField("aggregation_strategy", + "SIMPLE", + "NONE", + ).Description("The aggregation strategy to use for the token classification pipeline.").Default("SIMPLE")). + Field(service.NewStringListField("ignore_labels"). + Description("Labels to ignore in the token classification pipeline."). + Default([]string{}). + Example([]string{"O", "MISC"})) + + return spec +} + +func init() { + err := service.RegisterBatchProcessor("nlp_classify_tokens", hugotTokenClassificationConfigSpec(), newTokenClassificationPipeline) + if err != nil { + panic(err) + } +} + +//------------------------------------------------------------------------------ + +func getTokenClassificationOptions(conf *service.ParsedConfig) ([]pipelines.PipelineOption[*pipelines.TokenClassificationPipeline], error) { + var options []pipelines.PipelineOption[*pipelines.TokenClassificationPipeline] + + aggregationStrategy, err := conf.FieldString("aggregation_strategy") + if err != nil { + return nil, err + } + switch aggregationStrategy { + case "SIMPLE": + options = append(options, pipelines.WithSimpleAggregation()) + case "NONE": + options = append(options, pipelines.WithoutAggregation()) + } + + ignoreLabels, err := conf.FieldStringList("ignore_labels") + if err != nil { + return nil, err + } + if len(ignoreLabels) > 0 { + options = append(options, pipelines.WithIgnoreLabels(ignoreLabels)) + } + + return options, nil +} + +//------------------------------------------------------------------------------ + +func newTokenClassificationPipeline(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, 
error) { + p, err := newPipelineProcessor(conf, mgr) + if err != nil { + return nil, err + } + + opts, err := getTokenClassificationOptions(conf) + if err != nil { + return nil, err + } + + cfg := hugot.TokenClassificationConfig{ + Name: p.pipelineName, + OnnxFilename: p.onnxFilename, + ModelPath: p.modelPath, + Options: opts, + } + + if p.pipeline, err = hugot.NewPipeline(p.session, cfg); err != nil { + return nil, err + } + + if err := p.pipeline.Validate(); err != nil { + return nil, err + } + + if md := p.pipeline.GetMetadata().OutputsInfo; len(md) > 0 { + p.metaOutputType = md[0].Name + p.metaOutputLayerDim = md[0].Dimensions + } + + return p, nil +} diff --git a/internal/impl/huggingface/session.go b/internal/impl/huggingface/session.go new file mode 100644 index 000000000..f607c6882 --- /dev/null +++ b/internal/impl/huggingface/session.go @@ -0,0 +1,61 @@ +//go:build huggingbento + +package huggingface + +import ( + "os" + "sync" + + "github.com/knights-analytics/hugot" + "github.com/warpstreamlabs/bento/public/service" + ort "github.com/yalue/onnxruntime_go" +) + +type ortSession struct { + mut sync.Mutex + session *hugot.Session +} + +func (o *ortSession) Get() *hugot.Session { + o.mut.Lock() + session := o.session + o.mut.Unlock() + return session +} + +func (o *ortSession) Destroy() { + o.mut.Lock() + o.session.Destroy() + o.mut.Unlock() +} + +func (o *ortSession) NewSession(onnxLibraryPath string) (*hugot.Session, error) { + o.mut.Lock() + defer o.mut.Unlock() + + if o.session == nil || !ort.IsInitialized() { + session, err := hugot.NewSession(hugot.WithOnnxLibraryPath(onnxLibraryPath)) + if err != nil { + return nil, err + } + o.session = session + } + + return o.session, nil +} + +func (o *ortSession) DownloadModel(logger *service.Logger, modelName string, destination string, options hugot.DownloadOptions) (string, error) { + // Hacky workaround since the DownloadModel prints to stdout. Currently disabled by default. 
+ tempStdOut := os.Stdout + os.Stdout = nil + defer func() { + os.Stdout = tempStdOut + }() + path, err := o.session.DownloadModel(modelName, destination, options) + return path, err +} + +var globalSession = &ortSession{ + session: nil, + mut: sync.Mutex{}, +} diff --git a/internal/impl/huggingface/testdata/expected_feature_extraction.json b/internal/impl/huggingface/testdata/expected_feature_extraction.json new file mode 100644 index 000000000..884f5f677 --- /dev/null +++ b/internal/impl/huggingface/testdata/expected_feature_extraction.json @@ -0,0 +1,1546 @@ +{ + "Bento boxes taste amazing!": [ + -0.16674368, + -0.22170256, + -0.032073833, + 0.062232673, + -0.6393418, + -0.41529745, + -0.03138932, + 0.20705068, + -0.49506932, + 0.069463655, + -0.3592735, + 0.03255941, + -0.27644458, + -0.1467912, + 0.038243134, + 0.15527101, + 0.43837914, + 0.119780734, + -0.09499901, + 0.094092324, + 0.4487812, + -0.38697535, + -0.1997513, + 0.2684958, + -0.492739, + 0.2402206, + 0.5627584, + 0.61998767, + -0.25938115, + -0.5066912, + 0.008063905, + 0.7685617, + 0.07672526, + 0.0026178686, + 0.1715703, + 0.26886424, + 0.26766738, + -0.63009685, + 0.39485517, + -0.23801173, + -0.0838076, + 0.20280364, + 0.51747936, + 0.10466704, + 0.16337383, + -0.29975954, + 0.1419708, + -0.29067484, + 0.26529664, + 0.12536862, + 0.08109855, + 0.2427063, + 0.0023970655, + 0.0015429687, + -0.46884748, + -0.08867278, + -0.19265768, + -0.00892048, + -0.016985036, + 0.3983293, + -0.27056304, + -0.14004576, + 0.12687787, + 0.13492206, + -0.23905203, + -0.06539294, + -0.34166092, + 0.36877194, + -0.5832123, + -0.2267201, + 0.024646105, + 0.4643832, + 0.26812306, + -0.34186894, + 0.19174151, + -0.2376416, + 0.64644295, + -0.6473305, + 0.030196887, + 0.3601797, + -0.41846853, + -0.22748251, + -0.103845835, + 0.32183498, + -0.6618641, + -0.15084122, + -0.005149115, + -0.03504438, + -0.2302905, + 0.032871, + 0.42974475, + 0.015748795, + -0.015370709, + -0.04818632, + -0.3440619, + 0.13814472, + 
-0.29499978, + -0.086257935, + -0.45581773, + 0.012275577, + -0.06497588, + 0.17677438, + 0.02567197, + -0.6297274, + 0.36313722, + -0.014464573, + 0.038183954, + 0.15337892, + 0.32538867, + 0.41940492, + -0.4376947, + -0.18514742, + 0.24952233, + -0.001611555, + -0.6536691, + -0.100449964, + 0.16476965, + 0.14918429, + -0.7100737, + -0.2590415, + 0.08327439, + -0.43489018, + 0.16093116, + -0.149174, + -0.34205946, + -0.08374312, + 0.26313332, + -7.702422e-33, + -0.26660588, + 0.0101380665, + -0.103695944, + 0.15765163, + 0.68238395, + -0.111157, + -0.27203953, + 0.23516642, + -0.744279, + 0.03366915, + -0.6201719, + -0.21144874, + -0.2469452, + -0.06811682, + -0.04117257, + -0.2822026, + -0.446668, + -0.114633195, + -0.2691595, + 0.16717935, + -0.62426543, + -0.13401562, + 0.27573884, + 0.55855787, + -0.1619603, + 0.40924233, + 0.058682676, + -0.25650746, + -0.046810176, + -0.013290341, + 0.33432987, + -0.07182218, + 0.11087857, + -0.025627526, + -0.3006606, + 0.08223866, + -0.22491667, + 0.01567217, + -0.16164789, + 0.51107574, + 0.3203218, + 0.19773242, + 0.24243248, + 0.21693498, + 0.11814863, + -0.32226655, + 0.0071426667, + 0.44161353, + 0.822067, + -0.33104208, + -0.39119047, + 0.101685874, + -0.12303032, + 0.5264542, + 0.2428021, + -0.5859884, + -0.068240345, + 0.109131396, + 0.64711654, + -0.12946345, + 0.4552416, + 0.21840084, + -0.11358249, + -0.5237139, + -0.40773958, + -0.028075447, + -0.3852664, + -0.27689642, + -0.025334714, + -0.2174746, + 0.4222873, + 0.15651636, + -0.45573097, + -0.45283097, + -0.2158657, + -0.15738218, + 0.3986361, + 0.40454683, + 0.64108014, + 0.038841534, + 0.582218, + 0.03996531, + 0.061820865, + 0.004375958, + -0.022676196, + 0.3921404, + -0.42066902, + -0.19456428, + 0.31804654, + 0.1524327, + -0.20739134, + -0.5800187, + 0.31754082, + -0.5299829, + -0.37728932, + 1.16622925e-32, + -0.0113471635, + -0.08749265, + -0.10831466, + 0.48088717, + 0.6272215, + -0.21538827, + -0.4176555, + 0.16880572, + 0.54301494, + -0.16009064, + 
0.27779534, + 0.013296545, + 0.1308926, + 0.1459664, + -0.29040143, + 0.43241835, + 0.42292577, + 0.48408625, + 0.082588375, + -0.71435356, + -0.0035945997, + 0.37882206, + -0.2714593, + 0.020402472, + -0.51451033, + 0.41395938, + 0.044190854, + 0.012848194, + 0.139471, + 0.32290345, + 0.026390374, + -0.4546126, + -0.20931037, + 0.20798165, + 0.1300823, + 0.78901154, + -0.20459436, + 0.051370833, + -0.12323867, + 0.46287316, + -0.49919444, + 0.3613042, + -0.008434266, + 0.3403453, + -0.19255513, + -0.47482568, + -0.36974877, + -0.06530568, + -0.23206593, + 0.3379299, + -0.11984491, + -0.110151835, + -0.42148963, + 0.10687341, + -0.43120003, + -0.026225662, + -0.011928812, + 0.23813951, + 0.047913853, + -0.13350847, + -0.30116746, + 0.12522939, + -0.029835533, + -0.18940148, + 0.44147906, + 0.43276983, + 0.29991803, + -0.57708883, + 0.09322225, + -0.36197224, + 0.31117883, + 0.1745632, + -0.05847375, + -0.0416397, + 0.37314478, + -0.23587927, + -0.20085499, + -0.1422869, + 0.08861342, + 0.28698996, + -0.2616853, + 0.27976534, + 0.07660548, + 0.55734235, + 0.17506316, + -0.06322743, + 0.16131325, + -0.07347109, + -0.13727699, + 0.044825874, + 0.050779603, + 0.029284999, + 0.18112819, + 0.50608546, + 0.47963965, + -9.231383e-8, + -0.1435922, + -0.2327053, + -0.17848489, + 0.7023755, + 0.16067873, + -0.13795102, + 0.11347644, + -0.054581825, + -0.34490535, + -0.023213752, + 0.4304882, + 0.39270627, + -0.9024936, + 0.17111938, + 0.53707314, + 0.3465414, + 0.08680876, + 0.7857064, + -0.09797321, + 0.53779054, + 0.44141027, + 0.0052508153, + 0.42062104, + 0.06674033, + -0.4124903, + -0.22801633, + -0.19004771, + -0.342439, + 0.37016225, + 0.15461335, + 0.21963052, + 0.20577143, + 0.35898125, + 0.17983374, + -0.2378167, + -0.29997864, + -0.44055697, + 0.29051948, + -0.544341, + -0.08921235, + 0.24639326, + -1.0390278, + -0.096134774, + -0.23125961, + -0.14658855, + -0.20305693, + 0.54394907, + 0.6231115, + 0.15897423, + 0.38171327, + 0.080406114, + 0.11558633, + 
0.41593686, + 0.41899747, + 0.23518631, + -0.48754913, + 0.42501974, + -0.03166996, + 0.60862035, + 0.43514326, + 0.07630372, + 0.48636606, + 0.13556471, + -0.14778581 + ], + "Meow meow meow... meow meow.": [ + 0.15637723, + 0.27836707, + 0.32392648, + 0.12420614, + -0.4122135, + -0.008943693, + 0.4368708, + -0.100039124, + 0.18513137, + -0.16555801, + -0.17520545, + -0.31652847, + 0.08226115, + 0.110111445, + -0.510651, + 0.26081607, + 0.13485445, + 0.081930354, + -0.10702782, + 0.101691596, + -0.08194803, + 0.052201502, + -0.18106782, + -0.0661436, + -0.29852676, + 0.1382651, + -0.25194138, + 0.12941112, + -0.08745528, + -0.19773065, + 0.05520475, + 0.1596035, + 0.055901162, + 0.03052104, + -0.25067076, + 0.1130374, + 0.10731878, + -0.1134125, + 0.2960661, + 0.28123325, + -0.4539054, + -0.2951886, + -0.06013838, + 0.0698782, + -0.050692946, + 0.033675604, + -0.08689313, + -0.09594688, + 0.24259675, + -0.06768646, + -0.25998816, + -0.014393644, + -0.1712641, + 0.26353395, + 0.37038332, + 0.1540199, + 0.30148673, + 0.0900148, + -0.14487585, + -0.07544956, + -0.16818637, + 0.22264199, + 0.20273642, + 0.28806692, + 0.06620519, + 0.06071928, + 0.016959887, + 0.15991561, + -0.20322719, + -0.21680346, + 0.005827653, + -0.058518227, + -0.082851976, + -0.18408045, + 0.011829881, + 0.17356046, + -0.22867271, + -0.23788007, + 0.36698207, + 0.11947009, + -0.2802242, + -0.00009296648, + -0.21881318, + 0.04414539, + 0.21922392, + -0.08089143, + 0.017334178, + -0.21013765, + -0.021579012, + -0.14636599, + -0.045122053, + -0.42394575, + -0.33370897, + 0.32532296, + -0.12396773, + -0.17880483, + 0.08677923, + -0.07191366, + -0.21204962, + 0.066616714, + -0.021146711, + 0.039390735, + -0.2280617, + -0.25807008, + -0.004695235, + 0.057916496, + 0.062666446, + 0.17647497, + 0.06146744, + 0.0983791, + -0.06768534, + -0.41729504, + -0.37071726, + 0.0020676702, + 0.12339811, + 0.058720946, + -0.2408654, + -0.027972784, + -0.15707123, + 0.121904284, + 0.30525315, + 0.08549896, + 
0.07643991, + -0.24256659, + -0.039654344, + -0.097960174, + -0.019930374, + 4.5468504e-33, + -0.1756179, + -0.048909288, + 0.08754631, + 0.25367457, + 0.08420348, + -0.12057494, + -0.26027396, + 0.12646213, + -0.06558277, + 0.07000475, + 0.0841188, + 0.13850883, + -0.33397517, + -0.3362081, + 0.049629316, + -0.2066904, + 0.19138086, + -0.20490237, + 0.033412784, + 0.055478983, + -0.063508496, + 0.20210288, + 0.047874127, + -0.087965354, + -0.17426454, + -0.53554285, + 0.19416429, + -0.2597898, + -0.24396625, + 0.13470043, + 0.20197004, + -0.1506331, + 0.23820055, + -0.28346583, + -0.24578825, + -0.33410302, + 0.113019064, + -0.25510472, + -0.09391379, + 0.17827593, + 0.044965252, + 0.06099175, + 0.064390615, + -0.292253, + -0.03721547, + 0.16617969, + 0.2663693, + 0.38293424, + -0.070591, + -0.106118605, + 0.33038548, + 0.19407578, + 0.13705425, + -0.33165032, + 0.13195992, + -0.065211214, + 0.044473562, + 0.017415367, + 0.3085141, + 0.41981968, + -0.27757886, + -0.22959505, + 0.16264741, + 0.003622117, + -0.03673903, + -0.5454461, + -0.09162053, + 0.005971837, + 0.17005192, + 0.48562628, + 0.027425809, + -0.23248506, + -0.23866276, + -0.18467933, + -0.10256317, + -0.0943141, + 0.21388885, + 0.16161707, + -0.0074002286, + -0.34449014, + 0.17710747, + 0.26171246, + 0.00003363937, + 0.057235923, + -0.01510524, + 0.19889826, + 0.18636805, + -0.38441598, + -0.056606196, + 0.39603695, + -0.2371072, + -0.018886315, + 0.4957356, + -0.25210994, + 0.02127621, + -1.3966613e-32, + -0.06597106, + -0.00871083, + -0.39490935, + 0.30457807, + -0.24645172, + 0.33494025, + 0.43752164, + -0.09999472, + 0.21468268, + 0.357783, + -0.3262289, + -0.03384742, + 0.11233823, + -0.046595316, + 0.043287385, + 0.1619159, + 0.20880395, + 0.19406082, + 0.3238991, + -0.052105717, + -0.14570716, + -0.05481577, + -0.12256244, + 0.10776037, + -0.3628025, + 0.16671132, + 0.037646294, + 0.068002716, + 0.14490041, + 0.074240714, + 0.22091137, + -0.36093932, + -0.23266321, + 0.10946065, + -0.20539697, 
+ 0.25333363, + 0.23371874, + -0.14202246, + -0.30506313, + -0.19117306, + 0.13098246, + 0.18063204, + 0.06673363, + -0.005377829, + -0.11130432, + -0.11207855, + -0.11722988, + 0.19397008, + 0.020753823, + 0.011490074, + -0.28348026, + -0.27894175, + -0.17945239, + -0.34756857, + -0.1624235, + 0.14527534, + 0.08342466, + -0.2047049, + 0.029886141, + 0.015002901, + -0.28381258, + 0.12461719, + 0.21391666, + 0.05217778, + 0.009905819, + -0.152343, + 0.105508305, + -0.36457226, + 0.108915634, + -0.14713557, + 0.15801114, + 0.09408918, + -0.15478803, + -0.14491013, + 0.021117311, + 0.25206918, + -0.001925353, + -0.22792469, + 0.10024833, + -0.39397204, + 0.13327402, + -0.25551447, + 0.14025936, + 0.23099253, + -0.17228551, + -0.2825274, + 0.18623923, + 0.12513581, + 0.06390937, + -0.027779713, + 0.33647487, + 0.3110584, + -0.0024402216, + -0.40892604, + 0.16605271, + -1.01988775e-7, + -0.14311662, + -0.13509719, + 0.029921597, + 0.12602577, + 0.32284, + -0.13512345, + 0.06854327, + -0.2048816, + 0.026940329, + -0.041910253, + 0.14322735, + -0.13973188, + -0.18920533, + -0.07621957, + 0.17830417, + 0.37452748, + -0.15657611, + -0.17650256, + -0.03859459, + 0.23088674, + -0.43405753, + -0.0853143, + 0.12870532, + -0.098523736, + -0.097691044, + -0.11553188, + 0.04732848, + 0.089734696, + 0.21692856, + 0.24791665, + -0.16980858, + 0.039974272, + -0.38291276, + -0.24575931, + -0.15899484, + -0.26287362, + 0.0077358177, + 0.27611396, + 0.090811804, + -0.18990625, + 0.124515496, + 0.28875077, + 0.15307845, + -0.14195547, + -0.15947644, + -0.02134832, + -0.050751675, + -0.23101777, + -0.12020526, + 0.16254473, + 0.10992786, + 0.23134807, + 0.49782285, + 0.14621937, + 0.031218115, + 0.24783067, + -0.11218174, + 0.058527835, + 0.08854503, + -0.11455172, + 0.2901104, + 0.10851829, + 0.11021252, + -0.00080822036 + ], + "Streaming data is my favourite pastime.": [ + 0.08678012, + -0.12769794, + 0.082551055, + -0.116687156, + 0.41999477, + 0.026412368, + 0.11276046, + -0.22693673, 
+ 0.058520496, + 0.21197803, + -0.15724543, + 0.02058479, + -0.33901864, + 0.46762195, + 0.18634114, + -0.18062305, + 0.27907035, + -0.19506899, + -0.08029558, + -0.5939473, + -0.4639248, + -0.2552386, + -0.13273156, + -0.036678564, + 0.61611027, + 0.7115763, + 0.30715296, + 0.010229403, + 0.15911712, + -0.2672463, + -0.25950152, + 0.114014484, + -0.29467815, + 0.2155604, + -0.35092288, + -0.2332494, + 0.24521895, + -0.17079844, + -0.517029, + 0.071328975, + 0.34918645, + -0.24889126, + 0.18551743, + 0.17863561, + -0.21505408, + 0.07587242, + 0.32834157, + -0.087223575, + 0.35329276, + 0.64309454, + -0.43920374, + 0.045081325, + 0.028938938, + 0.29296455, + -0.17162976, + -0.021497753, + -0.04975946, + 0.21127209, + 0.034545902, + 0.1488971, + -0.11515436, + -0.12663938, + -0.41852507, + 0.14476292, + -0.081459, + -0.24603727, + -0.13640407, + 0.7648642, + 0.45170468, + 0.21813624, + -0.3724473, + 0.49339756, + -0.33845863, + 0.36777037, + -0.13600144, + -0.25836173, + -0.09412579, + -0.43417898, + -0.17555004, + -0.017682325, + 0.13501224, + -0.17319247, + -0.09277325, + 0.28045604, + 0.26813108, + -0.48691082, + 0.1699541, + 0.15595429, + -0.28359112, + -0.5717148, + -0.42333895, + 0.36241332, + -0.18916896, + 0.31529647, + -0.31180033, + 0.03586361, + -0.030432686, + 0.08910283, + 0.31417045, + 0.38071963, + 0.021339677, + 0.16907635, + -0.36551672, + 0.5065244, + -0.06956223, + -0.545804, + 0.067058586, + 0.5492185, + -0.054359294, + -0.09453006, + -0.37524927, + 0.27079856, + -0.0017305166, + -0.39778474, + 0.12308651, + 0.102084324, + -0.7793907, + 0.22995687, + -0.22068866, + 0.49733108, + 0.18015817, + 0.21216631, + -0.112541065, + 0.014450798, + 0.4929797, + -0.32780677, + 0.031574283, + -1.9792543e-32, + -0.14162374, + -0.2361987, + 0.040942185, + 0.22768822, + 0.22763678, + 0.071725175, + -0.23597601, + -0.049850896, + -0.077229604, + 0.03901745, + -0.10254673, + 0.42047414, + -0.36821374, + -0.22229543, + 0.14243756, + -0.47152132, + -0.15030351, + 
0.28676614, + 0.31712896, + -0.079752006, + 0.19088624, + -0.31403595, + 0.25653112, + -0.1199348, + 0.42625237, + 0.005146871, + 0.23004782, + -0.33255148, + 0.7117083, + 0.19681045, + -0.023894671, + -0.24999496, + -0.44504744, + -0.33748937, + 0.024883961, + 0.03563863, + -0.089101754, + -0.83863497, + -0.0441739, + 0.17266083, + 0.13541274, + 0.021925436, + -0.12897752, + -0.4893771, + -0.27357572, + 0.025301838, + -0.09585966, + 0.13558038, + -0.26846996, + -0.28857192, + 0.07020078, + 0.13599782, + -0.014492661, + -0.068450786, + 0.08500629, + 0.33497253, + 0.047382455, + -0.38296634, + 0.07909783, + 0.11494887, + -0.24866076, + 0.14037584, + 0.28257197, + -0.54020625, + -0.63153803, + 0.3216539, + 0.62726927, + 0.16593203, + 0.26664957, + -0.0623761, + -0.36469397, + 0.06076969, + 0.09242987, + -0.33662063, + 0.06736313, + 0.20690438, + -0.017785277, + -0.25538555, + -0.3534425, + -0.069344796, + 0.33800808, + -0.451046, + 0.37142807, + -0.030692872, + 0.17743677, + 0.05545379, + -0.0025160462, + -0.9821741, + -0.62557006, + 0.29350033, + -0.5206202, + -0.0006385356, + 0.32594937, + -0.038259897, + -0.51228005, + 1.274844e-32, + -0.014374526, + -0.31517714, + -0.054363966, + 0.44796497, + 0.51065934, + -0.34779748, + -0.121149495, + -0.033142433, + 0.37656564, + 0.2620326, + -0.29165828, + -0.41640282, + -0.16976142, + -0.07522472, + -0.20482647, + 0.07450318, + 0.30803898, + -0.3915492, + -0.27986655, + -0.30218652, + -0.39650422, + 0.1462356, + -0.16784006, + -0.20162818, + 0.17078872, + 0.0818248, + -0.042554833, + 0.22732726, + -0.2674684, + 0.25756714, + -0.08856105, + -0.2932238, + -0.24357507, + -0.9261626, + -0.28821838, + 0.5004984, + 0.5655039, + 0.0369096, + -0.35576457, + 0.19081995, + 0.21349986, + 0.2593604, + 0.098572925, + 0.2945103, + 0.20675206, + 0.052672565, + -0.71639234, + 0.6821162, + -0.22330837, + 0.17360552, + 0.29739028, + 0.13484815, + 0.1598203, + -0.4709069, + 0.4711969, + 0.20849907, + 0.7792479, + -0.06368965, + 0.07286604, + 
0.061130404, + -0.5789955, + -0.48703194, + -0.25865072, + 0.42338508, + 0.13289778, + -0.07674676, + 0.4255003, + -0.23244342, + -0.6952194, + -0.020652989, + -0.20697208, + -0.038262147, + -0.6153826, + 0.20434813, + -0.0910187, + 0.025133029, + -0.06917389, + 0.65110934, + -0.155337, + 0.37648648, + 0.107003644, + 0.13129649, + 0.08264707, + 0.4939564, + 0.36603525, + 0.5209323, + 0.33380473, + -0.47883096, + -0.17651561, + -0.29646072, + -0.030261427, + -0.022389166, + -0.93269396, + 0.5760791, + 0.41038495, + -9.323391e-8, + -0.28297532, + -0.201685, + -0.06578542, + -0.17152533, + 0.14920396, + -0.4369875, + -0.045477852, + 0.39793688, + 0.2898048, + -0.026910651, + 0.69725865, + -0.24016285, + 0.15400168, + 0.23232584, + 0.51643914, + -0.16475222, + 0.7574528, + -0.63223535, + -0.12854113, + 0.4422285, + 0.33090425, + 0.24616122, + 0.05775843, + -0.12936792, + 0.10450213, + 0.049330957, + 0.37026784, + 0.39125767, + 0.43743187, + -0.17087965, + -0.017602995, + 0.04041165, + 0.26055095, + 0.23033965, + 0.22418518, + -0.17933479, + 0.34527704, + -0.08434513, + -0.34523135, + 0.10217228, + 0.24894246, + 0.011013279, + -0.22674915, + 0.059479833, + -0.05035019, + 0.38424462, + 0.39532846, + -0.13794577, + 0.15312436, + 0.19032788, + -0.23354197, + -0.1397289, + 0.44028336, + 0.22681692, + 0.92411554, + 0.064874135, + -0.21152802, + -0.29979524, + -0.35932565, + 0.44770318, + 0.27302366, + 0.586516, + -0.8791503, + 0.010116899 + ], + "You are wearing a silly hat.": [ + 0.18002091, + 0.21364392, + 0.20197809, + -0.05800058, + 0.5993241, + -0.009542759, + 0.7689698, + 0.06182242, + 0.08899465, + 0.08628768, + 0.20699018, + -0.3944949, + -0.14164951, + -0.14762557, + -0.11450306, + -0.176009, + -0.25259265, + -0.0879672, + -0.08108656, + 0.13516302, + 0.10951018, + 0.6803262, + -0.009167357, + 0.28297335, + -0.2121239, + 0.15043248, + -0.18011416, + 0.06785517, + -0.053250875, + -0.119519316, + -0.16834468, + 0.1731705, + -0.033815518, + -0.07274622, + -0.17614466, 
+ -0.018178694, + 0.31106114, + 0.14312823, + 0.26072645, + -0.027180608, + -0.10046855, + -0.22047345, + 0.1130046, + -0.12036761, + 0.08580452, + 0.36957884, + -0.2234603, + 0.22574592, + 0.071578175, + 0.16724914, + -0.0610829, + -0.07075546, + 0.09950111, + 0.17354614, + -0.010670628, + 0.4233054, + 0.16305904, + -0.24976699, + 0.051298507, + 0.3164315, + -0.004115336, + -0.20481995, + -0.0712287, + 0.36172786, + -0.113326535, + -0.23718159, + -0.13546947, + -0.16165684, + -0.3380645, + -0.33395284, + -0.2325904, + -0.036710512, + -0.12475298, + 0.24500836, + 0.21126875, + -0.21924202, + -0.14998424, + -0.29984656, + 0.4281851, + 0.5559507, + -0.50546765, + -0.29053774, + 0.39632988, + -0.120192066, + -0.14866412, + 0.021644624, + -0.108866334, + 0.14888507, + -0.35411242, + -0.13101812, + -0.19697681, + 0.08060866, + 0.24212915, + 0.3439581, + 0.23988098, + 0.31621397, + -0.19296935, + 0.08694286, + -0.12616497, + 0.77694976, + 0.05613401, + -0.30272746, + -0.36941272, + -0.051770844, + 0.32405618, + -0.014775204, + -0.2191191, + -0.2501601, + 0.118906826, + -0.029369056, + -0.092727385, + -0.09052259, + -0.012495169, + 0.15228814, + -0.34106085, + -0.012584958, + -0.0065341126, + 0.059245296, + -0.26492175, + -0.1473113, + 0.46568075, + 0.21590564, + 0.03262417, + 0.16322021, + -0.040895436, + -0.35877392, + 0.13646412, + -4.167319e-32, + -0.023615215, + 0.12847501, + 0.363975, + -0.020351665, + 0.2884413, + -0.12716033, + -0.19632876, + -0.10500587, + -0.17314774, + 0.15830013, + -0.10438709, + 0.14850336, + -0.3062293, + 0.1149762, + -0.056783013, + -0.054084282, + -0.004806713, + -0.058254547, + -0.059871897, + -0.13794075, + -0.10383822, + -0.047803976, + 0.0917068, + 0.07221811, + -0.09488787, + 0.41330993, + 0.0574568, + -0.033732064, + 0.2005464, + 0.2526724, + -0.08053832, + 0.22242016, + -0.056071892, + -0.41133764, + -0.13192098, + -0.41737616, + 0.013125852, + -0.19597784, + -0.101495996, + 0.1675444, + -0.036626037, + -0.22715047, + -0.2810582, + 
0.09422579, + -0.42454246, + 0.29351276, + 0.34866965, + 0.22712141, + -0.53274757, + 0.32991663, + -0.2042127, + -0.28404987, + -0.029798891, + -0.049804427, + -0.064516425, + -0.07016197, + -0.21989572, + -0.03167905, + 0.07349563, + -0.29115963, + -0.2844243, + -0.042267714, + -0.12288752, + 0.116534725, + -0.016324993, + -0.22138824, + -0.003718565, + 0.058132477, + -0.1428754, + 0.0059288307, + 0.3016812, + 0.278094, + 0.024939284, + 0.04576151, + -0.04107925, + -0.039243117, + 0.15173537, + 0.09865007, + -0.06664281, + -0.12298606, + 0.08883042, + 0.30487496, + 0.1662803, + -0.20248954, + 0.26947775, + -0.51236534, + -0.09064518, + -0.21095988, + -0.07885623, + -0.13728237, + 0.010284147, + 0.12879245, + 0.012477174, + -0.022748888, + -0.5061875, + 2.538143e-32, + 0.35242298, + 0.36122477, + -0.09778757, + 0.1351897, + 0.27576283, + -0.13478233, + 0.2184166, + -0.10733136, + -0.04286634, + 0.03876342, + -0.29040182, + 0.07770177, + 0.014303333, + 0.086613335, + 0.28426886, + -0.08911194, + 0.22625007, + 0.13025407, + -0.29451716, + 0.07892749, + -0.23554632, + -0.29528967, + 0.0344389, + -0.13475713, + -0.2114807, + 0.074978925, + 0.11820006, + 0.08346159, + -0.38473487, + 0.42406225, + -0.060115203, + -0.18961099, + -0.26965106, + -0.104423076, + 0.1808093, + 0.027518265, + -0.42833835, + -0.5206225, + 0.020710658, + -0.2658431, + -0.24024564, + -0.07073342, + 0.6585806, + -0.089581706, + -0.0059699584, + -0.4489078, + -0.24736291, + -0.036784597, + -0.1435848, + 0.09876278, + -0.17299002, + 0.14553969, + -0.11452004, + -0.28620672, + -0.24302, + 0.47407776, + -0.6018177, + 0.19564496, + 0.09163936, + 0.08378128, + -0.03216946, + -0.15471895, + -0.26317224, + 0.08903066, + 0.16129172, + 0.07334927, + -0.19396573, + 0.18651599, + 0.042054087, + 0.0053824717, + 0.30813634, + -0.07849894, + -0.17328787, + -0.13366231, + -0.016491663, + -0.06340514, + 0.11845575, + 0.30231154, + 0.24772188, + 0.29292643, + 0.014657729, + -0.09469977, + -0.14168344, + 0.27077186, 
+ -0.12528336, + -0.18600295, + 0.023759747, + 0.23564087, + -0.33164492, + 0.1226431, + -0.17027718, + 0.39653838, + -0.15348496, + 0.33461744, + -0.03331748, + -9.2402956e-8, + 0.14408912, + -0.27165705, + -0.024373071, + -0.058519, + 0.061868176, + 0.23354118, + -0.1302883, + -0.23778433, + -0.3285621, + -0.5267639, + 0.2028559, + -0.11064168, + 0.13373893, + 0.20260222, + 0.1876685, + 0.2512435, + -0.5989506, + 0.1890392, + -0.14925757, + -0.045375023, + -0.087758094, + 0.43079036, + 0.27966487, + 0.10807341, + -0.15153547, + 0.13403147, + 0.3049448, + 0.52505124, + -0.1199102, + 0.65521336, + 0.24873954, + 0.34760848, + -0.32984027, + 0.050649326, + -0.0058471467, + -0.0583394, + -0.18948396, + 0.04642572, + 0.4100865, + -0.20869803, + 0.022507096, + 0.11913465, + 0.064550586, + 0.21797444, + 0.05032976, + 0.10102482, + 0.31665415, + 0.053272925, + 0.14985639, + 0.076726, + -0.013332718, + 0.23030274, + 0.17200038, + 0.49783182, + 0.14103818, + -0.024885366, + 0.0647167, + 0.012117245, + -0.27015942, + -0.14433724, + -0.060112257, + -0.06654009, + -0.18487605, + -0.22048208 + ] +} \ No newline at end of file diff --git a/internal/impl/huggingface/testdata/expected_token_classification.json b/internal/impl/huggingface/testdata/expected_token_classification.json new file mode 100644 index 000000000..e5764c69a --- /dev/null +++ b/internal/impl/huggingface/testdata/expected_token_classification.json @@ -0,0 +1,290 @@ +{ + "WarpStream Labs have a great offering!": { + "entities": [ + { + "Entity": "LABEL_3", + "Score": 0.9882245, + "Scores": null, + "Index": 0, + "Word": "WarpStream", + "TokenID": 0, + "Start": 0, + "End": 10, + "IsSubword": false + }, + { + "Entity": "LABEL_4", + "Score": 0.99202913, + "Scores": null, + "Index": 0, + "Word": "Labs", + "TokenID": 0, + "Start": 11, + "End": 15, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.999846, + "Scores": null, + "Index": 0, + "Word": "have a great offering!", + "TokenID": 0, + "Start": 16, + 
"End": 38, + "IsSubword": false + } + ] + }, + "An Italian man went to Malta...": { + "entities": [ + { + "Entity": "LABEL_0", + "Score": 0.99961716, + "Scores": null, + "Index": 0, + "Word": "An", + "TokenID": 0, + "Start": 0, + "End": 2, + "IsSubword": false + }, + { + "Entity": "LABEL_7", + "Score": 0.9893719, + "Scores": null, + "Index": 0, + "Word": "Italian", + "TokenID": 0, + "Start": 3, + "End": 10, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.99976796, + "Scores": null, + "Index": 0, + "Word": "man went to", + "TokenID": 0, + "Start": 11, + "End": 22, + "IsSubword": false + }, + { + "Entity": "LABEL_5", + "Score": 0.998925, + "Scores": null, + "Index": 0, + "Word": "Malta", + "TokenID": 0, + "Start": 23, + "End": 28, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.99969673, + "Scores": null, + "Index": 0, + "Word": "...", + "TokenID": 0, + "Start": 28, + "End": 31, + "IsSubword": false + } + ] + }, + "NVIDIA corporation was valued higher than Apple!?": { + "entities": [ + { + "Entity": "LABEL_3", + "Score": 0.9968505, + "Scores": null, + "Index": 0, + "Word": "NVIDIA", + "TokenID": 0, + "Start": 0, + "End": 6, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.97990334, + "Scores": null, + "Index": 0, + "Word": "corporation was valued higher than", + "TokenID": 0, + "Start": 7, + "End": 41, + "IsSubword": false + }, + { + "Entity": "LABEL_3", + "Score": 0.98733974, + "Scores": null, + "Index": 0, + "Word": "Apple", + "TokenID": 0, + "Start": 42, + "End": 47, + "IsSubword": false + }, + { + "Entity": "LABEL_4", + "Score": 0.465456, + "Scores": null, + "Index": 0, + "Word": "!", + "TokenID": 0, + "Start": 47, + "End": 48, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.99968076, + "Scores": null, + "Index": 0, + "Word": "?", + "TokenID": 0, + "Start": 48, + "End": 49, + "IsSubword": false + } + ] + }, + "My silly hat is from Hatfield.": { + "entities": [ + { + "Entity": "LABEL_0", + 
"Score": 0.9996177, + "Scores": null, + "Index": 0, + "Word": "My silly hat is from", + "TokenID": 0, + "Start": 0, + "End": 20, + "IsSubword": false + }, + { + "Entity": "LABEL_5", + "Score": 0.9851442, + "Scores": null, + "Index": 0, + "Word": "Hatfield", + "TokenID": 0, + "Start": 21, + "End": 29, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.9997869, + "Scores": null, + "Index": 0, + "Word": ".", + "TokenID": 0, + "Start": 29, + "End": 30, + "IsSubword": false + } + ] + }, + "Japanese Bento boxes taste amazing!": { + "entities": [ + { + "Entity": "LABEL_7", + "Score": 0.7260668, + "Scores": null, + "Index": 0, + "Word": "Japanese Ben", + "TokenID": 0, + "Start": 0, + "End": 12, + "IsSubword": false + }, + { + "Entity": "LABEL_8", + "Score": 0.66532207, + "Scores": null, + "Index": 0, + "Word": "##to", + "TokenID": 0, + "Start": 12, + "End": 14, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.99887407, + "Scores": null, + "Index": 0, + "Word": "boxes taste amazing!", + "TokenID": 0, + "Start": 15, + "End": 35, + "IsSubword": false + } + ] + }, + "My name is Wolfgang and I live in Berlin.": { + "entities": [ + { + "Entity": "LABEL_0", + "Score": 0.9993014, + "Scores": null, + "Index": 0, + "Word": "My name is", + "TokenID": 0, + "Start": 0, + "End": 10, + "IsSubword": false + }, + { + "Entity": "LABEL_1", + "Score": 0.99230945, + "Scores": null, + "Index": 0, + "Word": "Wolfgang", + "TokenID": 0, + "Start": 11, + "End": 19, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.9984816, + "Scores": null, + "Index": 0, + "Word": "and I live in", + "TokenID": 0, + "Start": 20, + "End": 33, + "IsSubword": false + }, + { + "Entity": "LABEL_5", + "Score": 0.997545, + "Scores": null, + "Index": 0, + "Word": "Berlin", + "TokenID": 0, + "Start": 34, + "End": 40, + "IsSubword": false + }, + { + "Entity": "LABEL_0", + "Score": 0.9997385, + "Scores": null, + "Index": 0, + "Word": ".", + "TokenID": 0, + "Start": 40, + "End": 
41, + "IsSubword": false + } + ] + } +} \ No newline at end of file diff --git a/public/components/all/x_benthos_extra.go b/public/components/all/x_bento_extra.go similarity index 100% rename from public/components/all/x_benthos_extra.go rename to public/components/all/x_bento_extra.go diff --git a/public/components/all/x_huggingface.go b/public/components/all/x_huggingface.go new file mode 100644 index 000000000..11cbd170b --- /dev/null +++ b/public/components/all/x_huggingface.go @@ -0,0 +1,8 @@ +//go:build huggingbento + +package all + +import ( + // Bring in the internal plugin definitions. + _ "github.com/warpstreamlabs/bento/internal/impl/huggingface" +) diff --git a/resources/huggingbento/Dockerfile b/resources/huggingbento/Dockerfile new file mode 100644 index 000000000..eae2c7420 --- /dev/null +++ b/resources/huggingbento/Dockerfile @@ -0,0 +1,42 @@ +ARG BUILD_PLATFORM=linux/amd64 + +FROM --platform=$BUILD_PLATFORM golang:1.22 AS build + +ENV CGO_ENABLED=1 +ENV CGO_LDFLAGS="-L/usr/lib/" +ENV GOOS="linux" +ENV GOARCH="amd64" +ENV TAGS="huggingbento" + +WORKDIR /go/src/github.com/warpstreamlabs/bento/ +# Update dependencies: On unchanged dependencies, cached layer will be reused +COPY go.* /go/src/github.com/warpstreamlabs/bento/ +RUN go mod download + +COPY --from=ghcr.io/knights-analytics/hugot:latest /usr/lib/libtokenizers.a /usr/lib/libtokenizers.a +COPY --from=ghcr.io/knights-analytics/hugot:latest /usr/lib64/onnxruntime.so /usr/lib/onnxruntime.so +# Build +COPY . 
/go/src/github.com/warpstreamlabs/bento/ + +RUN make huggingbento + +# Pack +FROM --platform=$BUILD_PLATFORM public.ecr.aws/amazonlinux/amazonlinux:2023 + +LABEL maintainer="WarpStream Labs " +LABEL org.opencontainers.image.source="https://github.com/warpstreamlabs/bento" + +WORKDIR /root/ + +COPY --from=build /usr/lib/libtokenizers.a /usr/lib/libtokenizers.a +COPY --from=build /usr/lib/onnxruntime.so /usr/lib/onnxruntime.so + +COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ +COPY --from=build /go/src/github.com/warpstreamlabs/bento/target/bin/huggingbento . +COPY ./config/docker.yaml /bento.yaml + +EXPOSE 4195 + +ENTRYPOINT ["./huggingbento"] + +CMD ["-c", "/bento.yaml"] diff --git a/resources/huggingbento/README.md b/resources/huggingbento/README.md new file mode 100644 index 000000000..cf42f2a3d --- /dev/null +++ b/resources/huggingbento/README.md @@ -0,0 +1,106 @@ +# HuggingBento + +HuggingBento is a distribution of Bento built for running Hugging Face transformer pipelines. It is built on the [Knights Analytics Hugot](https://github.com/knights-analytics/hugot) library, which has two external dependencies: +- An Open Neural Network Exchange (ONNX) Runtime dynamic library ([See installation](#install-onnx-runtime)) +- A Hugging Face tokenizers C binding ([See installation](#install-the-hugging-face-tokenizers-binding)) + +To get started, we suggest [running with Docker](#run-with-docker) instead, where the `warpstreamlabs/huggingbento` image has the necessary dependencies baked in. 
+ +## Running inference + +HuggingBento can be passed a locally downloaded model: +```yaml +pipeline: + processors: + - huggingface_resource: + pipeline_name: classify-with-local-model + model_path: "./models/KnightsAnalytics_distilbert-base-uncased-finetuned-sst-2-english" +``` + +Otherwise, you can toggle `enable_model_download: true` and set the `model_repository` to a HuggingFace repository containing one or more valid ONNX models: +```yaml +pipeline: + processors: + - huggingface_resource: + pipeline_name: classify-with-downloaded-model + model_path: "./models" + enable_model_download: true + model_repository: "KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english" +``` + +## Run with Docker + +```shell +docker run --rm -v /path/to/config.yaml:/bento.yaml -v /path/to/model/repository:/model_repository warpstreamlabs/huggingbento:latest +``` + +## Install ONNX Runtime +You can follow the [official getting started instructions](https://onnxruntime.ai/getting-started) or follow the guide below: + +Find the appropriate prebuilt Open Neural Network Exchange (ONNX) Runtime libraries at the [microsoft/onnxruntime](https://github.com/microsoft/onnxruntime/releases) release page. While HuggingBento is currently built and tested using `v1.18.x`, feel free to experiment with different versions. + +### ONNX Runtime Supported Platforms + +ONNX Runtime version +`1.18` supports the following operating system and architecture combinations: + +| Operating System (OS) | Architecture (ARCH)| CPU | GPU | +|-----------------------|--------------------|-----|-----| +| `linux` | `x64` | ✅ | ✅ | +| `linux` | `arm64` | ✅ | ❌ | +| `osx` | `x86_64` | ✅ | ❌ | +| `osx` | `arm64` | ✅ | ❌ | +| `osx` | `universal2` | ✅ | ❌ | +| `windows` | `x64` | ✅ | ✅ | +| `windows` | `x86` | ✅ | ❌ | +| `windows` | `arm64` | ✅ | ❌ | + + +## Notes: +- HuggingBento has not been CUDA or GPU tested but the underlying `Hugot` library has -- so proceed with caution! 
+- GPU support is available for Linux and Windows x64 builds with CUDA. +- Training support is available for all Windows architectures and Linux x64 and ARM64. +- macOS builds currently do not support GPU acceleration or training. + +For the latest information and downloads, please visit the [official ONNX Runtime GitHub repository](https://github.com/microsoft/onnxruntime). + +```shell +curl -LO https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/onnxruntime-${OS}-${ARCH}-${ONNXRUNTIME_VERSION}.tgz && tar -xzf onnxruntime-${OS}-${ARCH}-${ONNXRUNTIME_VERSION}.tgz +``` + +### For Linux +```shell +mv ./onnxruntime-${OS}-${ARCH}-${ONNXRUNTIME_VERSION}/lib/libonnxruntime.so.${ONNXRUNTIME_VERSION} /usr/lib/onnxruntime.so +``` + +### For Mac + +Using shell: +```shell +mv ./onnxruntime-${OS}-${ARCH}-${ONNXRUNTIME_VERSION}/lib/libonnxruntime.${ONNXRUNTIME_VERSION}.dylib /usr/local/lib/onnxruntime.so +``` + +Using Homebrew: +```shell +brew install onnxruntime && mv /opt/homebrew/opt/onnxruntime/lib/libonnxruntime.dylib /usr/local/lib/onnxruntime.so +``` + +## Install the Hugging Face tokenizers binding + +Hugot uses Hugging Face tokenizers that are compiled from Rust, meaning you'll also need the `libtokenizers.a` static library. + +It can either be [compiled yourself](https://github.com/daulet/tokenizers?tab=readme-ov-file#installation) or downloaded as a [release](https://github.com/daulet/tokenizers/releases). + +You can use the (flaky) script below to download the correct C bindings matching the current version on `github.com/daulet/tokenizers`: +```shell +GOOS=$(go env GOOS) +GOARCH=$(go env GOARCH) + +tokenizer_version=$(go list -m -f '{{.Version}}' 'github.com/daulet/tokenizers') +tokenizer_version=$(echo $tokenizer_version | awk -F'-' '{print $NF}') +echo "Downloading ${tokenizer_version}/libtokenizers.${GOOS}-${GOARCH}.tar.gz..." 
+curl -LOs https://github.com/daulet/tokenizers/releases/download/${tokenizer_version}/libtokenizers.${GOOS}-${GOARCH}.tar.gz +echo "Completed downloading ${tokenizer_version}/libtokenizers.${GOOS}-${GOARCH}.tar.gz." + +tar -C ${DEPENDENCY_DEST} -xzf libtokenizers.${GOOS}-${GOARCH}.tar.gz +rm libtokenizers.${GOOS}-${GOARCH}.tar.gz +``` diff --git a/resources/huggingbento/install.sh b/resources/huggingbento/install.sh new file mode 100755 index 000000000..4a601fd12 --- /dev/null +++ b/resources/huggingbento/install.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +ONNXRUNTIME_VERSION=${ONNXRUNTIME_VERSION:-"1.18.0"} +DEPENDENCY_DEST=${DEPENDENCY_DEST:-"/usr/lib"} + +# Get the OS and ARCH in correct format to download ONNX libs +# Note: only linux and mac supported + +# Get OS +if [[ "$OSTYPE" == "darwin"* ]]; then + OS="osx" +elif [[ "$OSTYPE" == "linux-gnu"* ]]; then + OS="linux" +else + echo "Unsupported OS" + exit 1 +fi + +# Get architecture +ARCH=$(uname -m) +case $ARCH in + x86_64|amd64) + ARCH="x64" + ;; + aarch64|arm64) + ARCH="arm64" + ;; + i386|i686) + ARCH="x86" + ;; + *) + echo "Unsupported architecture" + exit 1 + ;; +esac + +# Special case for macOS universal binary +if [[ "$OS" == "osx" && "$ARCH" == "x64" ]]; then + FILENAME="onnxruntime-osx-universal2-1.19.0.tgz" +else + FILENAME="onnxruntime-$OS-$ARCH-1.19.0.tgz" +fi + +# For Windows, use .zip instead of .tgz +if [[ "$OS" == "win" ]]; then + FILENAME="${FILENAME%.tgz}.zip" +fi + +echo "Detected OS: $OS" +echo "Detected architecture: $ARCH" + +# Download ONNX +echo "Downloading v${ONNXRUNTIME_VERSION}/onnxruntime-${OS}-${ARCH}-${ONNXRUNTIME_VERSION}.tgz..." 
# Resolve the ONNX Runtime release asset for this platform.
# The release archives do not use the OS/ARCH values detected above for every
# platform:
#   - Linux ARM64 archives are published as "linux-aarch64".
#   - There is no "osx-x64" archive; Intel Macs use the "osx-universal2" build.
case "${OS}-${ARCH}" in
    linux-arm64) onnx_platform="linux-aarch64" ;;
    osx-x64)     onnx_platform="osx-universal2" ;;
    *)           onnx_platform="${OS}-${ARCH}" ;;
esac
onnx_package="onnxruntime-${onnx_platform}-${ONNXRUNTIME_VERSION}"
onnx_archive="${onnx_package}.tgz"

# --fail (-f) makes curl return non-zero on an HTTP error (e.g. 404) instead of
# silently saving the error page under the archive's name.
if ! curl -fLOs "https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/${onnx_archive}"; then
    echo "Error: failed to download v${ONNXRUNTIME_VERSION}/${onnx_archive}"
    exit 1
fi
echo "Completed downloading v${ONNXRUNTIME_VERSION}/${onnx_archive}"

if ! tar -xzf "${onnx_archive}"; then
    echo "Error: failed to extract ${onnx_archive}"
    exit 1
fi

# Locate the versioned shared library inside the extracted archive.
# Linux archives ship lib/libonnxruntime.so.<version> and macOS archives ship
# lib/libonnxruntime.<version>.dylib; the "<version>.so" spelling is kept as a
# fallback so anything the previous lookup matched is still found.
target_file="${DEPENDENCY_DEST}/onnxruntime.so"
source_file=""
for candidate in \
    "./${onnx_package}/lib/libonnxruntime.so.${ONNXRUNTIME_VERSION}" \
    "./${onnx_package}/lib/libonnxruntime.${ONNXRUNTIME_VERSION}.so" \
    "./${onnx_package}/lib/libonnxruntime.${ONNXRUNTIME_VERSION}.dylib"; do
    if [[ -f "${candidate}" ]]; then
        source_file="${candidate}"
        break
    fi
done

if [[ -z "${source_file}" ]]; then
    echo "Error: Neither .so nor .dylib file found."
    exit 1
fi

# The library is installed under the single name onnxruntime.so on every
# platform (a .dylib is renamed as well).
if ! mv -f "${source_file}" "${target_file}"; then
    echo "Error: failed to move ${source_file} to ${target_file}"
    exit 1
fi
rm "${onnx_archive}"

# Get tokenizer library

GOOS=$(go env GOOS)
GOARCH=$(go env GOARCH)

# Keep only the text after the last '-' of the module version; that value is
# used as the release tag in the download URL below.
tokenizer_version=$(go list -m -f '{{.Version}}' 'github.com/daulet/tokenizers')
tokenizer_version=$(echo "$tokenizer_version" | awk -F'-' '{print $NF}')
tokenizer_archive="libtokenizers.${GOOS}-${GOARCH}.tar.gz"
echo "Downloading ${tokenizer_version}/${tokenizer_archive}..."
if ! curl -fLOs "https://github.com/daulet/tokenizers/releases/download/${tokenizer_version}/${tokenizer_archive}"; then
    echo "Error: failed to download ${tokenizer_version}/${tokenizer_archive}"
    exit 1
fi
echo "Completed downloading ${tokenizer_version}/${tokenizer_archive}."
+ +tar -C ${DEPENDENCY_DEST} -xzf libtokenizers.${GOOS}-${GOARCH}.tar.gz +rm libtokenizers.${GOOS}-${GOARCH}.tar.gz + + diff --git a/website/docs/components/processors/nlp_classify_text.md b/website/docs/components/processors/nlp_classify_text.md new file mode 100644 index 000000000..f2120f51b --- /dev/null +++ b/website/docs/components/processors/nlp_classify_text.md @@ -0,0 +1,227 @@ +--- +title: nlp_classify_text +slug: nlp_classify_text +type: processor +status: beta +categories: ["Machine Learning","NLP"] +--- + + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +:::caution BETA +This component is mostly stable but breaking changes could still be made outside of major version releases if a fundamental problem with the component is found. +::: +Performs text classification using a Hugging Face 🤗 NLP pipeline with an ONNX Runtime model. + +Introduced in version v1.3.0 (huggingbento). + + + + + + +```yml +# Common config fields, showing default values +label: "" +nlp_classify_text: + pipeline_name: "" # No default (optional) + model_path: /model_repository + model_download_options: {} + aggregation_function: SOFTMAX + problem_type: singleLabel +``` + + + + +```yml +# All config fields, showing default values +label: "" +nlp_classify_text: + pipeline_name: "" # No default (optional) + model_path: /model_repository + onnx_library_path: /usr/lib/onnxruntime.so + onnx_filename: "" + enable_model_download: false + model_download_options: + model_repository: "" + aggregation_function: SOFTMAX + problem_type: singleLabel +``` + + + + +### Text Classification +Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness. +This processor runs text-classification inference against batches of text data, returning a labelled classification corresponding to each input. 
+This component uses [Hugot](https://github.com/knights-analytics/hugot), a library that provides an interface for running [Open Neural Network Exchange (ONNX) models](https://onnx.ai/onnx/intro/) and transformer pipelines, with a focus on NLP tasks. + +Currently, [HuggingBento only implements](https://github.com/knights-analytics/hugot/tree/main?tab=readme-ov-file#implemented-pipelines): + +- [featureExtraction](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.FeatureExtractionPipeline) +- [textClassification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TextClassificationPipeline) +- [tokenClassification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TokenClassificationPipeline) + +### What is a pipeline? +From [HuggingFace docs](https://huggingface.co/docs/transformers/en/main_classes/pipelines): +> A pipeline in 🤗 Transformers is an abstraction referring to a series of steps that are executed in a specific order to preprocess and transform data and return a prediction from a model. Some example stages found in a pipeline might be data preprocessing, feature extraction, and normalization. + +:::warning +While only models in [ONNX](https://onnx.ai/) format are supported, exporting models from other formats to ONNX is both possible and straightforward in most standard ML libraries. For more on this, check out the [ONNX conversion docs](https://onnx.ai/onnx/intro/converters.html). +Alternatively, check out [HuggingFace Optimum](https://huggingface.co/docs/optimum/en/exporters/onnx/usage_guides/export_a_model) for easy model conversion. 
+::: + + +## Examples + + + + + +Here, we load the [Cohee/distilbert-base-uncased-go-emotions-onnx](https://huggingface.co/Cohee/distilbert-base-uncased-go-emotions-onnx) model from the local directory at `models/coheedistilbert_base_uncased_go_emotions_onnx`. The processor returns a single-label output with the highest emotion score for the text. + +```yaml +pipeline: + processors: + - nlp_classify_text: + pipeline_name: classify-incoming-data + model_path: "models/coheedistilbert_base_uncased_go_emotions_onnx" + +# In: "I'm super excited for my Bento box!" +# Out: [{"Label":"excitement","Score":0.34134513}] +``` + + + + +Here, we retrieve the [KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english) model from HuggingFace and store it in a `./models` directory. The processor returns a multi-label output showing a `POSITIVE` and a `NEGATIVE` score for the input text. + +```yaml +pipeline: + processors: + - nlp_classify_text: + pipeline_name: classify-multi-label + model_path: "./models" + enable_model_download: true + model_download_options: + model_repository: "KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english" + + +# In: "This meal tastes like old boots." +# Out: [{"Label":"NEGATIVE","Score":0.9977291},{"Label":"POSITIVE","Score":0.0022708932}] +``` + + + + +## Fields + +### `pipeline_name` + +Name of the pipeline. Defaults to uuid_v4() if not set. + + +Type: `string` + +### `model_path` + +Path to the ONNX model directory. If `enable_model_download` is `true`, the model will be downloaded here. + + +Type: `string` +Default: `"/model_repository"` + +```yml +# Examples + +model_path: /path/to/models/my_model.onnx +``` + +### `onnx_library_path` + +The location of the ONNX Runtime dynamic library. + + +Type: `string` +Default: `"/usr/lib/onnxruntime.so"` + +### `onnx_filename` + +The filename of the model to run. 
Only necessary to specify when multiple .onnx files are present. + + +Type: `string` +Default: `""` + +```yml +# Examples + +onnx_filename: model.onnx +``` + +### `enable_model_download` + +If enabled, attempts to download an ONNX Runtime compatible model from the HuggingFace repository specified in `model_download_options.model_repository`. + + +Type: `bool` +Default: `false` + +### `model_download_options` + +Options used when downloading a model from HuggingFace (see `enable_model_download`). + + +Type: `object` + +### `model_download_options.model_repository` + +The name of the HuggingFace model repository. + + +Type: `string` +Default: `""` + +```yml +# Examples + +model_repository: KnightsAnalytics/distilbert-NER + +model_repository: KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english + +model_repository: sentence-transformers/all-MiniLM-L6-v2 +``` + +### `aggregation_function` + +The aggregation function to use for the text classification pipeline. + + +Type: `string` +Default: `"SOFTMAX"` +Options: `SOFTMAX`, `SIGMOID`. + +### `problem_type` + +The problem type for the text classification pipeline. + + +Type: `string` +Default: `"singleLabel"` +Options: `singleLabel`, `multiLabel`. + + diff --git a/website/docs/components/processors/nlp_classify_tokens.md b/website/docs/components/processors/nlp_classify_tokens.md new file mode 100644 index 000000000..5bdf92efb --- /dev/null +++ b/website/docs/components/processors/nlp_classify_tokens.md @@ -0,0 +1,188 @@ +--- +title: nlp_classify_tokens +slug: nlp_classify_tokens +type: processor +status: beta +categories: ["Machine Learning","NLP"] +--- + + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +:::caution BETA +This component is mostly stable but breaking changes could still be made outside of major version releases if a fundamental problem with the component is found. +::: +Performs token classification using a Hugging Face 🤗 NLP pipeline with an ONNX Runtime model. 
+ +Introduced in version v1.3.0 (huggingbento). 
+ + + + + + +```yml +# Common config fields, showing default values +label: "" +nlp_classify_tokens: + pipeline_name: "" # No default (optional) + model_path: /model_repository + model_download_options: {} + aggregation_strategy: SIMPLE + ignore_labels: [] +``` + + + + +```yml +# All config fields, showing default values +label: "" +nlp_classify_tokens: + pipeline_name: "" # No default (optional) + model_path: /model_repository + onnx_library_path: /usr/lib/onnxruntime.so + onnx_filename: "" + enable_model_download: false + model_download_options: + model_repository: "" + aggregation_strategy: SIMPLE + ignore_labels: [] +``` + + + + +### Token Classification +Token classification assigns a label to individual tokens in a sentence. This processor runs token classification inference against batches of text data, returning a set of classified entities corresponding to each input. +This component uses [Hugot](https://github.com/knights-analytics/hugot), a library that provides an interface for running [Open Neural Network Exchange (ONNX) models](https://onnx.ai/onnx/intro/) and transformer pipelines, with a focus on NLP tasks. + +Currently, [HuggingBento only implements](https://github.com/knights-analytics/hugot/tree/main?tab=readme-ov-file#implemented-pipelines): + +- [featureExtraction](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.FeatureExtractionPipeline) +- [textClassification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TextClassificationPipeline) +- [tokenClassification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TokenClassificationPipeline) + +### What is a pipeline? 
+From [HuggingFace docs](https://huggingface.co/docs/transformers/en/main_classes/pipelines): +> A pipeline in 🤗 Transformers is an abstraction referring to a series of steps that are executed in a specific order to preprocess and transform data and return a prediction from a model. 
Some example stages found in a pipeline might be data preprocessing, feature extraction, and normalization. + +:::warning +While only models in [ONNX](https://onnx.ai/) format are supported, exporting existing formats to ONNX is both possible and straightforward in most standard ML libraries. For more on this, check out the [ONNX conversion docs](https://onnx.ai/onnx/intro/converters.html). +Otherwise, check out using [HuggingFace Optimum](https://huggingface.co/docs/optimum/en/exporters/onnx/usage_guides/export_a_model) for easy model conversion. +::: + + +## Fields + +### `pipeline_name` + +Name of the pipeline. Defaults to uuid_v4() if not set. + + +Type: `string` + +### `model_path` + +Path to the ONNX model directory. If `enable_model_download` is `true`, the model will be downloaded here. + + +Type: `string` +Default: `"/model_repository"` + +```yml +# Examples + +model_path: /path/to/models/my_model.onnx +``` + +### `onnx_library_path` + +The location of the ONNX Runtime dynamic library. + + +Type: `string` +Default: `"/usr/lib/onnxruntime.so"` + +### `onnx_filename` + +The filename of the model to run. Only necessary to specify when multiple .onnx files are present. + + +Type: `string` +Default: `""` + +```yml +# Examples + +onnx_filename: model.onnx +``` + +### `enable_model_download` + +If enabled, attempts to download an ONNX Runtime compatible model from HuggingFace specified in `model_download_options.model_repository`. + + +Type: `bool` +Default: `false` + +### `model_download_options` + +Sorry! This field is missing documentation. + + +Type: `object` + +### `model_download_options.model_repository` + +The name of the Hugging Face model repository. 
+ + +Type: `string` +Default: `""` + +```yml +# Examples + +model_repository: KnightsAnalytics/distilbert-NER + +model_repository: KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english + +model_repository: sentence-transformers/all-MiniLM-L6-v2 +``` + +### `aggregation_strategy` + +The aggregation strategy to use for the token classification pipeline. + + +Type: `string` +Default: `"SIMPLE"` +Options: `SIMPLE`, `NONE`. + +### `ignore_labels` + +Labels to ignore in the token classification pipeline. + + +Type: `array` +Default: `[]` + +```yml +# Examples + +ignore_labels: + - O + - MISC +``` + + diff --git a/website/docs/components/processors/nlp_extract_features.md b/website/docs/components/processors/nlp_extract_features.md new file mode 100644 index 000000000..f8373e652 --- /dev/null +++ b/website/docs/components/processors/nlp_extract_features.md @@ -0,0 +1,169 @@ +--- +title: nlp_extract_features +slug: nlp_extract_features +type: processor +status: beta +categories: ["Machine Learning","NLP"] +--- + + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +:::caution BETA +This component is mostly stable but breaking changes could still be made outside of major version releases if a fundamental problem with the component is found. +::: +Performs feature extraction using a Hugging Face 🤗 NLP pipeline with an ONNX Runtime model. + +Introduced in version v1.3.0 (huggingbento). 
+ + + + + + +```yml +# Common config fields, showing default values +label: "" +nlp_extract_features: + pipeline_name: "" # No default (optional) + model_path: /model_repository + model_download_options: {} + normalization: false +``` + + + + +```yml +# All config fields, showing default values +label: "" +nlp_extract_features: + pipeline_name: "" # No default (optional) + model_path: /model_repository + onnx_library_path: /usr/lib/onnxruntime.so + onnx_filename: "" + enable_model_download: false + model_download_options: + model_repository: "" + normalization: false +``` + + + + +### Feature Extraction +Feature extraction is the task of extracting features learnt in a model. This processor runs a feature extraction model against batches of text data, returning a model's multidimensional representation of said features in tensor/float64 format. +This component uses [Hugot](https://github.com/knights-analytics/hugot), a library that provides an interface for running [Open Neural Network Exchange (ONNX) models](https://onnx.ai/onnx/intro/) and transformer pipelines, with a focus on NLP tasks. + +Currently, [HuggingBento only implements](https://github.com/knights-analytics/hugot/tree/main?tab=readme-ov-file#implemented-pipelines): + +- [featureExtraction](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.FeatureExtractionPipeline) +- [textClassification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TextClassificationPipeline) +- [tokenClassification](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.TokenClassificationPipeline) + +### What is a pipeline? +From [HuggingFace docs](https://huggingface.co/docs/transformers/en/main_classes/pipelines): +> A pipeline in 🤗 Transformers is an abstraction referring to a series of steps that are executed in a specific order to preprocess and transform data and return a prediction from a model. 
Some example stages found in a pipeline might be data preprocessing, feature extraction, and normalization. + +:::warning +While only models in [ONNX](https://onnx.ai/) format are supported, exporting existing formats to ONNX is both possible and straightforward in most standard ML libraries. For more on this, check out the [ONNX conversion docs](https://onnx.ai/onnx/intro/converters.html). +Otherwise, check out using [HuggingFace Optimum](https://huggingface.co/docs/optimum/en/exporters/onnx/usage_guides/export_a_model) for easy model conversion. +::: + + +## Fields + +### `pipeline_name` + +Name of the pipeline. Defaults to uuid_v4() if not set. + + +Type: `string` + +### `model_path` + +Path to the ONNX model directory. If `enable_model_download` is `true`, the model will be downloaded here. + + +Type: `string` +Default: `"/model_repository"` + +```yml +# Examples + +model_path: /path/to/models/my_model.onnx +``` + +### `onnx_library_path` + +The location of the ONNX Runtime dynamic library. + + +Type: `string` +Default: `"/usr/lib/onnxruntime.so"` + +### `onnx_filename` + +The filename of the model to run. Only necessary to specify when multiple .onnx files are present. + + +Type: `string` +Default: `""` + +```yml +# Examples + +onnx_filename: model.onnx +``` + +### `enable_model_download` + +If enabled, attempts to download an ONNX Runtime compatible model from HuggingFace specified in `model_download_options.model_repository`. + + +Type: `bool` +Default: `false` + +### `model_download_options` + +Sorry! This field is missing documentation. + + +Type: `object` + +### `model_download_options.model_repository` + +The name of the Hugging Face model repository. 
+ + +Type: `string` +Default: `""` + +```yml +# Examples + +model_repository: KnightsAnalytics/distilbert-NER + +model_repository: KnightsAnalytics/distilbert-base-uncased-finetuned-sst-2-english + +model_repository: sentence-transformers/all-MiniLM-L6-v2 +``` + +### `normalization` + +Whether to apply normalization in the feature extraction pipeline. + + +Type: `bool` +Default: `false` + +