From 8af72924a436d5d8d7dbc6c9a711656d6f43a1bd Mon Sep 17 00:00:00 2001 From: Abirdcfly Date: Fri, 5 Jan 2024 16:08:33 +0800 Subject: [PATCH] feat: add pgvector as vectorstore Signed-off-by: Abirdcfly --- .github/workflows/codespell.yaml | 2 +- .github/workflows/example_test.yaml | 1 + .github/workflows/pgvector_image_build.yml | 57 + api/base/v1alpha1/common.go | 22 +- api/base/v1alpha1/vectorstore.go | 18 +- api/base/v1alpha1/vectorstore_types.go | 15 + api/base/v1alpha1/zz_generated.deepcopy.go | 25 + ...cadia.kubeagi.k8s.com.cn_vectorstores.yaml | 41 + ...trievalqachain_knowledgebase_pgvector.yaml | 63 + ...rcadia_v1alpha1_datasource_postgresql.yaml | 3 +- ...cadia_v1alpha1_knowledgebase_pgvector.yaml | 23 + ...arcadia_v1alpha1_vectorstore_pgvector.yaml | 14 + controllers/base/datasource_controller.go | 15 +- controllers/base/knowledgebase_controller.go | 64 +- controllers/base/vectorstore_controller.go | 31 +- deploy/charts/arcadia/Chart.yaml | 2 +- ...cadia.kubeagi.k8s.com.cn_vectorstores.yaml | 41 + .../charts/arcadia/templates/controller.yaml | 2 + .../arcadia/templates/post-datasource.yaml | 3 +- .../arcadia/templates/post-vectorstore.yaml | 20 + deploy/charts/arcadia/values.yaml | 3 +- deploy/pgvector/Dockerfile | 69 + deploy/pgvector/README.md | 5 + deploy/pgvector/docker-compose.yml | 18 + deploy/pgvector/pgvector.sh | 25 + .../opt/bitnami/.bitnami_components.json | 8 + .../opt/bitnami/licenses/licenses.txt | 2 + .../opt/bitnami/scripts/libbitnami.sh | 53 + .../prebuildfs/opt/bitnami/scripts/libfile.sh | 141 ++ .../prebuildfs/opt/bitnami/scripts/libfs.sh | 193 +++ .../prebuildfs/opt/bitnami/scripts/libhook.sh | 18 + .../prebuildfs/opt/bitnami/scripts/liblog.sh | 114 ++ .../prebuildfs/opt/bitnami/scripts/libnet.sh | 165 ++ .../prebuildfs/opt/bitnami/scripts/libos.sh | 657 ++++++++ .../opt/bitnami/scripts/libpersistence.sh | 124 ++ .../opt/bitnami/scripts/libservice.sh | 496 ++++++ .../opt/bitnami/scripts/libvalidations.sh | 304 ++++ 
.../opt/bitnami/scripts/libversion.sh | 51 + .../opt/bitnami/scripts/libwebserver.sh | 476 ++++++ .../prebuildfs/usr/sbin/install_packages | 27 + .../pgvector/prebuildfs/usr/sbin/run-script | 24 + .../rootfs/opt/bitnami/scripts/libautoctl.sh | 308 ++++ .../opt/bitnami/scripts/libpostgresql.sh | 1323 +++++++++++++++++ .../scripts/locales/add-extra-locales.sh | 46 + .../opt/bitnami/scripts/postgresql-env.sh | 351 +++++ .../bitnami/scripts/postgresql/entrypoint.sh | 32 + .../bitnami/scripts/postgresql/postunpack.sh | 26 + .../bitnami/scripts/postgresql/run-autoctl.sh | 32 + .../opt/bitnami/scripts/postgresql/run.sh | 39 + .../opt/bitnami/scripts/postgresql/setup.sh | 49 + deploy/pgvector/run.sh | 51 + deploy/pgvector/tags-info.yaml | 5 + go.mod | 9 +- go.sum | 90 +- .../retriever/knowledgebaseretriever.go | 21 +- pkg/datasource/oss.go | 32 +- pkg/datasource/postgresql.go | 79 +- pkg/embeddings/zhipuai/zhipuai.go | 3 + pkg/llms/zhipuai/api.go | 4 +- pkg/utils/structured.go | 9 - pkg/vectorstore/vectorstore.go | 180 +++ tests/deploy-values.yaml | 3 +- tests/example-test.sh | 21 +- 63 files changed, 5940 insertions(+), 208 deletions(-) create mode 100644 .github/workflows/pgvector_image_build.yml create mode 100644 config/samples/app_retrievalqachain_knowledgebase_pgvector.yaml create mode 100644 config/samples/arcadia_v1alpha1_knowledgebase_pgvector.yaml create mode 100644 config/samples/arcadia_v1alpha1_vectorstore_pgvector.yaml create mode 100644 deploy/pgvector/Dockerfile create mode 100644 deploy/pgvector/README.md create mode 100644 deploy/pgvector/docker-compose.yml create mode 100755 deploy/pgvector/pgvector.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/.bitnami_components.json create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/licenses/licenses.txt create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libbitnami.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libfile.sh create mode 100644 
deploy/pgvector/prebuildfs/opt/bitnami/scripts/libfs.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libhook.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/liblog.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libnet.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libos.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libpersistence.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libservice.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libvalidations.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libversion.sh create mode 100644 deploy/pgvector/prebuildfs/opt/bitnami/scripts/libwebserver.sh create mode 100755 deploy/pgvector/prebuildfs/usr/sbin/install_packages create mode 100755 deploy/pgvector/prebuildfs/usr/sbin/run-script create mode 100644 deploy/pgvector/rootfs/opt/bitnami/scripts/libautoctl.sh create mode 100644 deploy/pgvector/rootfs/opt/bitnami/scripts/libpostgresql.sh create mode 100755 deploy/pgvector/rootfs/opt/bitnami/scripts/locales/add-extra-locales.sh create mode 100644 deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql-env.sh create mode 100755 deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/entrypoint.sh create mode 100755 deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/postunpack.sh create mode 100755 deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/run-autoctl.sh create mode 100755 deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/run.sh create mode 100755 deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/setup.sh create mode 100755 deploy/pgvector/run.sh create mode 100644 deploy/pgvector/tags-info.yaml create mode 100644 pkg/vectorstore/vectorstore.go diff --git a/.github/workflows/codespell.yaml b/.github/workflows/codespell.yaml index e3fc78428..5773b9afb 100644 --- a/.github/workflows/codespell.yaml +++ b/.github/workflows/codespell.yaml @@ 
-15,5 +15,5 @@ jobs: with: ignore_words_file: .github/.codespellignore check_filenames: true - skip: go.*,**/*.drawio,./deploy/charts/*,./config/crd/*,./deploy/llms/* + skip: go.*,**/*.drawio,./deploy/charts/*,./config/crd/*,./deploy/llms/*,./deploy/pgvector diff --git a/.github/workflows/example_test.yaml b/.github/workflows/example_test.yaml index 48e17bd92..2f40f3809 100644 --- a/.github/workflows/example_test.yaml +++ b/.github/workflows/example_test.yaml @@ -39,6 +39,7 @@ jobs: # see https://github.com/actions/runner/issues/662 if: steps.cache.outputs.cache-hit != 'true' - name: Setup Golang + if: steps.cache.outputs.cache-hit != 'true' uses: actions/setup-go@v5 with: go-version-file: 'go.mod' diff --git a/.github/workflows/pgvector_image_build.yml b/.github/workflows/pgvector_image_build.yml new file mode 100644 index 000000000..1953e5864 --- /dev/null +++ b/.github/workflows/pgvector_image_build.yml @@ -0,0 +1,57 @@ +name: Build pgvector images + +on: + pull_request: + branches: [main] + push: + branches: [main] + paths: + - 'deploy/pgvector/Dockerfile' + workflow_dispatch: + +env: + TAG: 16.1.0-debian-11-r18-pgvector-v0.5.1 + +jobs: + image: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + buildkitd-flags: --debug + config-inline: | + [worker.oci] + max-parallelism = 1 + - name: Login to the dockerhub Registry only push + if: github.event_name == 'push' + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + - name: only Build image + if: github.event_name != 'push' + uses: docker/build-push-action@v5 + with: + context: deploy/pgvector + file: deploy/pgvector/Dockerfile + platforms: linux/amd64 + tags: | + kubeagi/postgresql:latest + kubeagi/postgresql:${{ env.TAG }} + push: false + load: true + - name: Build and export to Docker + if: 
github.event_name == 'push' + uses: docker/build-push-action@v5 + with: + context: deploy/pgvector + file: deploy/pgvector/Dockerfile + platforms: linux/amd64,linux/arm64 + tags: | + kubeagi/postgresql:latest + kubeagi/postgresql:${{ env.TAG }} + push: true diff --git a/api/base/v1alpha1/common.go b/api/base/v1alpha1/common.go index d0c6f6e3f..30f8c9182 100644 --- a/api/base/v1alpha1/common.go +++ b/api/base/v1alpha1/common.go @@ -183,28 +183,36 @@ type CommonSpec struct { Description string `json:"description,omitempty"` } -func (endpoint Endpoint) AuthAPIKey(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (string, error) { +func (endpoint Endpoint) AuthData(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (map[string][]byte, error) { if endpoint.AuthSecret == nil { - return "", nil + return nil, nil } if err := utils.ValidateClient(c, cli); err != nil { - return "", err + return nil, err } authSecret := &corev1.Secret{} if c != nil { if err := c.Get(ctx, types.NamespacedName{Name: endpoint.AuthSecret.Name, Namespace: endpoint.AuthSecret.GetNamespace(ns)}, authSecret); err != nil { - return "", err + return nil, err } } else { obj, err := cli.Resource(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}). 
Namespace(endpoint.AuthSecret.GetNamespace(ns)).Get(ctx, endpoint.AuthSecret.Name, metav1.GetOptions{}) if err != nil { - return "", err + return nil, err } err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.UnstructuredContent(), authSecret) if err != nil { - return "", err + return nil, err } } - return string(authSecret.Data["apiKey"]), nil + return authSecret.Data, nil +} + +func (endpoint Endpoint) AuthAPIKey(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (string, error) { + data, err := endpoint.AuthData(ctx, ns, c, cli) + if err != nil { + return "", err + } + return string(data["apiKey"]), nil } diff --git a/api/base/v1alpha1/vectorstore.go b/api/base/v1alpha1/vectorstore.go index 604dbc454..b86d33fb8 100644 --- a/api/base/v1alpha1/vectorstore.go +++ b/api/base/v1alpha1/vectorstore.go @@ -28,20 +28,20 @@ const ( type VectorStoreType string const ( - VectorStoreTypeChroma VectorStoreType = "chroma" - VectorStoreTypeUnknown VectorStoreType = "unknown" + VectorStoreTypeChroma VectorStoreType = "chroma" + VectorStoreTypePGVector VectorStoreType = "pgvector" + VectorStoreTypeUnknown VectorStoreType = "unknown" ) func (vs VectorStoreSpec) Type() VectorStoreType { - if vs.Endpoint == nil { - return VectorStoreTypeUnknown - } - - if vs.Chroma != nil { + switch { + case vs.Chroma != nil: return VectorStoreTypeChroma + case vs.PGVector != nil: + return VectorStoreTypePGVector + default: + return VectorStoreTypeUnknown } - - return VectorStoreTypeUnknown } func (vs *VectorStore) InitCondition() Condition { diff --git a/api/base/v1alpha1/vectorstore_types.go b/api/base/v1alpha1/vectorstore_types.go index 6a5d95920..40a00889d 100644 --- a/api/base/v1alpha1/vectorstore_types.go +++ b/api/base/v1alpha1/vectorstore_types.go @@ -29,6 +29,8 @@ type VectorStoreSpec struct { Endpoint *Endpoint `json:"endpoint,omitempty"` Chroma *Chroma `json:"chroma,omitempty"` + + PGVector *PGVector `json:"pgvector,omitempty"` } // Chroma defines the 
configuration of Chroma @@ -36,6 +38,19 @@ type Chroma struct { DistanceFunction chromago.DistanceFunction `json:"distanceFunction,omitempty"` } +type PGVector struct { + // PreDeleteCollection defines if the collection should be deleted before creating. + PreDeleteCollection bool `json:"preDeleteCollection,omitempty"` + // CollectionName defines the name of the collection + CollectionName string `json:"collectionName,omitempty"` + // EmbeddingTableName defines the name of the embedding table. if empty, use `langchain_pg_embedding` + EmbeddingTableName string `json:"embeddingTableName,omitempty"` + // CollectionTableName defines the name of the collection table. if empty, use `langchain_pg_collection` + CollectionTableName string `json:"collectionTableName,omitempty"` + // DataSourceRef defines the reference of the data source + DataSourceRef *TypedObjectReference `json:"dataSourceRef,omitempty"` +} + // VectorStoreStatus defines the observed state of VectorStore type VectorStoreStatus struct { // ConditionedStatus is the current status diff --git a/api/base/v1alpha1/zz_generated.deepcopy.go b/api/base/v1alpha1/zz_generated.deepcopy.go index 3c8c3c5d3..3ed9a4f39 100644 --- a/api/base/v1alpha1/zz_generated.deepcopy.go +++ b/api/base/v1alpha1/zz_generated.deepcopy.go @@ -964,6 +964,26 @@ func (in *OSS) DeepCopy() *OSS { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PGVector) DeepCopyInto(out *PGVector) { + *out = *in + if in.DataSourceRef != nil { + in, out := &in.DataSourceRef, &out.DataSourceRef + *out = new(TypedObjectReference) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PGVector. 
+func (in *PGVector) DeepCopy() *PGVector { + if in == nil { + return nil + } + out := new(PGVector) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PostgreSQL) DeepCopyInto(out *PostgreSQL) { *out = *in @@ -1229,6 +1249,11 @@ func (in *VectorStoreSpec) DeepCopyInto(out *VectorStoreSpec) { *out = new(Chroma) **out = **in } + if in.PGVector != nil { + in, out := &in.PGVector, &out.PGVector + *out = new(PGVector) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VectorStoreSpec. diff --git a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml index d64d3a994..3e2071fef 100644 --- a/config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml +++ b/config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml @@ -93,6 +93,47 @@ spec: description: URL for this endpoint type: string type: object + pgvector: + properties: + collectionName: + description: CollectionName defines the name of the collection + type: string + collectionTableName: + description: CollectionTableName defines the name of the collection + table. if empty, use `langchain_pg_collection` + type: string + dataSourceRef: + description: DataSourceRef defines the reference of the data source + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. 
+ type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being + referenced + type: string + required: + - kind + - name + type: object + embeddingTableName: + description: EmbeddingTableName defines the name of the embedding + table. if empty, use `langchain_pg_embedding` + type: string + preDeleteCollection: + description: PreDeleteCollection defines if the collection should + be deleted before creating. + type: boolean + type: object type: object status: description: VectorStoreStatus defines the observed state of VectorStore diff --git a/config/samples/app_retrievalqachain_knowledgebase_pgvector.yaml b/config/samples/app_retrievalqachain_knowledgebase_pgvector.yaml new file mode 100644 index 000000000..1d4cc9f04 --- /dev/null +++ b/config/samples/app_retrievalqachain_knowledgebase_pgvector.yaml @@ -0,0 +1,63 @@ +apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 +kind: Application +metadata: + name: base-chat-with-knowledgebase-pgvector + namespace: arcadia +spec: + displayName: "知识库应用" + description: "最简单的和知识库对话的应用" + prologue: "Welcome to talk to the KnowledgeBase!🤖" + nodes: + - name: Input + displayName: "用户输入" + description: "用户输入节点,必须" + ref: + kind: Input + name: Input + nextNodeName: ["prompt-node"] + - name: prompt-node + displayName: "prompt" + description: "设定prompt,template中可以使用{{xx}}来替换变量" + ref: + apiGroup: prompt.arcadia.kubeagi.k8s.com.cn + kind: Prompt + name: base-chat-with-knowledgebase + nextNodeName: ["chain-node"] + - name: llm-node + displayName: "zhipu大模型服务" + description: "设定大模型的访问信息" + ref: + apiGroup: arcadia.kubeagi.k8s.com.cn + kind: LLM + name: app-shared-llm-service + nextNodeName: ["chain-node"] + - name: knowledgebase-node + displayName: "使用的知识库" + description: "要用哪个知识库" + ref: + apiGroup: arcadia.kubeagi.k8s.com.cn + kind: KnowledgeBase + 
name: knowledgebase-sample-pgvector + nextNodeName: ["retriever-node"] + - name: retriever-node + displayName: "从知识库提取信息的retriever" + description: "连接应用和知识库" + ref: + apiGroup: retriever.arcadia.kubeagi.k8s.com.cn + kind: KnowledgeBaseRetriever + name: base-chat-with-knowledgebase + nextNodeName: ["chain-node"] + - name: chain-node + displayName: "RetrievalQA chain" + description: "chain是langchain的核心概念,RetrievalQAChain用于从 retriever 中提取信息,供llm调用" + ref: + apiGroup: chain.arcadia.kubeagi.k8s.com.cn + kind: RetrievalQAChain + name: base-chat-with-knowledgebase + nextNodeName: ["Output"] + - name: Output + displayName: "最终输出" + description: "最终输出节点,必须" + ref: + kind: Output + name: Output diff --git a/config/samples/arcadia_v1alpha1_datasource_postgresql.yaml b/config/samples/arcadia_v1alpha1_datasource_postgresql.yaml index efe58721f..9ec9d9086 100644 --- a/config/samples/arcadia_v1alpha1_datasource_postgresql.yaml +++ b/config/samples/arcadia_v1alpha1_datasource_postgresql.yaml @@ -5,7 +5,7 @@ metadata: name: datasource-postgresql-sample-authsecret namespace: arcadia data: - PGUSER: YWRtaW4= + PGUSER: cG9zdGdyZXM= PGPASSWORD: UGFzc3cwcmQh --- apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 @@ -20,5 +20,6 @@ spec: authSecret: kind: Secret name: datasource-postgresql-sample-authsecret + namespace: arcadia postgresql: PGDATABASE: arcadia diff --git a/config/samples/arcadia_v1alpha1_knowledgebase_pgvector.yaml b/config/samples/arcadia_v1alpha1_knowledgebase_pgvector.yaml new file mode 100644 index 000000000..e63c8f2b0 --- /dev/null +++ b/config/samples/arcadia_v1alpha1_knowledgebase_pgvector.yaml @@ -0,0 +1,23 @@ +apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 +kind: KnowledgeBase +metadata: + name: knowledgebase-sample-pgvector + namespace: arcadia +spec: + displayName: "测试 KnowledgeBase" + description: "测试 KnowledgeBase" + embedder: + kind: Embedders + name: zhipuai-embedders-sample + namespace: arcadia + vectorStore: + kind: VectorStores + name: pgvector-sample + 
namespace: arcadia + fileGroups: + - source: + kind: VersionedDataset + name: dataset-playground-v1 + namespace: arcadia + paths: + - qa.csv diff --git a/config/samples/arcadia_v1alpha1_vectorstore_pgvector.yaml b/config/samples/arcadia_v1alpha1_vectorstore_pgvector.yaml new file mode 100644 index 000000000..fa52d0f30 --- /dev/null +++ b/config/samples/arcadia_v1alpha1_vectorstore_pgvector.yaml @@ -0,0 +1,14 @@ +apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 +kind: VectorStore +metadata: + name: pgvector-sample + namespace: arcadia +spec: + displayName: "测试 PGVector VectorStore" + description: "测试 PGvector VectorStore" + pgvector: + dataSourceRef: + apiGroup: arcadia.kubeagi.k8s.com.cn + kind: Datasource + name: arcadia-postgresql + namespace: arcadia diff --git a/controllers/base/datasource_controller.go b/controllers/base/datasource_controller.go index c6f62578d..39a4a58b3 100644 --- a/controllers/base/datasource_controller.go +++ b/controllers/base/datasource_controller.go @@ -88,7 +88,7 @@ func (r *DatasourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) // indicated by the deletion timestamp being set. if instance.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(instance, arcadiav1alpha1.Finalizer) { logger.Info("Performing Finalizer Operations for Datasource before delete CR") - // TODO perform the finalizer operations here, for example: remove data? + r.RemoveDatasource(logger, instance) logger.Info("Removing Finalizer for Datasource after successfully performing the operations") controllerutil.RemoveFinalizer(instance, arcadiav1alpha1.Finalizer) if err := r.Update(ctx, instance); err != nil { @@ -172,7 +172,7 @@ func (r *DatasourceReconciler) Checkdatasource(ctx context.Context, logger logr. 
case arcadiav1alpha1.DatasourceTypeRDMA: return r.UpdateStatus(ctx, instance, nil) case arcadiav1alpha1.DatasourceTypePostgreSQL: - ds, err = datasource.NewPostgreSQL(ctx, r.Client, nil, instance.Spec.PostgreSQL, endpoint) + ds, err = datasource.GetPostgreSQLPool(ctx, r.Client, nil, instance) if err != nil { return r.UpdateStatus(ctx, instance, err) } @@ -219,3 +219,14 @@ func (r *DatasourceReconciler) UpdateStatus(ctx context.Context, instance *arcad instanceCopy.Status.SetConditions(newCondition) return r.Client.Status().Update(ctx, instanceCopy) } + +func (r *DatasourceReconciler) RemoveDatasource(logger logr.Logger, instance *arcadiav1alpha1.Datasource) { + logger.V(5).Info("remove datasource") + switch instance.Spec.Type() { + case arcadiav1alpha1.DatasourceTypeOSS: + case arcadiav1alpha1.DatasourceTypeRDMA: + case arcadiav1alpha1.DatasourceTypePostgreSQL: + datasource.RemovePostgreSQLPool(*instance) + default: + } +} diff --git a/controllers/base/knowledgebase_controller.go b/controllers/base/knowledgebase_controller.go index cdc536bc1..d14da6d66 100644 --- a/controllers/base/knowledgebase_controller.go +++ b/controllers/base/knowledgebase_controller.go @@ -31,7 +31,6 @@ import ( "github.com/tmc/langchaingo/documentloaders" "github.com/tmc/langchaingo/schema" "github.com/tmc/langchaingo/textsplitter" - "github.com/tmc/langchaingo/vectorstores/chroma" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -46,6 +45,7 @@ import ( pkgdocumentloaders "github.com/kubeagi/arcadia/pkg/documentloaders" "github.com/kubeagi/arcadia/pkg/langchainwrap" "github.com/kubeagi/arcadia/pkg/utils" + "github.com/kubeagi/arcadia/pkg/vectorstore" ) const ( @@ -158,7 +158,7 @@ func (r *KnowledgeBaseReconciler) reconcile(ctx context.Context, log logr.Logger // Observe generation change if kb.Status.ObservedGeneration != kb.Generation { kb.Status.ObservedGeneration = kb.Generation - r.setCondition(kb, 
kb.InitCondition()) + kb = r.setCondition(kb, kb.InitCondition()) if updateStatusErr := r.patchStatus(ctx, kb); updateStatusErr != nil { log.Error(updateStatusErr, "unable to update status after generation update") return kb, ctrl.Result{Requeue: true}, updateStatusErr @@ -173,27 +173,27 @@ func (r *KnowledgeBaseReconciler) reconcile(ctx context.Context, log logr.Logger vectorStoreReq := kb.Spec.VectorStore fileGroupsReq := kb.Spec.FileGroups if embedderReq == nil || vectorStoreReq == nil || len(fileGroupsReq) == 0 { - r.setCondition(kb, kb.PendingCondition("embedder or vectorstore or filegroups is not setting")) + kb = r.setCondition(kb, kb.PendingCondition("embedder or vectorstore or filegroups is not setting")) return kb, ctrl.Result{}, nil } embedder := &arcadiav1alpha1.Embedder{} if err := r.Get(ctx, types.NamespacedName{Name: kb.Spec.Embedder.Name, Namespace: kb.Spec.Embedder.GetNamespace(kb.GetNamespace())}, embedder); err != nil { if apierrors.IsNotFound(err) { - r.setCondition(kb, kb.PendingCondition("embedder is not found")) + kb = r.setCondition(kb, kb.PendingCondition("embedder is not found")) return kb, ctrl.Result{RequeueAfter: waitLonger}, nil } - r.setCondition(kb, kb.ErrorCondition(err.Error())) + kb = r.setCondition(kb, kb.ErrorCondition(err.Error())) return kb, ctrl.Result{}, err } vectorStore := &arcadiav1alpha1.VectorStore{} if err := r.Get(ctx, types.NamespacedName{Name: kb.Spec.VectorStore.Name, Namespace: kb.Spec.VectorStore.GetNamespace(kb.GetNamespace())}, vectorStore); err != nil { if apierrors.IsNotFound(err) { - r.setCondition(kb, kb.PendingCondition("vectorStore is not found")) + kb = r.setCondition(kb, kb.PendingCondition("vectorStore is not found")) return kb, ctrl.Result{RequeueAfter: waitLonger}, nil } - r.setCondition(kb, kb.ErrorCondition(err.Error())) + kb = r.setCondition(kb, kb.ErrorCondition(err.Error())) return kb, ctrl.Result{}, err } @@ -205,18 +205,18 @@ func (r *KnowledgeBaseReconciler) reconcile(ctx context.Context, log 
logr.Logger } } if err := errors.Join(errs...); err != nil { - r.setCondition(kb, kb.ErrorCondition(err.Error())) - return kb, ctrl.Result{RequeueAfter: waitLonger}, nil + kb = r.setCondition(kb, kb.ErrorCondition(err.Error())) + return kb, ctrl.Result{RequeueAfter: waitMedium}, nil } else { for _, fileGroupDetail := range kb.Status.FileGroupDetail { for _, fileDetail := range fileGroupDetail.FileDetails { if fileDetail.Phase == arcadiav1alpha1.FileProcessPhaseFailed && fileDetail.ErrMessage != "" { - r.setCondition(kb, kb.ErrorCondition(fileDetail.ErrMessage)) - return kb, ctrl.Result{RequeueAfter: waitLonger}, nil + kb = r.setCondition(kb, kb.ErrorCondition(fileDetail.ErrMessage)) + return kb, ctrl.Result{RequeueAfter: waitMedium}, nil } } } - r.setCondition(kb, kb.ReadyCondition()) + kb = r.setCondition(kb, kb.ReadyCondition()) } return kb, ctrl.Result{}, nil @@ -461,20 +461,16 @@ func (r *KnowledgeBaseReconciler) handleFile(ctx context.Context, log logr.Logge for i, doc := range documents { log.V(5).Info(fmt.Sprintf("document[%d]: embedding:%s, metadata:%v", i, doc.PageContent, doc.Metadata)) } - switch store.Spec.Type() { // nolint: gocritic - case arcadiav1alpha1.VectorStoreTypeChroma: - s, err := chroma.New( - chroma.WithChromaURL(store.Spec.Endpoint.URL), - chroma.WithDistanceFunction(store.Spec.Chroma.DistanceFunction), - chroma.WithNameSpace(kb.VectorStoreCollectionName()), - chroma.WithEmbedder(em), - ) - if err != nil { - return err - } - if _, err = s.AddDocuments(ctx, documents); err != nil { - return err - } + s, finish, err := vectorstore.NewVectorStore(ctx, store, em, kb.VectorStoreCollectionName(), r.Client, nil) + if err != nil { + return err + } + log.Info("handle file: add documents to embedder") + if _, err = s.AddDocuments(ctx, documents); err != nil { + return err + } + if finish != nil { + finish() } log.Info("handle file succeeded") return nil @@ -493,21 +489,7 @@ func (r *KnowledgeBaseReconciler) reconcileDelete(ctx context.Context, log 
logr. log.Error(err, "reconcile delete: get vector store error, may leave garbage data") return } - switch vectorStore.Spec.Type() { // nolint: gocritic - case arcadiav1alpha1.VectorStoreTypeChroma: - s, err := chroma.New( - chroma.WithChromaURL(vectorStore.Spec.Endpoint.URL), - chroma.WithNameSpace(kb.VectorStoreCollectionName()), - // workaround to fix 'invalid options: missing embedder or openai api key' - chroma.WithOpenAiAPIKey("fake-api-key"), - ) - if err != nil { - log.Error(err, "reconcile delete: init vector store error, may leave garbage data") - } - if err = s.RemoveCollection(); err != nil { - log.Error(err, "reconcile delete: remove vector store error, may leave garbage data") - } - } + _ = vectorstore.RemoveCollection(ctx, log, vectorStore, kb.VectorStoreCollectionName()) } func (r *KnowledgeBaseReconciler) hasHandledPathKey(kb *arcadiav1alpha1.KnowledgeBase, filegroup arcadiav1alpha1.FileGroup, path string) string { diff --git a/controllers/base/vectorstore_controller.go b/controllers/base/vectorstore_controller.go index 772362bbc..a41534181 100644 --- a/controllers/base/vectorstore_controller.go +++ b/controllers/base/vectorstore_controller.go @@ -21,7 +21,6 @@ import ( "reflect" "github.com/go-logr/logr" - "github.com/tmc/langchaingo/vectorstores/chroma" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -31,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" arcadiav1alpha1 "github.com/kubeagi/arcadia/api/base/v1alpha1" + "github.com/kubeagi/arcadia/pkg/vectorstore" ) // VectorStoreReconciler reconciles a VectorStore object @@ -124,27 +124,28 @@ func (r *VectorStoreReconciler) SetupWithManager(mgr ctrl.Manager) error { func (r *VectorStoreReconciler) CheckVectorStore(ctx context.Context, log logr.Logger, vs *arcadiav1alpha1.VectorStore) (err error) { log.V(5).Info("check vectorstore") - switch vs.Spec.Type() { - case arcadiav1alpha1.VectorStoreTypeChroma: - _, err 
= chroma.New( - chroma.WithOpenAiAPIKey("fake_key_just_for_chroma_heartbeat"), - chroma.WithChromaURL(vs.Spec.Endpoint.URL), - chroma.WithDistanceFunction(vs.Spec.Chroma.DistanceFunction), - ) - if err != nil { - log.Error(err, "failed to connect to vectorstore") - r.setCondition(vs, vs.ErrorCondition(err.Error())) - } else { - r.setCondition(vs, vs.ReadyCondition()) + vsRaw := vs.DeepCopy() + _, finish, err := vectorstore.NewVectorStore(ctx, vs, nil, "", r.Client, nil) + if err != nil { + log.Error(err, "failed to connect to vectorstore") + r.setCondition(vs, vs.ErrorCondition(err.Error())) + } else { + r.setCondition(vs, vs.ReadyCondition()) + if finish != nil { + finish() } - default: - r.setCondition(vs, vs.ErrorCondition("unsupported vectorstore type")) } if err := r.patchStatus(ctx, vs); err != nil { return err } + if !reflect.DeepEqual(vsRaw, vs) { + if err := r.Patch(ctx, vs, client.MergeFrom(vsRaw)); err != nil { + return err + } + } return err } + func (r *VectorStoreReconciler) setCondition(vs *arcadiav1alpha1.VectorStore, condition ...arcadiav1alpha1.Condition) *arcadiav1alpha1.VectorStore { vs.Status.SetConditions(condition...) 
return vs diff --git a/deploy/charts/arcadia/Chart.yaml b/deploy/charts/arcadia/Chart.yaml index 5175580ca..8cbd8ad92 100644 --- a/deploy/charts/arcadia/Chart.yaml +++ b/deploy/charts/arcadia/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: arcadia description: A Helm chart(KubeBB Component) for KubeAGI Arcadia type: application -version: 0.2.7 +version: 0.2.8 appVersion: "0.1.0" keywords: diff --git a/deploy/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml b/deploy/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml index d64d3a994..3e2071fef 100644 --- a/deploy/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml +++ b/deploy/charts/arcadia/crds/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml @@ -93,6 +93,47 @@ spec: description: URL for this endpoint type: string type: object + pgvector: + properties: + collectionName: + description: CollectionName defines the name of the collection + type: string + collectionTableName: + description: CollectionTableName defines the name of the collection + table. if empty, use `langchain_pg_collection` + type: string + dataSourceRef: + description: DataSourceRef defines the reference of the data source + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + namespace: + description: Namespace is the namespace of resource being + referenced + type: string + required: + - kind + - name + type: object + embeddingTableName: + description: EmbeddingTableName defines the name of the embedding + table. 
if empty, use `langchain_pg_embedding` + type: string + preDeleteCollection: + description: PreDeleteCollection defines if the collection should + be deleted before creating. + type: boolean + type: object type: object status: description: VectorStoreStatus defines the observed state of VectorStore diff --git a/deploy/charts/arcadia/templates/controller.yaml b/deploy/charts/arcadia/templates/controller.yaml index e1759c087..dd800303d 100644 --- a/deploy/charts/arcadia/templates/controller.yaml +++ b/deploy/charts/arcadia/templates/controller.yaml @@ -27,6 +27,8 @@ spec: containers: - command: - /manager + - -zap-log-level={{ .Values.controller.loglevel }} + - -v={{ .Values.controller.loglevel }} env: - name: POD_NAME valueFrom: diff --git a/deploy/charts/arcadia/templates/post-datasource.yaml b/deploy/charts/arcadia/templates/post-datasource.yaml index 022d0658e..10dbafd05 100644 --- a/deploy/charts/arcadia/templates/post-datasource.yaml +++ b/deploy/charts/arcadia/templates/post-datasource.yaml @@ -39,6 +39,7 @@ spec: authSecret: kind: Secret name: {{ .Release.Name }}-postgresql-datasource-authsecret + namespace: {{ .Release.Namespace }} postgresql: PGDATABASE: {{ .Values.postgresql.global.postgresql.auth.database }} --- @@ -49,6 +50,6 @@ metadata: namespace: {{ .Release.Namespace }} type: Opaque data: - PGUSER: {{ .Values.postgresql.global.postgresql.auth.username | b64enc | quote }} + PGUSER: {{ "postgres" | b64enc | quote }} PGPASSWORD: {{ .Values.postgresql.global.postgresql.auth.password | b64enc | quote }} {{- end }} diff --git a/deploy/charts/arcadia/templates/post-vectorstore.yaml b/deploy/charts/arcadia/templates/post-vectorstore.yaml index 75144b8e5..7d3905e0f 100644 --- a/deploy/charts/arcadia/templates/post-vectorstore.yaml +++ b/deploy/charts/arcadia/templates/post-vectorstore.yaml @@ -14,3 +14,23 @@ spec: chroma: distanceFunction: cosine +{{- if .Values.postgresql.enabled }} +--- +apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1 +kind: VectorStore 
+metadata: + name: {{ .Release.Name }}-pgvector-vectorstore + namespace: {{ .Release.Namespace }} + annotations: + "helm.sh/hook": post-install + "helm.sh/hook-weight": "1" +spec: + displayName: "pgvector 向量数据库" + description: "pgvector 类型的向量数据库" + pgvector: + dataSourceRef: + apiGroup: arcadia.kubeagi.k8s.com.cn + kind: Datasource + name: {{ .Release.Name }}-postgresql + namespace: {{ .Release.Namespace }} +{{- end }} diff --git a/deploy/charts/arcadia/values.yaml b/deploy/charts/arcadia/values.yaml index 654b57ffa..e220c6fbc 100644 --- a/deploy/charts/arcadia/values.yaml +++ b/deploy/charts/arcadia/values.yaml @@ -7,6 +7,7 @@ global: # @param imagePullPolcy ImagePullPolicy # @param resources Resources to be used controller: + loglevel: 3 image: kubeagi/arcadia:latest imagePullPolicy: IfNotPresent resources: @@ -140,7 +141,7 @@ postgresql: image: registry: docker.io repository: kubeagi/postgresql - tag: 16.1.0-debian-11-r15 + tag: 16.1.0-debian-11-r18-pgvector-v0.5.1 pullPolicy: IfNotPresent primary: initdb: diff --git a/deploy/pgvector/Dockerfile b/deploy/pgvector/Dockerfile new file mode 100644 index 000000000..87e7289bd --- /dev/null +++ b/deploy/pgvector/Dockerfile @@ -0,0 +1,69 @@ +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 + +FROM docker.io/bitnami/minideb:bullseye + +ARG EXTRA_LOCALES +ARG TARGETARCH +ARG WITH_ALL_LOCALES="no" + +LABEL com.vmware.cp.artifact.flavor="sha256:1e1b4657a77f0d47e9220f0c37b9bf7802581b93214fff7d1bd2364c8bf22e8e" \ + org.opencontainers.image.base.name="docker.io/bitnami/minideb:bullseye" \ + org.opencontainers.image.created="2023-12-31T11:04:56Z" \ + org.opencontainers.image.description="Application packaged by VMware, Inc" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.ref.name="16.1.0-debian-11-r18" \ + org.opencontainers.image.title="postgresql" \ + org.opencontainers.image.vendor="VMware, Inc." 
\ + org.opencontainers.image.version="16.1.0" + +ENV HOME="/" \ + OS_ARCH="${TARGETARCH:-amd64}" \ + OS_FLAVOUR="debian-11" \ + OS_NAME="linux" + +COPY prebuildfs / +SHELL ["/bin/bash", "-o", "errexit", "-o", "nounset", "-o", "pipefail", "-c"] +# Install required system packages and dependencies +RUN install_packages ca-certificates curl libbsd0 libbz2-1.0 libedit2 libffi7 libgcc-s1 libgmp10 libgnutls30 libhogweed6 libicu67 libidn2-0 libldap-2.4-2 liblz4-1 liblzma5 libmd0 libnettle8 libp11-kit0 libpcre3 libreadline8 libsasl2-2 libsqlite3-0 libssl1.1 libstdc++6 libtasn1-6 libtinfo6 libunistring2 libuuid1 libxml2 libxslt1.1 libzstd1 locales procps zlib1g +RUN mkdir -p /tmp/bitnami/pkg/cache/ ; cd /tmp/bitnami/pkg/cache/ ; \ + COMPONENTS=( \ + "postgresql-16.1.0-20-linux-${OS_ARCH}-debian-11" \ + ) ; \ + for COMPONENT in "${COMPONENTS[@]}"; do \ + if [ ! -f "${COMPONENT}.tar.gz" ]; then \ + curl -SsLf "https://downloads.bitnami.com/files/stacksmith/${COMPONENT}.tar.gz" -O ; \ + curl -SsLf "https://downloads.bitnami.com/files/stacksmith/${COMPONENT}.tar.gz.sha256" -O ; \ + fi ; \ + sha256sum -c "${COMPONENT}.tar.gz.sha256" ; \ + tar -zxf "${COMPONENT}.tar.gz" -C /opt/bitnami --strip-components=2 --no-same-owner --wildcards '*/files' ; \ + rm -rf "${COMPONENT}".tar.gz{,.sha256} ; \ + done +RUN apt-get autoremove --purge -y curl && \ + apt-get update && apt-get upgrade -y && \ + apt-get clean && rm -rf /var/lib/apt/lists /var/cache/apt/archives +RUN chmod g+rwX /opt/bitnami +RUN localedef -c -f UTF-8 -i en_US en_US.UTF-8 +RUN update-locale LANG=C.UTF-8 LC_MESSAGES=POSIX && \ + DEBIAN_FRONTEND=noninteractive dpkg-reconfigure locales +RUN echo 'en_GB.UTF-8 UTF-8' >> /etc/locale.gen && locale-gen +RUN echo 'en_US.UTF-8 UTF-8' >> /etc/locale.gen && locale-gen + +COPY rootfs / +RUN /opt/bitnami/scripts/postgresql/postunpack.sh +RUN /opt/bitnami/scripts/locales/add-extra-locales.sh +ENV APP_VERSION="16.1.0" \ + BITNAMI_APP_NAME="postgresql" \ + LANG="en_US.UTF-8" \ + 
LANGUAGE="en_US:en" \ + NSS_WRAPPER_LIB="/opt/bitnami/common/lib/libnss_wrapper.so" \ + PATH="/opt/bitnami/postgresql/bin:$PATH" + +VOLUME [ "/bitnami/postgresql", "/docker-entrypoint-initdb.d", "/docker-entrypoint-preinitdb.d" ] + +EXPOSE 5432 +COPY pgvector.sh . +RUN bash -x pgvector.sh +USER 1001 +ENTRYPOINT [ "/opt/bitnami/scripts/postgresql/entrypoint.sh" ] +CMD [ "/opt/bitnami/scripts/postgresql/run.sh" ] diff --git a/deploy/pgvector/README.md b/deploy/pgvector/README.md new file mode 100644 index 000000000..5fdaeb52a --- /dev/null +++ b/deploy/pgvector/README.md @@ -0,0 +1,5 @@ +# build pgvector image based on bitnami/postgresql + +use `run.sh` to simplify your workflow. + +Also, you may need to update `env.TAG` in line 12 in `.github/workflows/pgvector_image_build.yml` diff --git a/deploy/pgvector/docker-compose.yml b/deploy/pgvector/docker-compose.yml new file mode 100644 index 000000000..a16e90573 --- /dev/null +++ b/deploy/pgvector/docker-compose.yml @@ -0,0 +1,18 @@ +# Copyright VMware, Inc.
+# SPDX-License-Identifier: APACHE-2.0 + +version: '2' + +services: + postgresql: + image: docker.io/bitnami/postgresql:16 + ports: + - '5432:5432' + volumes: + - 'postgresql_data:/bitnami/postgresql' + environment: + - 'ALLOW_EMPTY_PASSWORD=yes' + +volumes: + postgresql_data: + driver: local diff --git a/deploy/pgvector/pgvector.sh b/deploy/pgvector/pgvector.sh new file mode 100755 index 000000000..7b43a627a --- /dev/null +++ b/deploy/pgvector/pgvector.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e +# install git +apt-get update +apt-get install -y git +# clone pgvector +git clone --branch v0.5.1 https://github.com/pgvector/pgvector.git /tmp/pgvector +# install pg packages for make install pgvector +apt install -y postgresql-common gnupg2 +export YES=yes && /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh +apt-get update +apt-mark hold locales +apt-get install -y --no-install-recommends build-essential postgresql-server-dev-16 +# build pgvector +cd /tmp/pgvector +make clean +make OPTFLAGS="" +make install +mkdir /usr/share/doc/pgvector +cp LICENSE README.md /usr/share/doc/pgvector +rm -r /tmp/pgvector +apt-get remove -y build-essential postgresql-server-dev-16 +apt-get autoremove -y +apt-mark unhold locales +rm -rf /var/lib/apt/lists/* diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/.bitnami_components.json b/deploy/pgvector/prebuildfs/opt/bitnami/.bitnami_components.json new file mode 100644 index 000000000..ea81047b8 --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/.bitnami_components.json @@ -0,0 +1,8 @@ +{ + "postgresql": { + "arch": "amd64", + "distro": "debian-11", + "type": "NAMI", + "version": "16.1.0-20" + } +} \ No newline at end of file diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/licenses/licenses.txt b/deploy/pgvector/prebuildfs/opt/bitnami/licenses/licenses.txt new file mode 100644 index 000000000..76956b38e --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/licenses/licenses.txt @@ -0,0 +1,2 @@ +Bitnami containers ship 
with software bundles. You can find the licenses under: +/opt/bitnami/[name-of-bundle]/licenses/[bundle-version].txt diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libbitnami.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libbitnami.sh new file mode 100644 index 000000000..3853c789b --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libbitnami.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Bitnami custom library + +# shellcheck disable=SC1091 + +# Load Generic Libraries +. /opt/bitnami/scripts/liblog.sh + +# Constants +BOLD='\033[1m' + +# Functions + +######################## +# Print the welcome page +# Globals: +# DISABLE_WELCOME_MESSAGE +# BITNAMI_APP_NAME +# Arguments: +# None +# Returns: +# None +######################### +print_welcome_page() { + if [[ -z "${DISABLE_WELCOME_MESSAGE:-}" ]]; then + if [[ -n "$BITNAMI_APP_NAME" ]]; then + print_image_welcome_page + fi + fi +} + +######################## +# Print the welcome page for a Bitnami Docker image +# Globals: +# BITNAMI_APP_NAME +# Arguments: +# None +# Returns: +# None +######################### +print_image_welcome_page() { + local github_url="https://github.com/bitnami/containers" + + info "" + info "${BOLD}Welcome to the Bitnami ${BITNAMI_APP_NAME} container${RESET}" + info "Subscribe to project updates by watching ${BOLD}${github_url}${RESET}" + info "Submit issues and feature requests at ${BOLD}${github_url}/issues${RESET}" + info "" +} + diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libfile.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libfile.sh new file mode 100644 index 000000000..63759c777 --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libfile.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Library for managing files + +# shellcheck disable=SC1091 + +# Load Generic Libraries +. 
/opt/bitnami/scripts/libos.sh + +# Functions + +######################## +# Replace a regex-matching string in a file +# Arguments: +# $1 - filename +# $2 - match regex +# $3 - substitute regex +# $4 - use POSIX regex. Default: true +# Returns: +# None +######################### +replace_in_file() { + local filename="${1:?filename is required}" + local match_regex="${2:?match regex is required}" + local substitute_regex="${3:?substitute regex is required}" + local posix_regex=${4:-true} + + local result + + # We should avoid using 'sed in-place' substitutions + # 1) They are not compatible with files mounted from ConfigMap(s) + # 2) We found incompatibility issues with Debian10 and "in-place" substitutions + local -r del=$'\001' # Use a non-printable character as a 'sed' delimiter to avoid issues + if [[ $posix_regex = true ]]; then + result="$(sed -E "s${del}${match_regex}${del}${substitute_regex}${del}g" "$filename")" + else + result="$(sed "s${del}${match_regex}${del}${substitute_regex}${del}g" "$filename")" + fi + echo "$result" > "$filename" +} + +######################## +# Replace a regex-matching multiline string in a file +# Arguments: +# $1 - filename +# $2 - match regex +# $3 - substitute regex +# Returns: +# None +######################### +replace_in_file_multiline() { + local filename="${1:?filename is required}" + local match_regex="${2:?match regex is required}" + local substitute_regex="${3:?substitute regex is required}" + + local result + local -r del=$'\001' # Use a non-printable character as a 'sed' delimiter to avoid issues + result="$(perl -pe "BEGIN{undef $/;} s${del}${match_regex}${del}${substitute_regex}${del}sg" "$filename")" + echo "$result" > "$filename" +} + +######################## +# Remove a line in a file based on a regex +# Arguments: +# $1 - filename +# $2 - match regex +# $3 - use POSIX regex. 
Default: true +# Returns: +# None +######################### +remove_in_file() { + local filename="${1:?filename is required}" + local match_regex="${2:?match regex is required}" + local posix_regex=${3:-true} + local result + + # We should avoid using 'sed in-place' substitutions + # 1) They are not compatible with files mounted from ConfigMap(s) + # 2) We found incompatibility issues with Debian10 and "in-place" substitutions + if [[ $posix_regex = true ]]; then + result="$(sed -E "/$match_regex/d" "$filename")" + else + result="$(sed "/$match_regex/d" "$filename")" + fi + echo "$result" > "$filename" +} + +######################## +# Appends text after the last line matching a pattern +# Arguments: +# $1 - file +# $2 - match regex +# $3 - contents to add +# Returns: +# None +######################### +append_file_after_last_match() { + local file="${1:?missing file}" + local match_regex="${2:?missing pattern}" + local value="${3:?missing value}" + + # We read the file in reverse, replace the first match (0,/pattern/s) and then reverse the results again + result="$(tac "$file" | sed -E "0,/($match_regex)/s||${value}\n\1|" | tac)" + echo "$result" > "$file" +} + +######################## +# Wait until certain entry is present in a log file +# Arguments: +# $1 - entry to look for +# $2 - log file +# $3 - max retries. Default: 12 +# $4 - sleep between retries (in seconds). Default: 5 +# Returns: +# Boolean +######################### +wait_for_log_entry() { + local -r entry="${1:-missing entry}" + local -r log_file="${2:-missing log file}" + local -r retries="${3:-12}" + local -r interval_time="${4:-5}" + local attempt=0 + + check_log_file_for_entry() { + if ! 
grep -qE "$entry" "$log_file"; then + debug "Entry \"${entry}\" still not present in ${log_file} (attempt $((++attempt))/${retries})" + return 1 + fi + } + debug "Checking that ${log_file} log file contains entry \"${entry}\"" + if retry_while check_log_file_for_entry "$retries" "$interval_time"; then + debug "Found entry \"${entry}\" in ${log_file}" + true + else + error "Could not find entry \"${entry}\" in ${log_file} after ${retries} retries" + debug_execute cat "$log_file" + return 1 + fi +} diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libfs.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libfs.sh new file mode 100644 index 000000000..96b22f997 --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libfs.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Library for file system actions + +# shellcheck disable=SC1091 + +# Load Generic Libraries +. /opt/bitnami/scripts/liblog.sh + +# Functions + +######################## +# Ensure a file/directory is owned (user and group) by the given user +# Arguments: +# $1 - filepath +# $2 - owner +# Returns: +# None +######################### +owned_by() { + local path="${1:?path is missing}" + local owner="${2:?owner is missing}" + local group="${3:-}" + + if [[ -n $group ]]; then + chown "$owner":"$group" "$path" + else + chown "$owner":"$owner" "$path" + fi +} + +######################## +# Ensure a directory exists and, optionally, is owned by the given user +# Arguments: +# $1 - directory +# $2 - owner +# Returns: +# None +######################### +ensure_dir_exists() { + local dir="${1:?directory is missing}" + local owner_user="${2:-}" + local owner_group="${3:-}" + + [ -d "${dir}" ] || mkdir -p "${dir}" + if [[ -n $owner_user ]]; then + owned_by "$dir" "$owner_user" "$owner_group" + fi +} + +######################## +# Checks whether a directory is empty or not +# arguments: +# $1 - directory +# returns: +# boolean
+######################### +is_dir_empty() { + local -r path="${1:?missing directory}" + # Calculate real path in order to avoid issues with symlinks + local -r dir="$(realpath "$path")" + if [[ ! -e "$dir" ]] || [[ -z "$(ls -A "$dir")" ]]; then + true + else + false + fi +} + +######################## +# Checks whether a mounted directory is empty or not +# arguments: +# $1 - directory +# returns: +# boolean +######################### +is_mounted_dir_empty() { + local dir="${1:?missing directory}" + + if is_dir_empty "$dir" || find "$dir" -mindepth 1 -maxdepth 1 -not -name ".snapshot" -not -name "lost+found" -exec false {} +; then + true + else + false + fi +} + +######################## +# Checks whether a file can be written to or not +# arguments: +# $1 - file +# returns: +# boolean +######################### +is_file_writable() { + local file="${1:?missing file}" + local dir + dir="$(dirname "$file")" + + if [[ (-f "$file" && -w "$file") || (! -f "$file" && -d "$dir" && -w "$dir") ]]; then + true + else + false + fi +} + +######################## +# Relativize a path +# arguments: +# $1 - path +# $2 - base +# returns: +# None +######################### +relativize() { + local -r path="${1:?missing path}" + local -r base="${2:?missing base}" + pushd "$base" >/dev/null || exit + realpath -q --no-symlinks --relative-base="$base" "$path" | sed -e 's|^/$|.|' -e 's|^/||' + popd >/dev/null || exit +} + +######################## +# Configure permissions and ownership recursively +# Globals: +# None +# Arguments: +# $1 - paths (as a string). +# Flags: +# -f|--file-mode - mode for files. +# -d|--dir-mode - mode for directories.
+# -u|--user - user +# -g|--group - group +# Returns: +# None +######################### +configure_permissions_ownership() { + local -r paths="${1:?paths is missing}" + local dir_mode="" + local file_mode="" + local user="" + local group="" + + # Validate arguments + shift 1 + while [ "$#" -gt 0 ]; do + case "$1" in + -f | --file-mode) + shift + file_mode="${1:?missing mode for files}" + ;; + -d | --dir-mode) + shift + dir_mode="${1:?missing mode for directories}" + ;; + -u | --user) + shift + user="${1:?missing user}" + ;; + -g | --group) + shift + group="${1:?missing group}" + ;; + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + + read -r -a filepaths <<<"$paths" + for p in "${filepaths[@]}"; do + if [[ -e "$p" ]]; then + find -L "$p" -printf "" + if [[ -n $dir_mode ]]; then + find -L "$p" -type d ! -perm "$dir_mode" -print0 | xargs -r -0 chmod "$dir_mode" + fi + if [[ -n $file_mode ]]; then + find -L "$p" -type f ! -perm "$file_mode" -print0 | xargs -r -0 chmod "$file_mode" + fi + if [[ -n $user ]] && [[ -n $group ]]; then + find -L "$p" -print0 | xargs -r -0 chown "${user}:${group}" + elif [[ -n $user ]] && [[ -z $group ]]; then + find -L "$p" -print0 | xargs -r -0 chown "${user}" + elif [[ -z $user ]] && [[ -n $group ]]; then + find -L "$p" -print0 | xargs -r -0 chgrp "${group}" + fi + else + stderr_print "$p does not exist" + fi + done +} diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libhook.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libhook.sh new file mode 100644 index 000000000..dadd06149 --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libhook.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Library to use for scripts expected to be used as Kubernetes lifecycle hooks + +# shellcheck disable=SC1091 + +# Load generic libraries +. /opt/bitnami/scripts/liblog.sh +. 
/opt/bitnami/scripts/libos.sh + +# Override functions that log to stdout/stderr of the current process, so they print to process 1 +for function_to_override in stderr_print debug_execute; do + # Output is sent to output of process 1 and thus end up in the container log + # The hook output in general isn't saved + eval "$(declare -f "$function_to_override") >/proc/1/fd/1 2>/proc/1/fd/2" +done diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/liblog.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/liblog.sh new file mode 100644 index 000000000..2a9e76a4d --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/liblog.sh @@ -0,0 +1,114 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Library for logging functions + +# Constants +RESET='\033[0m' +RED='\033[38;5;1m' +GREEN='\033[38;5;2m' +YELLOW='\033[38;5;3m' +MAGENTA='\033[38;5;5m' +CYAN='\033[38;5;6m' + +# Functions + +######################## +# Print to STDERR +# Arguments: +# Message to print +# Returns: +# None +######################### +stderr_print() { + # 'is_boolean_yes' is defined in libvalidations.sh, but depends on this file so we cannot source it + local bool="${BITNAMI_QUIET:-false}" + # comparison is performed without regard to the case of alphabetic characters + shopt -s nocasematch + if ! 
[[ "$bool" = 1 || "$bool" =~ ^(yes|true)$ ]]; then + printf "%b\\n" "${*}" >&2 + fi +} + +######################## +# Log message +# Arguments: +# Message to log +# Returns: +# None +######################### +log() { + stderr_print "${CYAN}${MODULE:-} ${MAGENTA}$(date "+%T.%2N ")${RESET}${*}" +} +######################## +# Log an 'info' message +# Arguments: +# Message to log +# Returns: +# None +######################### +info() { + log "${GREEN}INFO ${RESET} ==> ${*}" +} +######################## +# Log message +# Arguments: +# Message to log +# Returns: +# None +######################### +warn() { + log "${YELLOW}WARN ${RESET} ==> ${*}" +} +######################## +# Log an 'error' message +# Arguments: +# Message to log +# Returns: +# None +######################### +error() { + log "${RED}ERROR${RESET} ==> ${*}" +} +######################## +# Log a 'debug' message +# Globals: +# BITNAMI_DEBUG +# Arguments: +# None +# Returns: +# None +######################### +debug() { + # 'is_boolean_yes' is defined in libvalidations.sh, but depends on this file so we cannot source it + local bool="${BITNAMI_DEBUG:-false}" + # comparison is performed without regard to the case of alphabetic characters + shopt -s nocasematch + if [[ "$bool" = 1 || "$bool" =~ ^(yes|true)$ ]]; then + log "${MAGENTA}DEBUG${RESET} ==> ${*}" + fi +} + +######################## +# Indent a string +# Arguments: +# $1 - string +# $2 - number of indentation characters (default: 4) +# $3 - indentation character (default: " ") +# Returns: +# None +######################### +indent() { + local string="${1:-}" + local num="${2:?missing num}" + local char="${3:-" "}" + # Build the indentation unit string + local indent_unit="" + for ((i = 0; i < num; i++)); do + indent_unit="${indent_unit}${char}" + done + # shellcheck disable=SC2001 + # Complex regex, see https://github.com/koalaman/shellcheck/wiki/SC2001#exceptions + echo "$string" | sed "s/^/${indent_unit}/" +} diff --git 
a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libnet.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libnet.sh new file mode 100644 index 000000000..b47c69a56 --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libnet.sh @@ -0,0 +1,165 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Library for network functions + +# shellcheck disable=SC1091 + +# Load Generic Libraries +. /opt/bitnami/scripts/liblog.sh + +# Functions + +######################## +# Resolve IP address for a host/domain (i.e. DNS lookup) +# Arguments: +# $1 - Hostname to resolve +# $2 - IP address version (v4, v6), leave empty for resolving to any version +# Returns: +# IP +######################### +dns_lookup() { + local host="${1:?host is missing}" + local ip_version="${2:-}" + getent "ahosts${ip_version}" "$host" | awk '/STREAM/ {print $1 }' | head -n 1 +} + +######################### +# Wait for a hostname and return the IP +# Arguments: +# $1 - hostname +# $2 - number of retries +# $3 - seconds to wait between retries +# Returns: +# - IP address that corresponds to the hostname +######################### +wait_for_dns_lookup() { + local hostname="${1:?hostname is missing}" + local retries="${2:-5}" + local seconds="${3:-1}" + check_host() { + if [[ $(dns_lookup "$hostname") == "" ]]; then + false + else + true + fi + } + # Wait for the host to be ready + retry_while "check_host ${hostname}" "$retries" "$seconds" + dns_lookup "$hostname" +} + +######################## +# Get machine's IP +# Arguments: +# None +# Returns: +# Machine IP +######################### +get_machine_ip() { + local -a ip_addresses + local hostname + hostname="$(hostname)" + read -r -a ip_addresses <<< "$(dns_lookup "$hostname" | xargs echo)" + if [[ "${#ip_addresses[@]}" -gt 1 ]]; then + warn "Found more than one IP address associated to hostname ${hostname}: ${ip_addresses[*]}, will use ${ip_addresses[0]}" + elif [[ "${#ip_addresses[@]}" -lt 1 ]]; then + error 
"Could not find any IP address associated to hostname ${hostname}" + exit 1 + fi + echo "${ip_addresses[0]}" +} + +######################## +# Check if the provided argument is a resolved hostname +# Arguments: +# $1 - Value to check +# Returns: +# Boolean +######################### +is_hostname_resolved() { + local -r host="${1:?missing value}" + if [[ -n "$(dns_lookup "$host")" ]]; then + true + else + false + fi +} + +######################## +# Parse URL +# Globals: +# None +# Arguments: +# $1 - uri - String +# $2 - component to obtain. Valid options (scheme, authority, userinfo, host, port, path, query or fragment) - String +# Returns: +# String +parse_uri() { + local uri="${1:?uri is missing}" + local component="${2:?component is missing}" + + # Solution based on https://tools.ietf.org/html/rfc3986#appendix-B with + # additional sub-expressions to split authority into userinfo, host and port + # Credits to Patryk Obara (see https://stackoverflow.com/a/45977232/6694969) + local -r URI_REGEX='^(([^:/?#]+):)?(//((([^@/?#]+)@)?([^:/?#]+)(:([0-9]+))?))?(/([^?#]*))?(\?([^#]*))?(#(.*))?' + # || | ||| | | | | | | | | | + # |2 scheme | ||6 userinfo 7 host | 9 port | 11 rpath | 13 query | 15 fragment + # 1 scheme: | |5 userinfo@ 8 :... 10 path 12 ?... 14 #... + # | 4 authority + # 3 //... 
+ local index=0 + case "$component" in + scheme) + index=2 + ;; + authority) + index=4 + ;; + userinfo) + index=6 + ;; + host) + index=7 + ;; + port) + index=9 + ;; + path) + index=10 + ;; + query) + index=13 + ;; + fragment) + index=14 + ;; + *) + stderr_print "unrecognized component $component" + return 1 + ;; + esac + [[ "$uri" =~ $URI_REGEX ]] && echo "${BASH_REMATCH[${index}]}" +} + +######################## +# Wait for a HTTP connection to succeed +# Globals: +# * +# Arguments: +# $1 - URL to wait for +# $2 - Maximum amount of retries (optional) +# $3 - Time between retries (optional) +# Returns: +# true if the HTTP connection succeeded, false otherwise +######################### +wait_for_http_connection() { + local url="${1:?missing url}" + local retries="${2:-}" + local sleep_time="${3:-}" + if ! retry_while "debug_execute curl --silent ${url}" "$retries" "$sleep_time"; then + error "Could not connect to ${url}" + return 1 + fi +} diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libos.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libos.sh new file mode 100644 index 000000000..c0500acee --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libos.sh @@ -0,0 +1,657 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Library for operating system actions + +# shellcheck disable=SC1091 + +# Load Generic Libraries +. /opt/bitnami/scripts/liblog.sh +. /opt/bitnami/scripts/libfs.sh +. 
/opt/bitnami/scripts/libvalidations.sh + +# Functions + +######################## +# Check if an user exists in the system +# Arguments: +# $1 - user +# Returns: +# Boolean +######################### +user_exists() { + local user="${1:?user is missing}" + id "$user" >/dev/null 2>&1 +} + +######################## +# Check if a group exists in the system +# Arguments: +# $1 - group +# Returns: +# Boolean +######################### +group_exists() { + local group="${1:?group is missing}" + getent group "$group" >/dev/null 2>&1 +} + +######################## +# Create a group in the system if it does not exist already +# Arguments: +# $1 - group +# Flags: +# -i|--gid - the ID for the new group +# -s|--system - Whether to create new user as system user (uid <= 999) +# Returns: +# None +######################### +ensure_group_exists() { + local group="${1:?group is missing}" + local gid="" + local is_system_user=false + + # Validate arguments + shift 1 + while [ "$#" -gt 0 ]; do + case "$1" in + -i | --gid) + shift + gid="${1:?missing gid}" + ;; + -s | --system) + is_system_user=true + ;; + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + + if ! group_exists "$group"; then + local -a args=("$group") + if [[ -n "$gid" ]]; then + if group_exists "$gid"; then + error "The GID $gid is already in use." 
>&2 + return 1 + fi + args+=("--gid" "$gid") + fi + $is_system_user && args+=("--system") + groupadd "${args[@]}" >/dev/null 2>&1 + fi +} + +######################## +# Create an user in the system if it does not exist already +# Arguments: +# $1 - user +# Flags: +# -i|--uid - the ID for the new user +# -g|--group - the group the new user should belong to +# -a|--append-groups - comma-separated list of supplemental groups to append to the new user +# -h|--home - the home directory for the new user +# -s|--system - whether to create new user as system user (uid <= 999) +# Returns: +# None +######################### +ensure_user_exists() { + local user="${1:?user is missing}" + local uid="" + local group="" + local append_groups="" + local home="" + local is_system_user=false + + # Validate arguments + shift 1 + while [ "$#" -gt 0 ]; do + case "$1" in + -i | --uid) + shift + uid="${1:?missing uid}" + ;; + -g | --group) + shift + group="${1:?missing group}" + ;; + -a | --append-groups) + shift + append_groups="${1:?missing append_groups}" + ;; + -h | --home) + shift + home="${1:?missing home directory}" + ;; + -s | --system) + is_system_user=true + ;; + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + + if ! user_exists "$user"; then + local -a user_args=("-N" "$user") + if [[ -n "$uid" ]]; then + if user_exists "$uid"; then + error "The UID $uid is already in use." 
+ return 1 + fi + user_args+=("--uid" "$uid") + else + $is_system_user && user_args+=("--system") + fi + useradd "${user_args[@]}" >/dev/null 2>&1 + fi + + if [[ -n "$group" ]]; then + local -a group_args=("$group") + $is_system_user && group_args+=("--system") + ensure_group_exists "${group_args[@]}" + usermod -g "$group" "$user" >/dev/null 2>&1 + fi + + if [[ -n "$append_groups" ]]; then + local -a groups + read -ra groups <<<"$(tr ',;' ' ' <<<"$append_groups")" + for group in "${groups[@]}"; do + ensure_group_exists "$group" + usermod -aG "$group" "$user" >/dev/null 2>&1 + done + fi + + if [[ -n "$home" ]]; then + mkdir -p "$home" + usermod -d "$home" "$user" >/dev/null 2>&1 + configure_permissions_ownership "$home" -d "775" -f "664" -u "$user" -g "$group" + fi +} + +######################## +# Check if the script is currently running as root +# Arguments: +# $1 - user +# $2 - group +# Returns: +# Boolean +######################### +am_i_root() { + if [[ "$(id -u)" = "0" ]]; then + true + else + false + fi +} + +######################## +# Print OS metadata +# Arguments: +# $1 - Flag name +# Flags: +# --id - Distro ID +# --version - Distro version +# --branch - Distro branch +# --codename - Distro codename +# --name - Distro name +# --pretty-name - Distro pretty name +# Returns: +# String +######################### +get_os_metadata() { + local -r flag_name="${1:?missing flag}" + # Helper function + get_os_release_metadata() { + local -r env_name="${1:?missing environment variable name}" + ( + . 
/etc/os-release + echo "${!env_name}" + ) + } + case "$flag_name" in + --id) + get_os_release_metadata ID + ;; + --version) + get_os_release_metadata VERSION_ID + ;; + --branch) + get_os_release_metadata VERSION_ID | sed 's/\..*//' + ;; + --codename) + get_os_release_metadata VERSION_CODENAME + ;; + --name) + get_os_release_metadata NAME + ;; + --pretty-name) + get_os_release_metadata PRETTY_NAME + ;; + *) + error "Unknown flag ${flag_name}" + return 1 + ;; + esac +} + +######################## +# Get total memory available +# Arguments: +# None +# Returns: +# Memory in bytes +######################### +get_total_memory() { + echo $(($(grep MemTotal /proc/meminfo | awk '{print $2}') / 1024)) +} + +######################## +# Get machine size depending on specified memory +# Globals: +# None +# Arguments: +# None +# Flags: +# --memory - memory size (optional) +# Returns: +# Detected instance size +######################### +get_machine_size() { + local memory="" + # Validate arguments + while [[ "$#" -gt 0 ]]; do + case "$1" in + --memory) + shift + memory="${1:?missing memory}" + ;; + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + if [[ -z "$memory" ]]; then + debug "Memory was not specified, detecting available memory automatically" + memory="$(get_total_memory)" + fi + sanitized_memory=$(convert_to_mb "$memory") + if [[ "$sanitized_memory" -gt 26000 ]]; then + echo 2xlarge + elif [[ "$sanitized_memory" -gt 13000 ]]; then + echo xlarge + elif [[ "$sanitized_memory" -gt 6000 ]]; then + echo large + elif [[ "$sanitized_memory" -gt 3000 ]]; then + echo medium + elif [[ "$sanitized_memory" -gt 1500 ]]; then + echo small + else + echo micro + fi +} + +######################## +# Get machine size depending on specified memory +# Globals: +# None +# Arguments: +# $1 - memory size (optional) +# Returns: +# Detected instance size +######################### +get_supported_machine_sizes() { + echo micro small medium large xlarge 2xlarge 
+} + +######################## +# Convert memory size from string to amount of megabytes (i.e. 2G -> 2048) +# Globals: +# None +# Arguments: +# $1 - memory size +# Returns: +# Result of the conversion +######################### +convert_to_mb() { + local amount="${1:-}" + if [[ $amount =~ ^([0-9]+)(m|M|g|G) ]]; then + size="${BASH_REMATCH[1]}" + unit="${BASH_REMATCH[2]}" + if [[ "$unit" = "g" || "$unit" = "G" ]]; then + amount="$((size * 1024))" + else + amount="$size" + fi + fi + echo "$amount" +} + +######################### +# Redirects output to /dev/null if debug mode is disabled +# Globals: +# BITNAMI_DEBUG +# Arguments: +# $@ - Command to execute +# Returns: +# None +######################### +debug_execute() { + if is_boolean_yes "${BITNAMI_DEBUG:-false}"; then + "$@" + else + "$@" >/dev/null 2>&1 + fi +} + +######################## +# Retries a command a given number of times +# Arguments: +# $1 - cmd (as a string) +# $2 - max retries. Default: 12 +# $3 - sleep between retries (in seconds). 
Default: 5 +# Returns: +# Boolean +######################### +retry_while() { + local cmd="${1:?cmd is missing}" + local retries="${2:-12}" + local sleep_time="${3:-5}" + local return_value=1 + + read -r -a command <<<"$cmd" + for ((i = 1; i <= retries; i += 1)); do + "${command[@]}" && return_value=0 && break + sleep "$sleep_time" + done + return $return_value +} + +######################## +# Generate a random string +# Arguments: +# -t|--type - String type (ascii, alphanumeric, numeric), defaults to ascii +# -c|--count - Number of characters, defaults to 32 +# Arguments: +# None +# Returns: +# None +# Returns: +# String +######################### +generate_random_string() { + local type="ascii" + local count="32" + local filter + local result + # Validate arguments + while [[ "$#" -gt 0 ]]; do + case "$1" in + -t | --type) + shift + type="$1" + ;; + -c | --count) + shift + count="$1" + ;; + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + # Validate type + case "$type" in + ascii) + filter="[:print:]" + ;; + numeric) + filter="0-9" + ;; + alphanumeric) + filter="a-zA-Z0-9" + ;; + alphanumeric+special|special+alphanumeric) + # Limit variety of special characters, so there is a higher chance of containing more alphanumeric characters + # Special characters are harder to write, and it could impact the overall UX if most passwords are too complex + filter='a-zA-Z0-9:@.,/+!=' + ;; + *) + echo "Invalid type ${type}" >&2 + return 1 + ;; + esac + # Obtain count + 10 lines from /dev/urandom to ensure that the resulting string has the expected size + # Note there is a very small chance of strings starting with EOL character + # Therefore, the higher amount of lines read, this will happen less frequently + result="$(head -n "$((count + 10))" /dev/urandom | tr -dc "$filter" | head -c "$count")" + echo "$result" +} + +######################## +# Create md5 hash from a string +# Arguments: +# $1 - string +# Returns: +# md5 hash - string 
+######################### +generate_md5_hash() { + local -r str="${1:?missing input string}" + echo -n "$str" | md5sum | awk '{print $1}' +} + +######################## +# Create sha1 hash from a string +# Arguments: +# $1 - string +# $2 - algorithm - 1 (default), 224, 256, 384, 512 +# Returns: +# sha1 hash - string +######################### +generate_sha_hash() { + local -r str="${1:?missing input string}" + local -r algorithm="${2:-1}" + echo -n "$str" | "sha${algorithm}sum" | awk '{print $1}' +} + +######################## +# Converts a string to its hexadecimal representation +# Arguments: +# $1 - string +# Returns: +# hexadecimal representation of the string +######################### +convert_to_hex() { + local -r str=${1:?missing input string} + local -i iterator + local char + for ((iterator = 0; iterator < ${#str}; iterator++)); do + char=${str:iterator:1} + printf '%x' "'${char}" + done +} + +######################## +# Get boot time +# Globals: +# None +# Arguments: +# None +# Returns: +# Boot time metadata +######################### +get_boot_time() { + stat /proc --format=%Y +} + +######################## +# Get machine ID +# Globals: +# None +# Arguments: +# None +# Returns: +# Machine ID +######################### +get_machine_id() { + local machine_id + if [[ -f /etc/machine-id ]]; then + machine_id="$(cat /etc/machine-id)" + fi + if [[ -z "$machine_id" ]]; then + # Fallback to the boot-time, which will at least ensure a unique ID in the current session + machine_id="$(get_boot_time)" + fi + echo "$machine_id" +} + +######################## +# Get the root partition's disk device ID (e.g. 
/dev/sda1) +# Globals: +# None +# Arguments: +# None +# Returns: +# Root partition disk ID +######################### +get_disk_device_id() { + local device_id="" + if grep -q ^/dev /proc/mounts; then + device_id="$(grep ^/dev /proc/mounts | awk '$2 == "/" { print $1 }' | tail -1)" + fi + # If it could not be autodetected, fallback to /dev/sda1 as a default + if [[ -z "$device_id" || ! -b "$device_id" ]]; then + device_id="/dev/sda1" + fi + echo "$device_id" +} + +######################## +# Get the root disk device ID (e.g. /dev/sda) +# Globals: +# None +# Arguments: +# None +# Returns: +# Root disk ID +######################### +get_root_disk_device_id() { + get_disk_device_id | sed -E 's/p?[0-9]+$//' +} + +######################## +# Get the root disk size in bytes +# Globals: +# None +# Arguments: +# None +# Returns: +# Root disk size in bytes +######################### +get_root_disk_size() { + fdisk -l "$(get_root_disk_device_id)" | grep 'Disk.*bytes' | sed -E 's/.*, ([0-9]+) bytes,.*/\1/' || true +} + +######################## +# Run command as a specific user and group (optional) +# Arguments: +# $1 - USER(:GROUP) to switch to +# $2..$n - command to execute +# Returns: +# Exit code of the specified command +######################### +run_as_user() { + run_chroot "$@" +} + +######################## +# Execute command as a specific user and group (optional), +# replacing the current process image +# Arguments: +# $1 - USER(:GROUP) to switch to +# $2..$n - command to execute +# Returns: +# Exit code of the specified command +######################### +exec_as_user() { + run_chroot --replace-process "$@" +} + +######################## +# Run a command using chroot +# Arguments: +# $1 - USER(:GROUP) to switch to +# $2..$n - command to execute +# Flags: +# -r | --replace-process - Replace the current process image (optional) +# Returns: +# Exit code of the specified command +######################### +run_chroot() { + local userspec + local user + local homedir + 
local replace=false + local -r cwd="$(pwd)" + + # Parse and validate flags + while [[ "$#" -gt 0 ]]; do + case "$1" in + -r | --replace-process) + replace=true + ;; + --) + shift + break + ;; + -*) + stderr_print "unrecognized flag $1" + return 1 + ;; + *) + break + ;; + esac + shift + done + + # Parse and validate arguments + if [[ "$#" -lt 2 ]]; then + echo "expected at least 2 arguments" + return 1 + else + userspec=$1 + shift + + # userspec can optionally include the group, so we parse the user + user=$(echo "$userspec" | cut -d':' -f1) + fi + + if ! am_i_root; then + error "Could not switch to '${userspec}': Operation not permitted" + return 1 + fi + + # Get the HOME directory for the user to switch, as chroot does + # not properly update this env and some scripts rely on it + homedir=$(eval echo "~${user}") + if [[ ! -d $homedir ]]; then + homedir="${HOME:-/}" + fi + + # Obtaining value for "$@" indirectly in order to properly support shell parameter expansion + if [[ "$replace" = true ]]; then + exec chroot --userspec="$userspec" / bash -c "cd ${cwd}; export HOME=${homedir}; exec \"\$@\"" -- "$@" + else + chroot --userspec="$userspec" / bash -c "cd ${cwd}; export HOME=${homedir}; exec \"\$@\"" -- "$@" + fi +} diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libpersistence.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libpersistence.sh new file mode 100644 index 000000000..af6af64d6 --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libpersistence.sh @@ -0,0 +1,124 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Bitnami persistence library +# Used for bringing persistence capabilities to applications that don't have clear separation of data and logic + +# shellcheck disable=SC1091 + +# Load Generic Libraries +. /opt/bitnami/scripts/libfs.sh +. /opt/bitnami/scripts/libos.sh +. /opt/bitnami/scripts/liblog.sh +. 
/opt/bitnami/scripts/libversion.sh + +# Functions + +######################## +# Persist an application directory +# Globals: +# BITNAMI_ROOT_DIR +# BITNAMI_VOLUME_DIR +# Arguments: +# $1 - App folder name +# $2 - List of app files to persist +# Returns: +# true if all steps succeeded, false otherwise +######################### +persist_app() { + local -r app="${1:?missing app}" + local -a files_to_restore + read -r -a files_to_persist <<< "$(tr ',;:' ' ' <<< "$2")" + local -r install_dir="${BITNAMI_ROOT_DIR}/${app}" + local -r persist_dir="${BITNAMI_VOLUME_DIR}/${app}" + # Persist the individual files + if [[ "${#files_to_persist[@]}" -le 0 ]]; then + warn "No files are configured to be persisted" + return + fi + pushd "$install_dir" >/dev/null || exit + local file_to_persist_relative file_to_persist_destination file_to_persist_destination_folder + local -r tmp_file="/tmp/perms.acl" + for file_to_persist in "${files_to_persist[@]}"; do + if [[ ! -f "$file_to_persist" && ! -d "$file_to_persist" ]]; then + error "Cannot persist '${file_to_persist}' because it does not exist" + return 1 + fi + file_to_persist_relative="$(relativize "$file_to_persist" "$install_dir")" + file_to_persist_destination="${persist_dir}/${file_to_persist_relative}" + file_to_persist_destination_folder="$(dirname "$file_to_persist_destination")" + # Get original permissions for existing files, which will be applied later + # Exclude the root directory with 'sed', to avoid issues when copying the entirety of it to a volume + getfacl -R "$file_to_persist_relative" | sed -E '/# file: (\..+|[^.])/,$!d' > "$tmp_file" + # Copy directories to the volume + ensure_dir_exists "$file_to_persist_destination_folder" + cp -Lr --preserve=links "$file_to_persist_relative" "$file_to_persist_destination_folder" + # Restore permissions + pushd "$persist_dir" >/dev/null || exit + if am_i_root; then + setfacl --restore="$tmp_file" + else + # When running as non-root, don't change ownership + setfacl 
--restore=<(grep -E -v '^# (owner|group):' "$tmp_file") + fi + popd >/dev/null || exit + done + popd >/dev/null || exit + rm -f "$tmp_file" + # Install the persisted files into the installation directory, via symlinks + restore_persisted_app "$@" +} + +######################## +# Restore a persisted application directory +# Globals: +# BITNAMI_ROOT_DIR +# BITNAMI_VOLUME_DIR +# FORCE_MAJOR_UPGRADE +# Arguments: +# $1 - App folder name +# $2 - List of app files to restore +# Returns: +# true if all steps succeeded, false otherwise +######################### +restore_persisted_app() { + local -r app="${1:?missing app}" + local -a files_to_restore + read -r -a files_to_restore <<< "$(tr ',;:' ' ' <<< "$2")" + local -r install_dir="${BITNAMI_ROOT_DIR}/${app}" + local -r persist_dir="${BITNAMI_VOLUME_DIR}/${app}" + # Restore the individual persisted files + if [[ "${#files_to_restore[@]}" -le 0 ]]; then + warn "No persisted files are configured to be restored" + return + fi + local file_to_restore_relative file_to_restore_origin file_to_restore_destination + for file_to_restore in "${files_to_restore[@]}"; do + file_to_restore_relative="$(relativize "$file_to_restore" "$install_dir")" + # We use 'realpath --no-symlinks' to ensure that the case of '.' is covered and the directory is removed + file_to_restore_origin="$(realpath --no-symlinks "${install_dir}/${file_to_restore_relative}")" + file_to_restore_destination="$(realpath --no-symlinks "${persist_dir}/${file_to_restore_relative}")" + rm -rf "$file_to_restore_origin" + ln -sfn "$file_to_restore_destination" "$file_to_restore_origin" + done +} + +######################## +# Check if an application directory was already persisted +# Globals: +# BITNAMI_VOLUME_DIR +# Arguments: +# $1 - App folder name +# Returns: +# true if all steps succeeded, false otherwise +######################### +is_app_initialized() { + local -r app="${1:?missing app}" + local -r persist_dir="${BITNAMI_VOLUME_DIR}/${app}" + if ! 
is_mounted_dir_empty "$persist_dir"; then + true + else + false + fi +} diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libservice.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libservice.sh new file mode 100644 index 000000000..107f54e6b --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libservice.sh @@ -0,0 +1,496 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Library for managing services + +# shellcheck disable=SC1091 + +# Load Generic Libraries +. /opt/bitnami/scripts/libvalidations.sh +. /opt/bitnami/scripts/liblog.sh + +# Functions + +######################## +# Read the provided pid file and returns a PID +# Arguments: +# $1 - Pid file +# Returns: +# PID +######################### +get_pid_from_file() { + local pid_file="${1:?pid file is missing}" + + if [[ -f "$pid_file" ]]; then + if [[ -n "$(< "$pid_file")" ]] && [[ "$(< "$pid_file")" -gt 0 ]]; then + echo "$(< "$pid_file")" + fi + fi +} + +######################## +# Check if a provided PID corresponds to a running service +# Arguments: +# $1 - PID +# Returns: +# Boolean +######################### +is_service_running() { + local pid="${1:?pid is missing}" + + kill -0 "$pid" 2>/dev/null +} + +######################## +# Stop a service by sending a termination signal to its pid +# Arguments: +# $1 - Pid file +# $2 - Signal number (optional) +# Returns: +# None +######################### +stop_service_using_pid() { + local pid_file="${1:?pid file is missing}" + local signal="${2:-}" + local pid + + pid="$(get_pid_from_file "$pid_file")" + [[ -z "$pid" ]] || ! 
is_service_running "$pid" && return + + if [[ -n "$signal" ]]; then + kill "-${signal}" "$pid" + else + kill "$pid" + fi + + local counter=10 + while [[ "$counter" -ne 0 ]] && is_service_running "$pid"; do + sleep 1 + counter=$((counter - 1)) + done +} + +######################## +# Start cron daemon +# Arguments: +# None +# Returns: +# true if started correctly, false otherwise +######################### +cron_start() { + if [[ -x "/usr/sbin/cron" ]]; then + /usr/sbin/cron + elif [[ -x "/usr/sbin/crond" ]]; then + /usr/sbin/crond + else + false + fi +} + +######################## +# Generate a cron configuration file for a given service +# Arguments: +# $1 - Service name +# $2 - Command +# Flags: +# --run-as - User to run as (default: root) +# --schedule - Cron schedule configuration (default: * * * * *) +# Returns: +# None +######################### +generate_cron_conf() { + local service_name="${1:?service name is missing}" + local cmd="${2:?command is missing}" + local run_as="root" + local schedule="* * * * *" + local clean="true" + + # Parse optional CLI flags + shift 2 + while [[ "$#" -gt 0 ]]; do + case "$1" in + --run-as) + shift + run_as="$1" + ;; + --schedule) + shift + schedule="$1" + ;; + --no-clean) + clean="false" + ;; + *) + echo "Invalid command line flag ${1}" >&2 + return 1 + ;; + esac + shift + done + + mkdir -p /etc/cron.d + if "$clean"; then + cat > "/etc/cron.d/${service_name}" <> /etc/cron.d/"$service_name" + fi +} + +######################## +# Remove a cron configuration file for a given service +# Arguments: +# $1 - Service name +# Returns: +# None +######################### +remove_cron_conf() { + local service_name="${1:?service name is missing}" + local cron_conf_dir="/etc/monit/conf.d" + rm -f "${cron_conf_dir}/${service_name}" +} + +######################## +# Generate a monit configuration file for a given service +# Arguments: +# $1 - Service name +# $2 - Pid file +# $3 - Start command +# $4 - Stop command +# Flags: +# --disable - 
Whether to disable the monit configuration +# Returns: +# None +######################### +generate_monit_conf() { + local service_name="${1:?service name is missing}" + local pid_file="${2:?pid file is missing}" + local start_command="${3:?start command is missing}" + local stop_command="${4:?stop command is missing}" + local monit_conf_dir="/etc/monit/conf.d" + local disabled="no" + + # Parse optional CLI flags + shift 4 + while [[ "$#" -gt 0 ]]; do + case "$1" in + --disable) + disabled="yes" + ;; + *) + echo "Invalid command line flag ${1}" >&2 + return 1 + ;; + esac + shift + done + + is_boolean_yes "$disabled" && conf_suffix=".disabled" + mkdir -p "$monit_conf_dir" + cat > "${monit_conf_dir}/${service_name}.conf${conf_suffix:-}" <&2 + return 1 + ;; + esac + shift + done + + mkdir -p "$logrotate_conf_dir" + cat < "${logrotate_conf_dir}/${service_name}" +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 + +${log_path} { + ${period} + rotate ${rotations} + dateext + compress + copytruncate + missingok +$(indent "$extra" 2) +} +EOF +} + +######################## +# Remove a logrotate configuration file +# Arguments: +# $1 - Service name +# Returns: +# None +######################### +remove_logrotate_conf() { + local service_name="${1:?service name is missing}" + local logrotate_conf_dir="/etc/logrotate.d" + rm -f "${logrotate_conf_dir}/${service_name}" +} + +######################## +# Generate a Systemd configuration file +# Arguments: +# $1 - Service name +# Flags: +# --custom-service-content - Custom content to add to the [service] block +# --environment - Environment variable to define (multiple --environment options may be passed) +# --environment-file - Text file with environment variables (multiple --environment-file options may be passed) +# --exec-start - Start command (required) +# --exec-start-pre - Pre-start command (optional) +# --exec-start-post - Post-start command (optional) +# --exec-stop - Stop command (optional) +# --exec-reload 
- Reload command (optional) +# --group - System group to start the service with +# --name - Service full name (e.g. Apache HTTP Server, defaults to $1) +# --restart - When to restart the Systemd service after being stopped (defaults to always) +# --pid-file - Service PID file +# --standard-output - File where to print stdout output +# --standard-error - File where to print stderr output +# --success-exit-status - Exit code that indicates a successful shutdown +# --type - Systemd unit type (defaults to forking) +# --user - System user to start the service with +# --working-directory - Working directory at which to start the service +# Returns: +# None +######################### +generate_systemd_conf() { + local -r service_name="${1:?service name is missing}" + local -r systemd_units_dir="/etc/systemd/system" + local -r service_file="${systemd_units_dir}/bitnami.${service_name}.service" + # Default values + local name="$service_name" + local type="forking" + local user="" + local group="" + local environment="" + local environment_file="" + local exec_start="" + local exec_start_pre="" + local exec_start_post="" + local exec_stop="" + local exec_reload="" + local restart="always" + local pid_file="" + local standard_output="journal" + local standard_error="" + local limits_content="" + local success_exit_status="" + local custom_service_content="" + local working_directory="" + # Parse CLI flags + shift + while [[ "$#" -gt 0 ]]; do + case "$1" in + --name \ + | --type \ + | --user \ + | --group \ + | --exec-start \ + | --exec-stop \ + | --exec-reload \ + | --restart \ + | --pid-file \ + | --standard-output \ + | --standard-error \ + | --success-exit-status \ + | --custom-service-content \ + | --working-directory \ + ) + var_name="$(echo "$1" | sed -e "s/^--//" -e "s/-/_/g")" + shift + declare "$var_name"="${1:?"${var_name} value is missing"}" + ;; + --limit-*) + [[ -n "$limits_content" ]] && limits_content+=$'\n' + var_name="${1//--limit-}" + shift + 
limits_content+="Limit${var_name^^}=${1:?"--limit-${var_name} value is missing"}" + ;; + --exec-start-pre) + shift + [[ -n "$exec_start_pre" ]] && exec_start_pre+=$'\n' + exec_start_pre+="ExecStartPre=${1:?"--exec-start-pre value is missing"}" + ;; + --exec-start-post) + shift + [[ -n "$exec_start_post" ]] && exec_start_post+=$'\n' + exec_start_post+="ExecStartPost=${1:?"--exec-start-post value is missing"}" + ;; + --environment) + shift + # It is possible to add multiple environment lines + [[ -n "$environment" ]] && environment+=$'\n' + environment+="Environment=${1:?"--environment value is missing"}" + ;; + --environment-file) + shift + # It is possible to add multiple environment-file lines + [[ -n "$environment_file" ]] && environment_file+=$'\n' + environment_file+="EnvironmentFile=${1:?"--environment-file value is missing"}" + ;; + *) + echo "Invalid command line flag ${1}" >&2 + return 1 + ;; + esac + shift + done + # Validate inputs + local error="no" + if [[ -z "$exec_start" ]]; then + error "The --exec-start option is required" + error="yes" + fi + if [[ "$error" != "no" ]]; then + return 1 + fi + # Generate the Systemd unit + cat > "$service_file" <> "$service_file" <<< "WorkingDirectory=${working_directory}" + fi + if [[ -n "$exec_start_pre" ]]; then + # This variable may contain multiple ExecStartPre= directives + cat >> "$service_file" <<< "$exec_start_pre" + fi + if [[ -n "$exec_start" ]]; then + cat >> "$service_file" <<< "ExecStart=${exec_start}" + fi + if [[ -n "$exec_start_post" ]]; then + # This variable may contain multiple ExecStartPost= directives + cat >> "$service_file" <<< "$exec_start_post" + fi + # Optional stop and reload commands + if [[ -n "$exec_stop" ]]; then + cat >> "$service_file" <<< "ExecStop=${exec_stop}" + fi + if [[ -n "$exec_reload" ]]; then + cat >> "$service_file" <<< "ExecReload=${exec_reload}" + fi + # User and group + if [[ -n "$user" ]]; then + cat >> "$service_file" <<< "User=${user}" + fi + if [[ -n "$group" ]]; 
then + cat >> "$service_file" <<< "Group=${group}" + fi + # PID file allows to determine if the main process is running properly (for Restart=always) + if [[ -n "$pid_file" ]]; then + cat >> "$service_file" <<< "PIDFile=${pid_file}" + fi + if [[ -n "$restart" ]]; then + cat >> "$service_file" <<< "Restart=${restart}" + fi + # Environment flags + if [[ -n "$environment" ]]; then + # This variable may contain multiple Environment= directives + cat >> "$service_file" <<< "$environment" + fi + if [[ -n "$environment_file" ]]; then + # This variable may contain multiple EnvironmentFile= directives + cat >> "$service_file" <<< "$environment_file" + fi + # Logging + if [[ -n "$standard_output" ]]; then + cat >> "$service_file" <<< "StandardOutput=${standard_output}" + fi + if [[ -n "$standard_error" ]]; then + cat >> "$service_file" <<< "StandardError=${standard_error}" + fi + if [[ -n "$custom_service_content" ]]; then + # This variable may contain multiple miscellaneous directives + cat >> "$service_file" <<< "$custom_service_content" + fi + if [[ -n "$success_exit_status" ]]; then + cat >> "$service_file" <> "$service_file" <> "$service_file" <> "$service_file" <= 0 )); then + true + else + false + fi +} + +######################## +# Check if the provided argument is a boolean or is the string 'yes/true' +# Arguments: +# $1 - Value to check +# Returns: +# Boolean +######################### +is_boolean_yes() { + local -r bool="${1:-}" + # comparison is performed without regard to the case of alphabetic characters + shopt -s nocasematch + if [[ "$bool" = 1 || "$bool" =~ ^(yes|true)$ ]]; then + true + else + false + fi +} + +######################## +# Check if the provided argument is a boolean yes/no value +# Arguments: +# $1 - Value to check +# Returns: +# Boolean +######################### +is_yes_no_value() { + local -r bool="${1:-}" + if [[ "$bool" =~ ^(yes|no)$ ]]; then + true + else + false + fi +} + +######################## +# Check if the provided argument is 
a boolean true/false value +# Arguments: +# $1 - Value to check +# Returns: +# Boolean +######################### +is_true_false_value() { + local -r bool="${1:-}" + if [[ "$bool" =~ ^(true|false)$ ]]; then + true + else + false + fi +} + +######################## +# Check if the provided argument is a boolean 1/0 value +# Arguments: +# $1 - Value to check +# Returns: +# Boolean +######################### +is_1_0_value() { + local -r bool="${1:-}" + if [[ "$bool" =~ ^[10]$ ]]; then + true + else + false + fi +} + +######################## +# Check if the provided argument is an empty string or not defined +# Arguments: +# $1 - Value to check +# Returns: +# Boolean +######################### +is_empty_value() { + local -r val="${1:-}" + if [[ -z "$val" ]]; then + true + else + false + fi +} + +######################## +# Validate if the provided argument is a valid port +# Arguments: +# $1 - Port to validate +# Returns: +# Boolean and error message +######################### +validate_port() { + local value + local unprivileged=0 + + # Parse flags + while [[ "$#" -gt 0 ]]; do + case "$1" in + -unprivileged) + unprivileged=1 + ;; + --) + shift + break + ;; + -*) + stderr_print "unrecognized flag $1" + return 1 + ;; + *) + break + ;; + esac + shift + done + + if [[ "$#" -gt 1 ]]; then + echo "too many arguments provided" + return 2 + elif [[ "$#" -eq 0 ]]; then + stderr_print "missing port argument" + return 1 + else + value=$1 + fi + + if [[ -z "$value" ]]; then + echo "the value is empty" + return 1 + else + if ! 
is_int "$value"; then + echo "value is not an integer" + return 2 + elif [[ "$value" -lt 0 ]]; then + echo "negative value provided" + return 2 + elif [[ "$value" -gt 65535 ]]; then + echo "requested port is greater than 65535" + return 2 + elif [[ "$unprivileged" = 1 && "$value" -lt 1024 ]]; then + echo "privileged port requested" + return 3 + fi + fi +} + +######################## +# Validate if the provided argument is a valid IPv6 address +# Arguments: +# $1 - IP to validate +# Returns: +# Boolean +######################### +validate_ipv6() { + local ip="${1:?ip is missing}" + local stat=1 + local full_address_regex='^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$' + local short_address_regex='^((([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}){0,6}::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4}){0,6})$' + + if [[ $ip =~ $full_address_regex || $ip =~ $short_address_regex || $ip == "::" ]]; then + stat=0 + fi + return $stat +} + +######################## +# Validate if the provided argument is a valid IPv4 address +# Arguments: +# $1 - IP to validate +# Returns: +# Boolean +######################### +validate_ipv4() { + local ip="${1:?ip is missing}" + local stat=1 + + if [[ $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then + read -r -a ip_array <<< "$(tr '.' ' ' <<< "$ip")" + [[ ${ip_array[0]} -le 255 && ${ip_array[1]} -le 255 \ + && ${ip_array[2]} -le 255 && ${ip_array[3]} -le 255 ]] + stat=$? 
+ fi + return $stat +} + +######################## +# Validate if the provided argument is a valid IPv4 or IPv6 address +# Arguments: +# $1 - IP to validate +# Returns: +# Boolean +######################### +validate_ip() { + local ip="${1:?ip is missing}" + local stat=1 + + if validate_ipv4 "$ip"; then + stat=0 + else + stat=$(validate_ipv6 "$ip") + fi + return $stat +} + +######################## +# Validate a string format +# Arguments: +# $1 - String to validate +# Returns: +# Boolean +######################### +validate_string() { + local string + local min_length=-1 + local max_length=-1 + + # Parse flags + while [ "$#" -gt 0 ]; do + case "$1" in + -min-length) + shift + min_length=${1:-} + ;; + -max-length) + shift + max_length=${1:-} + ;; + --) + shift + break + ;; + -*) + stderr_print "unrecognized flag $1" + return 1 + ;; + *) + break + ;; + esac + shift + done + + if [ "$#" -gt 1 ]; then + stderr_print "too many arguments provided" + return 2 + elif [ "$#" -eq 0 ]; then + stderr_print "missing string" + return 1 + else + string=$1 + fi + + if [[ "$min_length" -ge 0 ]] && [[ "${#string}" -lt "$min_length" ]]; then + echo "string length is less than $min_length" + return 1 + fi + if [[ "$max_length" -ge 0 ]] && [[ "${#string}" -gt "$max_length" ]]; then + echo "string length is great than $max_length" + return 1 + fi +} diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libversion.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libversion.sh new file mode 100644 index 000000000..6ca71ac7b --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libversion.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Library for managing versions strings + +# shellcheck disable=SC1091 + +# Load Generic Libraries +. 
/opt/bitnami/scripts/liblog.sh + +# Functions +######################## +# Gets semantic version +# Arguments: +# $1 - version: string to extract major.minor.patch +# $2 - section: 1 to extract major, 2 to extract minor, 3 to extract patch +# Returns: +# array with the major, minor and release +######################### +get_sematic_version () { + local version="${1:?version is required}" + local section="${2:?section is required}" + local -a version_sections + + #Regex to parse versions: x.y.z + local -r regex='([0-9]+)(\.([0-9]+)(\.([0-9]+))?)?' + + if [[ "$version" =~ $regex ]]; then + local i=1 + local j=1 + local n=${#BASH_REMATCH[*]} + + while [[ $i -lt $n ]]; do + if [[ -n "${BASH_REMATCH[$i]}" ]] && [[ "${BASH_REMATCH[$i]:0:1}" != '.' ]]; then + version_sections[j]="${BASH_REMATCH[$i]}" + ((j++)) + fi + ((i++)) + done + + local number_regex='^[0-9]+$' + if [[ "$section" =~ $number_regex ]] && (( section > 0 )) && (( section <= 3 )); then + echo "${version_sections[$section]}" + return + else + stderr_print "Section allowed values are: 1, 2, and 3" + return 1 + fi + fi +} diff --git a/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libwebserver.sh b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libwebserver.sh new file mode 100644 index 000000000..8023f9b05 --- /dev/null +++ b/deploy/pgvector/prebuildfs/opt/bitnami/scripts/libwebserver.sh @@ -0,0 +1,476 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Bitnami web server handler library + +# shellcheck disable=SC1090,SC1091 + +# Load generic libraries +. /opt/bitnami/scripts/liblog.sh + +######################## +# Execute a command (or list of commands) with the web server environment and library loaded +# Globals: +# * +# Arguments: +# None +# Returns: +# None +######################### +web_server_execute() { + local -r web_server="${1:?missing web server}" + shift + # Run program in sub-shell to avoid web server environment getting loaded when not necessary + ( + . 
"/opt/bitnami/scripts/lib${web_server}.sh" + . "/opt/bitnami/scripts/${web_server}-env.sh" + "$@" + ) +} + +######################## +# Prints the list of enabled web servers +# Globals: +# None +# Arguments: +# None +# Returns: +# None +######################### +web_server_list() { + local -r -a supported_web_servers=(apache nginx) + local -a existing_web_servers=() + for web_server in "${supported_web_servers[@]}"; do + [[ -f "/opt/bitnami/scripts/${web_server}-env.sh" ]] && existing_web_servers+=("$web_server") + done + echo "${existing_web_servers[@]:-}" +} + +######################## +# Prints the currently-enabled web server type (only one, in order of preference) +# Globals: +# None +# Arguments: +# None +# Returns: +# None +######################### +web_server_type() { + local -a web_servers + read -r -a web_servers <<< "$(web_server_list)" + echo "${web_servers[0]:-}" +} + +######################## +# Validate that a supported web server is configured +# Globals: +# None +# Arguments: +# None +# Returns: +# None +######################### +web_server_validate() { + local error_code=0 + local supported_web_servers=("apache" "nginx") + + # Auxiliary functions + print_validation_error() { + error "$1" + error_code=1 + } + + if [[ -z "$(web_server_type)" || ! " ${supported_web_servers[*]} " == *" $(web_server_type) "* ]]; then + print_validation_error "Could not detect any supported web servers. It must be one of: ${supported_web_servers[*]}" + elif ! web_server_execute "$(web_server_type)" type -t "is_$(web_server_type)_running" >/dev/null; then + print_validation_error "Could not load the $(web_server_type) web server library from /opt/bitnami/scripts. Check that it exists and is readable." 
+ fi + + return "$error_code" +} + +######################## +# Check whether the web server is running +# Globals: +# * +# Arguments: +# None +# Returns: +# true if the web server is running, false otherwise +######################### +is_web_server_running() { + "is_$(web_server_type)_running" +} + +######################## +# Start web server +# Globals: +# * +# Arguments: +# None +# Returns: +# None +######################### +web_server_start() { + info "Starting $(web_server_type) in background" + if [[ "${BITNAMI_SERVICE_MANAGER:-}" = "systemd" ]]; then + systemctl start "bitnami.$(web_server_type).service" + else + "${BITNAMI_ROOT_DIR}/scripts/$(web_server_type)/start.sh" + fi +} + +######################## +# Stop web server +# Globals: +# * +# Arguments: +# None +# Returns: +# None +######################### +web_server_stop() { + info "Stopping $(web_server_type)" + if [[ "${BITNAMI_SERVICE_MANAGER:-}" = "systemd" ]]; then + systemctl stop "bitnami.$(web_server_type).service" + else + "${BITNAMI_ROOT_DIR}/scripts/$(web_server_type)/stop.sh" + fi +} + +######################## +# Restart web server +# Globals: +# * +# Arguments: +# None +# Returns: +# None +######################### +web_server_restart() { + info "Restarting $(web_server_type)" + if [[ "${BITNAMI_SERVICE_MANAGER:-}" = "systemd" ]]; then + systemctl restart "bitnami.$(web_server_type).service" + else + "${BITNAMI_ROOT_DIR}/scripts/$(web_server_type)/restart.sh" + fi +} + +######################## +# Reload web server +# Globals: +# * +# Arguments: +# None +# Returns: +# None +######################### +web_server_reload() { + if [[ "${BITNAMI_SERVICE_MANAGER:-}" = "systemd" ]]; then + systemctl reload "bitnami.$(web_server_type).service" + else + "${BITNAMI_ROOT_DIR}/scripts/$(web_server_type)/reload.sh" + fi +} + +######################## +# Ensure a web server application configuration exists (i.e. 
Apache virtual host format or NGINX server block) +# It serves as a wrapper for the specific web server function +# Globals: +# * +# Arguments: +# $1 - App name +# Flags: +# --type - Application type, which has an effect on which configuration template to use +# --hosts - Host listen addresses +# --server-name - Server name +# --server-aliases - Server aliases +# --allow-remote-connections - Whether to allow remote connections or to require local connections +# --disable - Whether to render server configurations with a .disabled prefix +# --disable-http - Whether to render the app's HTTP server configuration with a .disabled prefix +# --disable-https - Whether to render the app's HTTPS server configuration with a .disabled prefix +# --http-port - HTTP port number +# --https-port - HTTPS port number +# --document-root - Path to document root directory +# Apache-specific flags: +# --apache-additional-configuration - Additional vhost configuration (no default) +# --apache-additional-http-configuration - Additional HTTP vhost configuration (no default) +# --apache-additional-https-configuration - Additional HTTPS vhost configuration (no default) +# --apache-before-vhost-configuration - Configuration to add before the directive (no default) +# --apache-allow-override - Whether to allow .htaccess files (only allowed when --move-htaccess is set to 'no' and type is not defined) +# --apache-extra-directory-configuration - Extra configuration for the document root directory +# --apache-proxy-address - Address where to proxy requests +# --apache-proxy-configuration - Extra configuration for the proxy +# --apache-proxy-http-configuration - Extra configuration for the proxy HTTP vhost +# --apache-proxy-https-configuration - Extra configuration for the proxy HTTPS vhost +# --apache-move-htaccess - Move .htaccess files to a common place so they can be loaded during Apache startup (only allowed when type is not defined) +# NGINX-specific flags: +# --nginx-additional-configuration 
- Additional server block configuration (no default) +# --nginx-external-configuration - Configuration external to server block (no default) +# Returns: +# true if the configuration was enabled, false otherwise +######################## +ensure_web_server_app_configuration_exists() { + local app="${1:?missing app}" + shift + local -a apache_args nginx_args web_servers args_var + apache_args=("$app") + nginx_args=("$app") + # Validate arguments + while [[ "$#" -gt 0 ]]; do + case "$1" in + # Common flags + --disable \ + | --disable-http \ + | --disable-https \ + ) + apache_args+=("$1") + nginx_args+=("$1") + ;; + --hosts \ + | --server-name \ + | --server-aliases \ + | --type \ + | --allow-remote-connections \ + | --http-port \ + | --https-port \ + | --document-root \ + ) + apache_args+=("$1" "${2:?missing value}") + nginx_args+=("$1" "${2:?missing value}") + shift + ;; + + # Specific Apache flags + --apache-additional-configuration \ + | --apache-additional-http-configuration \ + | --apache-additional-https-configuration \ + | --apache-before-vhost-configuration \ + | --apache-allow-override \ + | --apache-extra-directory-configuration \ + | --apache-proxy-address \ + | --apache-proxy-configuration \ + | --apache-proxy-http-configuration \ + | --apache-proxy-https-configuration \ + | --apache-move-htaccess \ + ) + apache_args+=("${1//apache-/}" "${2:?missing value}") + shift + ;; + + # Specific NGINX flags + --nginx-additional-configuration \ + | --nginx-external-configuration) + nginx_args+=("${1//nginx-/}" "${2:?missing value}") + shift + ;; + + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + read -r -a web_servers <<< "$(web_server_list)" + for web_server in "${web_servers[@]}"; do + args_var="${web_server}_args[@]" + web_server_execute "$web_server" "ensure_${web_server}_app_configuration_exists" "${!args_var}" + done +} + +######################## +# Ensure a web server application configuration does not exist anymore (i.e. 
Apache virtual host format or NGINX server block) +# It serves as a wrapper for the specific web server function +# Globals: +# * +# Arguments: +# $1 - App name +# Returns: +# true if the configuration was disabled, false otherwise +######################## +ensure_web_server_app_configuration_not_exists() { + local app="${1:?missing app}" + local -a web_servers + read -r -a web_servers <<< "$(web_server_list)" + for web_server in "${web_servers[@]}"; do + web_server_execute "$web_server" "ensure_${web_server}_app_configuration_not_exists" "$app" + done +} + +######################## +# Ensure the web server loads the configuration for an application in a URL prefix +# It serves as a wrapper for the specific web server function +# Globals: +# * +# Arguments: +# $1 - App name +# Flags: +# --allow-remote-connections - Whether to allow remote connections or to require local connections +# --document-root - Path to document root directory +# --prefix - URL prefix from where it will be accessible (i.e. 
/myapp) +# --type - Application type, which has an effect on what configuration template will be used +# Apache-specific flags: +# --apache-additional-configuration - Additional vhost configuration (no default) +# --apache-allow-override - Whether to allow .htaccess files (only allowed when --move-htaccess is set to 'no') +# --apache-extra-directory-configuration - Extra configuration for the document root directory +# --apache-move-htaccess - Move .htaccess files to a common place so they can be loaded during Apache startup +# NGINX-specific flags: +# --nginx-additional-configuration - Additional server block configuration (no default) +# Returns: +# true if the configuration was enabled, false otherwise +######################## +ensure_web_server_prefix_configuration_exists() { + local app="${1:?missing app}" + shift + local -a apache_args nginx_args web_servers args_var + apache_args=("$app") + nginx_args=("$app") + # Validate arguments + while [[ "$#" -gt 0 ]]; do + case "$1" in + # Common flags + --allow-remote-connections \ + | --document-root \ + | --prefix \ + | --type \ + ) + apache_args+=("$1" "${2:?missing value}") + nginx_args+=("$1" "${2:?missing value}") + shift + ;; + + # Specific Apache flags + --apache-additional-configuration \ + | --apache-allow-override \ + | --apache-extra-directory-configuration \ + | --apache-move-htaccess \ + ) + apache_args+=("${1//apache-/}" "$2") + shift + ;; + + # Specific NGINX flags + --nginx-additional-configuration) + nginx_args+=("${1//nginx-/}" "$2") + shift + ;; + + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + read -r -a web_servers <<< "$(web_server_list)" + for web_server in "${web_servers[@]}"; do + args_var="${web_server}_args[@]" + web_server_execute "$web_server" "ensure_${web_server}_prefix_configuration_exists" "${!args_var}" + done +} + +######################## +# Ensure a web server application configuration is updated with the runtime configuration (i.e. 
ports) +# It serves as a wrapper for the specific web server function +# Globals: +# * +# Arguments: +# $1 - App name +# Flags: +# --hosts - Host listen addresses +# --server-name - Server name +# --server-aliases - Server aliases +# --enable-http - Enable HTTP app configuration (if not enabled already) +# --enable-https - Enable HTTPS app configuration (if not enabled already) +# --disable-http - Disable HTTP app configuration (if not disabled already) +# --disable-https - Disable HTTPS app configuration (if not disabled already) +# --http-port - HTTP port number +# --https-port - HTTPS port number +# Returns: +# true if the configuration was updated, false otherwise +######################## +web_server_update_app_configuration() { + local app="${1:?missing app}" + shift + local -a args web_servers + args=("$app") + # Validate arguments + while [[ "$#" -gt 0 ]]; do + case "$1" in + # Common flags + --enable-http \ + | --enable-https \ + | --disable-http \ + | --disable-https \ + ) + args+=("$1") + ;; + --hosts \ + | --server-name \ + | --server-aliases \ + | --http-port \ + | --https-port \ + ) + args+=("$1" "${2:?missing value}") + shift + ;; + + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + read -r -a web_servers <<< "$(web_server_list)" + for web_server in "${web_servers[@]}"; do + web_server_execute "$web_server" "${web_server}_update_app_configuration" "${args[@]}" + done +} + +######################## +# Enable loading page, which shows users that the initialization process is not yet completed +# Globals: +# * +# Arguments: +# None +# Returns: +# None +######################### +web_server_enable_loading_page() { + ensure_web_server_app_configuration_exists "__loading" --hosts "_default_" \ + --apache-additional-configuration " +# Show a HTTP 503 Service Unavailable page by default +RedirectMatch 503 ^/$ +# Show index.html if server is answering with 404 Not Found or 503 Service Unavailable status codes +ErrorDocument 
404 /index.html +ErrorDocument 503 /index.html" \ + --nginx-additional-configuration " +# Show a HTTP 503 Service Unavailable page by default +location / { + return 503; +} +# Show index.html if server is answering with 404 Not Found or 503 Service Unavailable status codes +error_page 404 @installing; +error_page 503 @installing; +location @installing { + rewrite ^(.*)$ /index.html break; +}" + web_server_reload +} + +######################## +# Disable loading page, which shows users that the initialization process is not yet completed +# Globals: +# * +# Arguments: +# None +# Returns: +# None +######################### +web_server_disable_install_page() { + ensure_web_server_app_configuration_not_exists "__loading" + web_server_reload +} diff --git a/deploy/pgvector/prebuildfs/usr/sbin/install_packages b/deploy/pgvector/prebuildfs/usr/sbin/install_packages new file mode 100755 index 000000000..acbc31732 --- /dev/null +++ b/deploy/pgvector/prebuildfs/usr/sbin/install_packages @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +set -eu + +n=0 +max=2 +export DEBIAN_FRONTEND=noninteractive + +until [ $n -gt $max ]; do + set +e + ( + apt-get update -qq && + apt-get install -y --no-install-recommends "$@" + ) + CODE=$? + set -e + if [ $CODE -eq 0 ]; then + break + fi + if [ $n -eq $max ]; then + exit $CODE + fi + echo "apt failed, retrying" + n=$(($n + 1)) +done +apt-get clean && rm -rf /var/lib/apt/lists /var/cache/apt/archives diff --git a/deploy/pgvector/prebuildfs/usr/sbin/run-script b/deploy/pgvector/prebuildfs/usr/sbin/run-script new file mode 100755 index 000000000..b7a5bf1e5 --- /dev/null +++ b/deploy/pgvector/prebuildfs/usr/sbin/run-script @@ -0,0 +1,24 @@ +#!/bin/sh +# Copyright VMware, Inc. 
+# SPDX-License-Identifier: APACHE-2.0 +set -u + +if [ $# -eq 0 ]; then + >&2 echo "No arguments provided" + exit 1 +fi + +script=$1 +exit_code="${2:-96}" +fail_if_not_present="${3:-y}" + +if test -f "$script"; then + sh $script + + if [ $? -ne 0 ]; then + exit $((exit_code)) + fi +elif [ "$fail_if_not_present" = "y" ]; then + >&2 echo "script not found: $script" + exit 127 +fi diff --git a/deploy/pgvector/rootfs/opt/bitnami/scripts/libautoctl.sh b/deploy/pgvector/rootfs/opt/bitnami/scripts/libautoctl.sh new file mode 100644 index 000000000..236512a90 --- /dev/null +++ b/deploy/pgvector/rootfs/opt/bitnami/scripts/libautoctl.sh @@ -0,0 +1,308 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Bitnami pg_auto_failover library + +# shellcheck disable=SC1090,SC1091 + +# Load PostgreSQL library +. /opt/bitnami/scripts/libpostgresql.sh + +######################## +# Change pg_hba.conf so it allows access from replication users +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +autoctl_configure_pghba() { + local replication_auth="trust" + if [[ -n "${POSTGRESQL_REPLICATION_PASSWORD}" ]]; then + replication_auth="md5" + fi + + cat <"${POSTGRESQL_PGHBA_FILE}" +local all all trust +EOF + + if [[ "${POSTGRESQL_AUTOCTL_MODE}" = "monitor" ]]; then + cat <>"${POSTGRESQL_PGHBA_FILE}" +host pg_auto_failover autoctl_node 0.0.0.0/0 ${replication_auth} +EOF + elif [[ "${POSTGRESQL_AUTOCTL_MODE}" = "postgres" ]]; then + cat <>"${POSTGRESQL_PGHBA_FILE}" +host all all 0.0.0.0/0 ${replication_auth} +host all all ::/0 ${replication_auth} +host replication pgautofailover_replicator 0.0.0.0/0 ${replication_auth} +EOF + fi + + cp "${POSTGRESQL_PGHBA_FILE}" "${POSTGRESQL_DATA_DIR}/pg_hba.conf" +} + +######################## +# Configure the auth parameters +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +autoctl_configure_auth() { + info "Configuring auth 
parameters for (${POSTGRESQL_DATA_DIR})..." + + if [[ -f ${POSTGRESQL_DATA_DIR}/.autoctl_initialized ]]; then + info "Auth parameters are already configured, restoring from existing data" + else + postgresql_start_bg + + if [[ -n "${POSTGRESQL_REPLICATION_PASSWORD}" ]]; then + info "Changing replication passwords" + + local -r escaped_password="${POSTGRESQL_REPLICATION_PASSWORD//\'/\'\'}" + if [[ "${POSTGRESQL_AUTOCTL_MODE}" = "monitor" ]]; then + echo "ALTER USER autoctl_node WITH PASSWORD '${escaped_password}';" | postgresql_execute + elif [[ "${POSTGRESQL_AUTOCTL_MODE}" = "postgres" ]]; then + echo "ALTER USER pgautofailover_replicator WITH PASSWORD '${escaped_password}';" | postgresql_execute + pg_autoctl config set --pgdata "${POSTGRESQL_DATA_DIR}" replication.password "${POSTGRESQL_REPLICATION_PASSWORD}" + fi + fi + + if [[ "${POSTGRESQL_AUTOCTL_MODE}" = "postgres" ]]; then + info "Adding users auth configurations..." + [[ -n "${POSTGRESQL_DATABASE}" ]] && [[ "$POSTGRESQL_DATABASE" != "postgres" ]] && postgresql_create_custom_database + if [[ "$POSTGRESQL_USERNAME" = "postgres" ]]; then + postgresql_alter_postgres_user "$POSTGRESQL_PASSWORD" + else + if [[ -n "$POSTGRESQL_POSTGRES_PASSWORD" ]]; then + postgresql_alter_postgres_user "$POSTGRESQL_POSTGRES_PASSWORD" + fi + postgresql_create_admin_user + fi + fi + + postgresql_stop + fi +} + +######################## +# Create a monitor +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +autoctl_create_monitor() { + local -r default_hostname=${1:?default_hostname is required} + + "${POSTGRESQL_BIN_DIR}/pg_autoctl" create monitor \ + --auth md5 \ + --pgdata "${POSTGRESQL_DATA_DIR}" \ + --no-ssl \ + --hostname "${POSTGRESQL_AUTOCTL_HOSTNAME:-$default_hostname}" +} + +######################## +# Build monitor URI +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +monitor_connection_string() { + echo 
"postgres://autoctl_node:${POSTGRESQL_REPLICATION_PASSWORD}@${POSTGRESQL_AUTOCTL_MONITOR_HOST}/pg_auto_failover?connect_timeout=2" +} + +######################## +# Create a postgress node +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +autoctl_create_postgres() { + local -r default_hostname=${1:?default_hostname is required} + + PGPASSWORD="${POSTGRESQL_REPLICATION_PASSWORD}" "${POSTGRESQL_BIN_DIR}/pg_autoctl" create postgres \ + --auth md5 \ + --pgdata "${POSTGRESQL_DATA_DIR}" \ + --no-ssl \ + --monitor "$(monitor_connection_string)" \ + --name "${POSTGRESQL_AUTOCTL_HOSTNAME:-$default_hostname}" \ + --hostname "${POSTGRESQL_AUTOCTL_HOSTNAME:-$default_hostname}" + + pg_autoctl config set --pgdata "${POSTGRESQL_DATA_DIR}" pg_autoctl.monitor "$(monitor_connection_string)" + wait_until_can_connect "$(monitor_connection_string)" +} + +######################## +# Create postgresql data dir using pg_autoclt +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +autoctl_create_node() { + info "Creating ${POSTGRESQL_AUTOCTL_MODE} data directory (${POSTGRESQL_DATA_DIR})..." + + if [[ -f ${POSTGRESQL_DATA_DIR}/.autoctl_initialized ]]; then + info "A ${POSTGRESQL_AUTOCTL_MODE} data directory (${POSTGRESQL_DATA_DIR}) already exists, restoring from existing data" + else + info "Cleaning dbinit initialization files ${POSTGRESQL_DATA_DIR}..." 
+ rm -rf "${POSTGRESQL_DATA_DIR:?}"/* + + local -r default_hostname="$(hostname --fqdn)" + if [[ "${POSTGRESQL_AUTOCTL_MODE}" = "monitor" ]]; then + autoctl_create_monitor "${default_hostname}" + elif [[ "${POSTGRESQL_AUTOCTL_MODE}" = "postgres" ]]; then + autoctl_create_postgres "${default_hostname}" + else + error "autoctl does not support ${POSTGRESQL_AUTOCTL_MODE}" + exit 1 + fi + fi +} + +######################## +# Add pgautofailover extension to shared_preload_libraries property in postgresql.conf +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +autoctl_configure_pgautofailover() { + info "Load pgautofailover through POSTGRESQL_SHARED_PRELOAD_LIBRARIES env var..." + if [[ -f ${POSTGRESQL_DATA_DIR}/.autoctl_initialized ]]; then + info "The pgautofailover is already loaded, restoring from existing config" + else + local preload_libraries + if [[ -n "${POSTGRESQL_SHARED_PRELOAD_LIBRARIES}" ]]; then + preload_libraries="${POSTGRESQL_SHARED_PRELOAD_LIBRARIES},pgautofailover" + else + preload_libraries="pgautofailover" + fi + + postgresql_set_property "shared_preload_libraries" "${preload_libraries}" + fi +} + +######################## +# Add pgbackrest extension's configuration file and directories +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +autoctl_configure_pgbackrest() { + if [[ -f ${POSTGRESQL_DATA_DIR}/.autoctl_initialized ]]; then + info "The pgbackrest is already configured" + else + info "Configuring pgbackrest..." + debug "Ensuring pgbackrest expected directories/files exist..." 
+ for dir in "${POSTGRESQL_PGBACKREST_LOGS_DIR}" "${POSTGRESQL_PGBACKREST_BACKUPS_DIR}" "${POSTGRESQL_PGBACKREST_SPOOL_DIR}"; do + ensure_dir_exists "${dir}" + am_i_root && chown "${POSTGRESQL_DAEMON_USER}:${POSTGRESQL_DAEMON_GROUP}" "${dir}" + done + + cat <>"${POSTGRESQL_PGBACKREST_CONF_FILE}" +[global] +repo1-path=${POSTGRESQL_PGBACKREST_BACKUPS_DIR} +repo1-cipher-pass=${POSTGRESQL_REPLICATION_PASSWORD} +repo1-cipher-type=aes-256-cbc +repo1-retention-diff=1 +repo1-retention-full=2 +process-max=2 +log-path=${POSTGRESQL_PGBACKREST_LOGS_DIR} +log-level-console=info +log-level-file=debug +archive-async=y +spool-path=${POSTGRESQL_PGBACKREST_SPOOL_DIR} +start-fast=y +[testdb] +pg1-path=${POSTGRESQL_DATA_DIR} +EOF + fi +} + +######################## +# Initialize a monitor or postgress node using pg_autoctl command. +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +autoctl_initialize() { + info "Initializing ${POSTGRESQL_AUTOCTL_MODE} data directory..." + + postgresql_unrestrict_pghba + autoctl_create_node + autoctl_configure_pgautofailover + autoctl_configure_pgbackrest + + if [[ ! -f ${POSTGRESQL_DATA_DIR}/.autoctl_initialized ]]; then + info "Moving configuration files to (${POSTGRESQL_DATA_DIR})..." + + cp "${POSTGRESQL_CONF_FILE}" "${POSTGRESQL_DATA_DIR}/postgresql.conf" + mkdir -p "${POSTGRESQL_DATA_DIR}/conf.d" + fi + + autoctl_configure_auth + autoctl_configure_pghba + + touch "${POSTGRESQL_DATA_DIR}/.autoctl_initialized" + info "Done initializing ${POSTGRESQL_AUTOCTL_MODE} data directory..." +} + +######################## +# Wait until a node is ready to accepts connection. +# Globals: +# POSTGRESQL_* +# Arguments: +# - $1 node hostname +# Returns: +# None +######################### +wait_until_can_connect() { + local connection_string="$1" + + check_postgresql_connection() { + psql "$connection_string" -c 'select version()' > /dev/null 2>&1 + } + + info "Wait until node is available..." + if ! 
retry_while "check_postgresql_connection"; then + error "Could not connect to the postgresql" + return 1 + fi +} + +######################## +# Change pg_hba.conf so only password-based authentication is allowed +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_unrestrict_pghba() { + replace_in_file "$POSTGRESQL_PGHBA_FILE" "md5" "trust" false +} diff --git a/deploy/pgvector/rootfs/opt/bitnami/scripts/libpostgresql.sh b/deploy/pgvector/rootfs/opt/bitnami/scripts/libpostgresql.sh new file mode 100644 index 000000000..b66e8c5a5 --- /dev/null +++ b/deploy/pgvector/rootfs/opt/bitnami/scripts/libpostgresql.sh @@ -0,0 +1,1323 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Bitnami PostgreSQL library + +# shellcheck disable=SC1090,SC1091 + +# Load Generic Libraries +. /opt/bitnami/scripts/libfile.sh +. /opt/bitnami/scripts/libfs.sh +. /opt/bitnami/scripts/liblog.sh +. /opt/bitnami/scripts/libos.sh +. /opt/bitnami/scripts/libservice.sh +. /opt/bitnami/scripts/libvalidations.sh +. /opt/bitnami/scripts/libnet.sh + +######################## +# Configure libnss_wrapper so PostgreSQL commands work with a random user. +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_enable_nss_wrapper() { + if ! getent passwd "$(id -u)" &>/dev/null && [ -e "$NSS_WRAPPER_LIB" ]; then + debug "Configuring libnss_wrapper..." 
+ export LD_PRELOAD="$NSS_WRAPPER_LIB" + # shellcheck disable=SC2155 + export NSS_WRAPPER_PASSWD="$(mktemp)" + # shellcheck disable=SC2155 + export NSS_WRAPPER_GROUP="$(mktemp)" + echo "postgres:x:$(id -u):$(id -g):PostgreSQL:$POSTGRESQL_DATA_DIR:/bin/false" >"$NSS_WRAPPER_PASSWD" + echo "postgres:x:$(id -g):" >"$NSS_WRAPPER_GROUP" + fi +} + +######################## +# Validate settings in POSTGRESQL_* environment variables +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_validate() { + info "Validating settings in POSTGRESQL_* env vars.." + local error_code=0 + + # Auxiliary functions + print_validation_error() { + error "$1" + error_code=1 + } + + check_multi_value() { + if [[ " ${2} " != *" ${!1} "* ]]; then + print_validation_error "The allowed values for ${1} are: ${2}" + fi + } + + empty_password_enabled_warn() { + warn "You set the environment variable ALLOW_EMPTY_PASSWORD=${ALLOW_EMPTY_PASSWORD}. For safety reasons, do not use this flag in a production environment." + } + empty_password_error() { + print_validation_error "The $1 environment variable is empty or not set. Set the environment variable ALLOW_EMPTY_PASSWORD=yes to allow the container to be started with blank passwords. This is recommended only for development." + } + if is_boolean_yes "$ALLOW_EMPTY_PASSWORD"; then + empty_password_enabled_warn + else + if [[ -z "$POSTGRESQL_PASSWORD" ]]; then + empty_password_error "POSTGRESQL_PASSWORD" + fi + if ((${#POSTGRESQL_PASSWORD} > 100)); then + print_validation_error "The password cannot be longer than 100 characters. 
Set the environment variable POSTGRESQL_PASSWORD with a shorter value" + fi + if [[ -n "$POSTGRESQL_USERNAME" ]] && [[ -z "$POSTGRESQL_PASSWORD" ]]; then + empty_password_error "POSTGRESQL_PASSWORD" + fi + if [[ -n "$POSTGRESQL_USERNAME" ]] && [[ "$POSTGRESQL_USERNAME" != "postgres" ]] && [[ -n "$POSTGRESQL_PASSWORD" ]] && [[ -z "$POSTGRESQL_DATABASE" ]]; then + print_validation_error "In order to use a custom PostgreSQL user you need to set the environment variable POSTGRESQL_DATABASE as well" + fi + fi + if [[ -n "$POSTGRESQL_REPLICATION_MODE" ]]; then + if [[ "$POSTGRESQL_REPLICATION_MODE" = "master" ]]; then + if ((POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS < 0)); then + print_validation_error "The number of synchronous replicas cannot be less than 0. Set the environment variable POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS" + fi + elif [[ "$POSTGRESQL_REPLICATION_MODE" = "slave" ]]; then + if [[ -z "$POSTGRESQL_MASTER_HOST" ]]; then + print_validation_error "Slave replication mode chosen without setting the environment variable POSTGRESQL_MASTER_HOST. Use it to indicate where the Master node is running" + fi + if [[ -z "$POSTGRESQL_REPLICATION_USER" ]]; then + print_validation_error "Slave replication mode chosen without setting the environment variable POSTGRESQL_REPLICATION_USER. Make sure that the master also has this parameter set" + fi + else + print_validation_error "Invalid replication mode. Available options are 'master/slave'" + fi + # Common replication checks + if [[ -n "$POSTGRESQL_REPLICATION_USER" ]] && [[ -z "$POSTGRESQL_REPLICATION_PASSWORD" ]]; then + empty_password_error "POSTGRESQL_REPLICATION_PASSWORD" + fi + else + if is_boolean_yes "$ALLOW_EMPTY_PASSWORD"; then + empty_password_enabled_warn + else + if [[ -z "$POSTGRESQL_PASSWORD" ]]; then + empty_password_error "POSTGRESQL_PASSWORD" + fi + if [[ -n "$POSTGRESQL_USERNAME" ]] && [[ -z "$POSTGRESQL_PASSWORD" ]]; then + empty_password_error "POSTGRESQL_PASSWORD" + fi + fi + fi + + if ! 
is_yes_no_value "$POSTGRESQL_ENABLE_LDAP"; then + empty_password_error "The values allowed for POSTGRESQL_ENABLE_LDAP are: yes or no" + fi + + if is_boolean_yes "$POSTGRESQL_ENABLE_LDAP" && [[ -n "$POSTGRESQL_LDAP_URL" ]] && [[ -n "$POSTGRESQL_LDAP_SERVER" ]]; then + empty_password_error "You can not set POSTGRESQL_LDAP_URL and POSTGRESQL_LDAP_SERVER at the same time. Check your LDAP configuration." + fi + + if ! is_yes_no_value "$POSTGRESQL_ENABLE_TLS"; then + print_validation_error "The values allowed for POSTGRESQL_ENABLE_TLS are: yes or no" + elif is_boolean_yes "$POSTGRESQL_ENABLE_TLS"; then + # TLS Checks + if [[ -z "$POSTGRESQL_TLS_CERT_FILE" ]]; then + print_validation_error "You must provide a X.509 certificate in order to use TLS" + elif [[ ! -f "$POSTGRESQL_TLS_CERT_FILE" ]]; then + print_validation_error "The X.509 certificate file in the specified path ${POSTGRESQL_TLS_CERT_FILE} does not exist" + fi + if [[ -z "$POSTGRESQL_TLS_KEY_FILE" ]]; then + print_validation_error "You must provide a private key in order to use TLS" + elif [[ ! -f "$POSTGRESQL_TLS_KEY_FILE" ]]; then + print_validation_error "The private key file in the specified path ${POSTGRESQL_TLS_KEY_FILE} does not exist" + fi + if [[ -z "$POSTGRESQL_TLS_CA_FILE" ]]; then + warn "A CA X.509 certificate was not provided. Client verification will not be performed in TLS connections" + elif [[ ! -f "$POSTGRESQL_TLS_CA_FILE" ]]; then + print_validation_error "The CA X.509 certificate file in the specified path ${POSTGRESQL_TLS_CA_FILE} does not exist" + fi + if [[ -n "$POSTGRESQL_TLS_CRL_FILE" ]] && [[ ! -f "$POSTGRESQL_TLS_CRL_FILE" ]]; then + print_validation_error "The CRL file in the specified path ${POSTGRESQL_TLS_CRL_FILE} does not exist" + fi + if ! 
is_yes_no_value "$POSTGRESQL_TLS_PREFER_SERVER_CIPHERS"; then + print_validation_error "The values allowed for POSTGRESQL_TLS_PREFER_SERVER_CIPHERS are: yes or no" + fi + fi + + if [[ -n "$POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE" ]]; then + check_multi_value "POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE" "FIRST ANY" + fi + + [[ "$error_code" -eq 0 ]] || exit "$error_code" +} + +######################## +# Create basic postgresql.conf file using the example provided in the share/ folder +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_create_config() { + info "postgresql.conf file not detected. Generating it..." + cp "$POSTGRESQL_BASE_DIR/share/postgresql.conf.sample" "$POSTGRESQL_CONF_FILE" + # Update default value for 'include_dir' directive + # ref: https://github.com/postgres/postgres/commit/fb9c475597c245562a28d1e916b575ac4ec5c19f#diff-f5544d9b6d218cc9677524b454b41c60 + if ! grep include_dir "$POSTGRESQL_CONF_FILE" >/dev/null; then + error "include_dir line is not present in $POSTGRESQL_CONF_FILE. This may be due to a changes in a new version of PostgreSQL. 
Please check" + exit 1 + fi + local psql_conf + psql_conf="$(sed -E "/#include_dir/i include_dir = 'conf.d'" "$POSTGRESQL_CONF_FILE")" + echo "$psql_conf" >"$POSTGRESQL_CONF_FILE" +} + +######################## +# Create ldap auth configuration in pg_hba, +# but keeps postgres user to authenticate locally +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_ldap_auth_configuration() { + info "Generating LDAP authentication configuration" + local ldap_configuration="" + + if [[ -n "$POSTGRESQL_LDAP_URL" ]]; then + ldap_configuration="ldapurl=\"$POSTGRESQL_LDAP_URL\"" + else + ldap_configuration="ldapserver=${POSTGRESQL_LDAP_SERVER}" + + [[ -n "$POSTGRESQL_LDAP_PREFIX" ]] && ldap_configuration+=" ldapprefix=\"${POSTGRESQL_LDAP_PREFIX}\"" + [[ -n "$POSTGRESQL_LDAP_SUFFIX" ]] && ldap_configuration+=" ldapsuffix=\"${POSTGRESQL_LDAP_SUFFIX}\"" + [[ -n "$POSTGRESQL_LDAP_PORT" ]] && ldap_configuration+=" ldapport=${POSTGRESQL_LDAP_PORT}" + [[ -n "$POSTGRESQL_LDAP_BASE_DN" ]] && ldap_configuration+=" ldapbasedn=\"${POSTGRESQL_LDAP_BASE_DN}\"" + [[ -n "$POSTGRESQL_LDAP_BIND_DN" ]] && ldap_configuration+=" ldapbinddn=\"${POSTGRESQL_LDAP_BIND_DN}\"" + [[ -n "$POSTGRESQL_LDAP_BIND_PASSWORD" ]] && ldap_configuration+=" ldapbindpasswd=${POSTGRESQL_LDAP_BIND_PASSWORD}" + [[ -n "$POSTGRESQL_LDAP_SEARCH_ATTR" ]] && ldap_configuration+=" ldapsearchattribute=${POSTGRESQL_LDAP_SEARCH_ATTR}" + [[ -n "$POSTGRESQL_LDAP_SEARCH_FILTER" ]] && ldap_configuration+=" ldapsearchfilter=\"${POSTGRESQL_LDAP_SEARCH_FILTER}\"" + [[ -n "$POSTGRESQL_LDAP_TLS" ]] && ldap_configuration+=" ldaptls=${POSTGRESQL_LDAP_TLS}" + [[ -n "$POSTGRESQL_LDAP_SCHEME" ]] && ldap_configuration+=" ldapscheme=${POSTGRESQL_LDAP_SCHEME}" + fi + + cat <"$POSTGRESQL_PGHBA_FILE" +host all postgres 0.0.0.0/0 trust +host all postgres ::/0 trust +host all all 0.0.0.0/0 ldap $ldap_configuration +host all all ::/0 ldap $ldap_configuration +EOF +} + +######################## +# 
Create local auth configuration in pg_hba +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_password_auth_configuration() { + info "Generating local authentication configuration" + cat <"$POSTGRESQL_PGHBA_FILE" +host all all 0.0.0.0/0 trust +host all all ::/0 trust +EOF +} + +######################## +# Enforce Certificate client authentication +# for TLS connections in pg_hba +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_tls_auth_configuration() { + info "Enabling TLS Client authentication" + local previous_content + [[ -f "$POSTGRESQL_PGHBA_FILE" ]] && previous_content=$(<"$POSTGRESQL_PGHBA_FILE") + + cat <"$POSTGRESQL_PGHBA_FILE" +hostssl all all 0.0.0.0/0 cert +hostssl all all ::/0 cert +${previous_content:-} +EOF +} + +######################## +# Create basic pg_hba.conf file +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_create_pghba() { + info "pg_hba.conf file not detected. Generating it..." 
+ + if is_boolean_yes "$POSTGRESQL_ENABLE_LDAP"; then + postgresql_ldap_auth_configuration + else + postgresql_password_auth_configuration + fi +} + +######################## +# Change pg_hba.conf so it allows local UNIX socket-based connections +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_allow_local_connection() { + cat <>"$POSTGRESQL_PGHBA_FILE" +local all all trust +host all all 127.0.0.1/32 trust +host all all ::1/128 trust +EOF +} + +######################## +# Change pg_hba.conf so only password-based authentication is allowed +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_restrict_pghba() { + if [[ -n "$POSTGRESQL_PASSWORD" ]]; then + replace_in_file "$POSTGRESQL_PGHBA_FILE" "trust" "md5" false + fi +} + +######################## +# Change pg_hba.conf so it allows access from replication users +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_add_replication_to_pghba() { + local replication_auth="trust" + if [[ -n "$POSTGRESQL_REPLICATION_PASSWORD" ]]; then + replication_auth="md5" + fi + cat <>"$POSTGRESQL_PGHBA_FILE" +host replication all 0.0.0.0/0 ${replication_auth} +host replication all ::/0 ${replication_auth} +EOF +} + +######################## +# Change a PostgreSQL configuration file by setting a property +# Globals: +# POSTGRESQL_* +# Arguments: +# $1 - property +# $2 - value +# $3 - Path to configuration file (default: $POSTGRESQL_CONF_FILE) +# Returns: +# None +######################### +postgresql_set_property() { + local -r property="${1:?missing property}" + local -r value="${2:?missing value}" + local -r conf_file="${3:-$POSTGRESQL_CONF_FILE}" + local psql_conf + if grep -qE "^#*\s*${property}" "$conf_file" >/dev/null; then + replace_in_file "$conf_file" "^#*\s*${property}\s*=.*" "${property} = '${value}'" false + else + echo "${property} = '${value}'" 
>>"$conf_file" + fi +} + +######################## +# Create a user for master-slave replication +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_create_replication_user() { + local -r escaped_password="${POSTGRESQL_REPLICATION_PASSWORD//\'/\'\'}" + info "Creating replication user $POSTGRESQL_REPLICATION_USER" + echo "CREATE ROLE \"$POSTGRESQL_REPLICATION_USER\" REPLICATION LOGIN ENCRYPTED PASSWORD '$escaped_password'" | postgresql_execute +} + +######################## +# Change postgresql.conf by setting replication parameters +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_configure_replication_parameters() { + local -r psql_major_version="$(postgresql_get_major_version)" + info "Configuring replication parameters" + postgresql_set_property "wal_level" "$POSTGRESQL_WAL_LEVEL" + postgresql_set_property "max_wal_size" "400MB" + postgresql_set_property "max_wal_senders" "16" + if ((psql_major_version >= 13)); then + postgresql_set_property "wal_keep_size" "128MB" + else + postgresql_set_property "wal_keep_segments" "12" + fi + postgresql_set_property "hot_standby" "on" +} + +######################## +# Change postgresql.conf by setting parameters for synchronous replication +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_configure_synchronous_replication() { + local replication_nodes="" + local synchronous_standby_names="" + info "Configuring synchronous_replication" + + # Check for comma separate values + # When using repmgr, POSTGRESQL_CLUSTER_APP_NAME will contain the list of nodes to be synchronous + # This list need to cleaned from other things but node names. 
+ if [[ "$POSTGRESQL_CLUSTER_APP_NAME" == *","* ]]; then + read -r -a nodes <<<"$(tr ',;' ' ' <<<"${POSTGRESQL_CLUSTER_APP_NAME}")" + for node in "${nodes[@]}"; do + [[ "$node" =~ ^(([^:/?#]+):)?// ]] || node="tcp://${node}" + + # repmgr is only using the first segment of the FQDN as the application name + host="$(parse_uri "$node" 'host' | awk -F. '{print $1}')" + replication_nodes="${replication_nodes}${replication_nodes:+,}\"${host}\"" + done + else + replication_nodes="\"${POSTGRESQL_CLUSTER_APP_NAME}\"" + fi + + if ((POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS > 0)); then + synchronous_standby_names="${POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS} (${replication_nodes})" + if [[ -n "$POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE" ]]; then + synchronous_standby_names="${POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE} ${synchronous_standby_names}" + fi + + postgresql_set_property "synchronous_commit" "$POSTGRESQL_SYNCHRONOUS_COMMIT_MODE" + postgresql_set_property "synchronous_standby_names" "$synchronous_standby_names" + fi +} + +######################## +# Change postgresql.conf by setting TLS properies +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_configure_tls() { + info "Configuring TLS" + chmod 600 "$POSTGRESQL_TLS_KEY_FILE" || warn "Could not set compulsory permissions (600) on file ${POSTGRESQL_TLS_KEY_FILE}" + postgresql_set_property "ssl" "on" + # Server ciphers are preferred by default + ! 
is_boolean_yes "$POSTGRESQL_TLS_PREFER_SERVER_CIPHERS" && postgresql_set_property "ssl_prefer_server_ciphers" "off" + [[ -n $POSTGRESQL_TLS_CA_FILE ]] && postgresql_set_property "ssl_ca_file" "$POSTGRESQL_TLS_CA_FILE" + [[ -n $POSTGRESQL_TLS_CRL_FILE ]] && postgresql_set_property "ssl_crl_file" "$POSTGRESQL_TLS_CRL_FILE" + postgresql_set_property "ssl_cert_file" "$POSTGRESQL_TLS_CERT_FILE" + postgresql_set_property "ssl_key_file" "$POSTGRESQL_TLS_KEY_FILE" +} + +######################## +# Change postgresql.conf by setting fsync +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_configure_fsync() { + info "Configuring fsync" + postgresql_set_property "fsync" "$POSTGRESQL_FSYNC" +} + +######################## +# Alter password of the postgres user +# Globals: +# POSTGRESQL_* +# Arguments: +# Password +# Returns: +# None +######################### +postgresql_alter_postgres_user() { + local -r escaped_password="${1//\'/\'\'}" + info "Changing password of postgres" + echo "ALTER ROLE postgres WITH PASSWORD '$escaped_password';" | postgresql_execute + if [[ -n "$POSTGRESQL_POSTGRES_CONNECTION_LIMIT" ]]; then + echo "ALTER ROLE postgres WITH CONNECTION LIMIT ${POSTGRESQL_POSTGRES_CONNECTION_LIMIT};" | postgresql_execute + fi +} + +######################## +# Create an admin user with all privileges in POSTGRESQL_DATABASE +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_create_admin_user() { + local -r escaped_password="${POSTGRESQL_PASSWORD//\'/\'\'}" + info "Creating user ${POSTGRESQL_USERNAME}" + local connlimit_string="" + if [[ -n "$POSTGRESQL_USERNAME_CONNECTION_LIMIT" ]]; then + connlimit_string="CONNECTION LIMIT ${POSTGRESQL_USERNAME_CONNECTION_LIMIT}" + fi + echo "CREATE ROLE \"${POSTGRESQL_USERNAME}\" WITH LOGIN ${connlimit_string} CREATEDB PASSWORD '${escaped_password}';" | postgresql_execute + info "Granting access to 
\"${POSTGRESQL_USERNAME}\" to the database \"${POSTGRESQL_DATABASE}\"" + echo "GRANT ALL PRIVILEGES ON DATABASE \"${POSTGRESQL_DATABASE}\" TO \"${POSTGRESQL_USERNAME}\"\;" | postgresql_execute "" "postgres" "$POSTGRESQL_PASSWORD" + echo "ALTER DATABASE \"${POSTGRESQL_DATABASE}\" OWNER TO \"${POSTGRESQL_USERNAME}\"\;" | postgresql_execute "" "postgres" "$POSTGRESQL_PASSWORD" + info "Setting ownership for the 'public' schema database \"${POSTGRESQL_DATABASE}\" to \"${POSTGRESQL_USERNAME}\"" + echo "ALTER SCHEMA public OWNER TO \"${POSTGRESQL_USERNAME}\"\;" | postgresql_execute "$POSTGRESQL_DATABASE" "postgres" "$POSTGRESQL_PASSWORD" +} + +######################## +# Create a database with name $POSTGRESQL_DATABASE +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_create_custom_database() { + echo "CREATE DATABASE \"$POSTGRESQL_DATABASE\"" | postgresql_execute "" "postgres" "" +} + +######################## +# Change postgresql.conf to listen in 0.0.0.0 +# Arguments: +# None +# Returns: +# None +######################### +postgresql_enable_remote_connections() { + postgresql_set_property "listen_addresses" "*" +} + +######################## +# Check if a given configuration file was mounted externally +# Globals: +# POSTGRESQL_* +# Arguments: +# $1 - Filename +# Returns: +# 1 if the file was mounted externally, 0 otherwise +######################### +postgresql_is_file_external() { + local -r filename=$1 + if [[ -d "$POSTGRESQL_MOUNTED_CONF_DIR" ]] && [[ -f "$POSTGRESQL_MOUNTED_CONF_DIR"/"$filename" ]]; then + return 0 + else + return 1 + fi +} + +######################## +# Remove flags and postmaster files from a previous run (case of container restart) +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_clean_from_restart() { + local -r -a files=( + "$POSTGRESQL_DATA_DIR"/postmaster.pid + "$POSTGRESQL_DATA_DIR"/standby.signal + 
"$POSTGRESQL_DATA_DIR"/recovery.signal + ) + + for file in "${files[@]}"; do + if [[ -f "$file" ]]; then + info "Cleaning stale $file file" + rm "$file" + fi + done +} + +######################## +# Ensure PostgreSQL is initialized +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_initialize() { + info "Initializing PostgreSQL database..." + + # This fixes an issue where the trap would kill the entrypoint.sh, if a PID was left over from a previous run + # Exec replaces the process without creating a new one, and when the container is restarted it may have the same PID + rm -f "$POSTGRESQL_PID_FILE" + + # User injected custom configuration + if [[ -d "$POSTGRESQL_MOUNTED_CONF_DIR" ]] && compgen -G "$POSTGRESQL_MOUNTED_CONF_DIR"/* >/dev/null; then + debug "Copying files from $POSTGRESQL_MOUNTED_CONF_DIR to $POSTGRESQL_CONF_DIR" + cp -fr "$POSTGRESQL_MOUNTED_CONF_DIR"/. "$POSTGRESQL_CONF_DIR" + fi + local create_conf_file=yes + local create_pghba_file=yes + + if postgresql_is_file_external "postgresql.conf"; then + info "Custom configuration $POSTGRESQL_CONF_FILE detected" + create_conf_file=no + fi + + if postgresql_is_file_external "pg_hba.conf"; then + info "Custom configuration $POSTGRESQL_PGHBA_FILE detected" + create_pghba_file=no + fi + + debug "Ensuring expected directories/files exist..." + for dir in "$POSTGRESQL_TMP_DIR" "$POSTGRESQL_LOG_DIR" "$POSTGRESQL_DATA_DIR"; do + ensure_dir_exists "$dir" + am_i_root && chown "$POSTGRESQL_DAEMON_USER:$POSTGRESQL_DAEMON_GROUP" "$dir" + done + am_i_root && find "$POSTGRESQL_DATA_DIR" -mindepth 1 -maxdepth 1 -not -name ".snapshot" -not -name "lost+found" -exec chown -R "$POSTGRESQL_DAEMON_USER:$POSTGRESQL_DAEMON_GROUP" {} \; + chmod u+rwx "$POSTGRESQL_DATA_DIR" || warn "Lack of permissions on data directory!" + chmod go-rwx "$POSTGRESQL_DATA_DIR" || warn "Lack of permissions on data directory!" 
+ + is_boolean_yes "$POSTGRESQL_ALLOW_REMOTE_CONNECTIONS" && is_boolean_yes "$create_pghba_file" && postgresql_create_pghba && postgresql_allow_local_connection + # Configure port + postgresql_set_property "port" "$POSTGRESQL_PORT_NUMBER" + is_empty_value "$POSTGRESQL_DEFAULT_TOAST_COMPRESSION" || postgresql_set_property "default_toast_compression" "$POSTGRESQL_DEFAULT_TOAST_COMPRESSION" + is_empty_value "$POSTGRESQL_PASSWORD_ENCRYPTION" || postgresql_set_property "password_encryption" "$POSTGRESQL_PASSWORD_ENCRYPTION" + if ! is_dir_empty "$POSTGRESQL_DATA_DIR"; then + info "Deploying PostgreSQL with persisted data..." + export POSTGRESQL_FIRST_BOOT="no" + is_boolean_yes "$create_pghba_file" && postgresql_restrict_pghba + is_boolean_yes "$create_conf_file" && postgresql_configure_replication_parameters + is_boolean_yes "$create_conf_file" && postgresql_configure_fsync + is_boolean_yes "$create_conf_file" && is_boolean_yes "$POSTGRESQL_ENABLE_TLS" && postgresql_configure_tls + [[ "$POSTGRESQL_REPLICATION_MODE" = "master" ]] && [[ -n "$POSTGRESQL_REPLICATION_USER" ]] && is_boolean_yes "$create_pghba_file" && postgresql_add_replication_to_pghba + [[ "$POSTGRESQL_REPLICATION_MODE" = "master" ]] && is_boolean_yes "$create_pghba_file" && postgresql_configure_synchronous_replication + [[ "$POSTGRESQL_REPLICATION_MODE" = "slave" ]] && postgresql_configure_recovery + else + if [[ "$POSTGRESQL_REPLICATION_MODE" = "master" ]]; then + postgresql_master_init_db + postgresql_start_bg "false" + [[ -n "${POSTGRESQL_DATABASE}" ]] && [[ "$POSTGRESQL_DATABASE" != "postgres" ]] && postgresql_create_custom_database + if [[ "$POSTGRESQL_USERNAME" = "postgres" ]]; then + postgresql_alter_postgres_user "$POSTGRESQL_PASSWORD" + else + if [[ -n "$POSTGRESQL_POSTGRES_PASSWORD" ]]; then + postgresql_alter_postgres_user "$POSTGRESQL_POSTGRES_PASSWORD" + fi + postgresql_create_admin_user + fi + is_boolean_yes "$create_pghba_file" && postgresql_restrict_pghba + [[ -n 
"$POSTGRESQL_REPLICATION_USER" ]] && postgresql_create_replication_user + is_boolean_yes "$create_conf_file" && postgresql_configure_replication_parameters + is_boolean_yes "$create_pghba_file" && postgresql_configure_synchronous_replication + is_boolean_yes "$create_conf_file" && postgresql_configure_fsync + is_boolean_yes "$create_conf_file" && is_boolean_yes "$POSTGRESQL_ENABLE_TLS" && postgresql_configure_tls + [[ -n "$POSTGRESQL_REPLICATION_USER" ]] && is_boolean_yes "$create_pghba_file" && postgresql_add_replication_to_pghba + else + postgresql_slave_init_db + is_boolean_yes "$create_pghba_file" && postgresql_restrict_pghba + is_boolean_yes "$create_conf_file" && postgresql_configure_replication_parameters + is_boolean_yes "$create_conf_file" && postgresql_configure_fsync + is_boolean_yes "$create_conf_file" && is_boolean_yes "$POSTGRESQL_ENABLE_TLS" && postgresql_configure_tls + postgresql_configure_recovery + fi + fi + # TLS Modifications on pghba need to be performed after properly configuring postgresql.conf file + is_boolean_yes "$create_pghba_file" && is_boolean_yes "$POSTGRESQL_ENABLE_TLS" && [[ -n $POSTGRESQL_TLS_CA_FILE ]] && postgresql_tls_auth_configuration + + is_boolean_yes "$create_conf_file" && [[ -n "$POSTGRESQL_SHARED_PRELOAD_LIBRARIES" ]] && postgresql_set_property "shared_preload_libraries" "$POSTGRESQL_SHARED_PRELOAD_LIBRARIES" + is_boolean_yes "$create_conf_file" && postgresql_configure_logging + is_boolean_yes "$create_conf_file" && postgresql_configure_connections + is_boolean_yes "$create_conf_file" && postgresql_configure_timezone + + # Delete conf files generated on first run + rm -f "$POSTGRESQL_DATA_DIR"/postgresql.conf "$POSTGRESQL_DATA_DIR"/pg_hba.conf + + # Stop postgresql + postgresql_stop +} + +######################## +# Run custom pre-initialization scripts +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_custom_pre_init_scripts() { + info "Loading custom pre-init 
scripts..." + if [[ -d "$POSTGRESQL_PREINITSCRIPTS_DIR" ]] && [[ -n $(find "$POSTGRESQL_PREINITSCRIPTS_DIR/" -type f -name "*.sh") ]]; then + info "Loading user's custom files from $POSTGRESQL_PREINITSCRIPTS_DIR ..." + find "$POSTGRESQL_PREINITSCRIPTS_DIR/" -type f -name "*.sh" | sort | while read -r f; do + if [[ -x "$f" ]]; then + debug "Executing $f" + "$f" + else + debug "Sourcing $f" + . "$f" + fi + done + fi +} + +######################## +# Run custom initialization scripts +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_custom_init_scripts() { + info "Loading custom scripts..." + if [[ -d "$POSTGRESQL_INITSCRIPTS_DIR" ]] && [[ -n $(find "$POSTGRESQL_INITSCRIPTS_DIR/" -type f -regex ".*\.\(sh\|sql\|sql.gz\)") ]] && [[ ! -f "$POSTGRESQL_VOLUME_DIR/.user_scripts_initialized" ]]; then + info "Loading user's custom files from $POSTGRESQL_INITSCRIPTS_DIR ..." + postgresql_start_bg "false" + find "$POSTGRESQL_INITSCRIPTS_DIR/" -type f -regex ".*\.\(sh\|sql\|sql.gz\)" | sort | while read -r f; do + case "$f" in + *.sh) + if [[ -x "$f" ]]; then + debug "Executing $f" + "$f" + else + debug "Sourcing $f" + . 
"$f" + fi + ;; + *.sql) + debug "Executing $f" + postgresql_execute "$POSTGRESQL_DATABASE" "$POSTGRESQL_INITSCRIPTS_USERNAME" "$POSTGRESQL_INITSCRIPTS_PASSWORD" <"$f" + ;; + *.sql.gz) + debug "Executing $f" + gunzip -c "$f" | postgresql_execute "$POSTGRESQL_DATABASE" "$POSTGRESQL_INITSCRIPTS_USERNAME" "$POSTGRESQL_INITSCRIPTS_PASSWORD" + ;; + *) debug "Ignoring $f" ;; + esac + done + touch "$POSTGRESQL_VOLUME_DIR"/.user_scripts_initialized + fi +} + +######################## +# Stop PostgreSQL +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# None +######################### +postgresql_stop() { + local -r -a cmd=("pg_ctl" "stop" "-w" "-D" "$POSTGRESQL_DATA_DIR" "-m" "$POSTGRESQL_SHUTDOWN_MODE" "-t" "$POSTGRESQL_PGCTLTIMEOUT") + if [[ -f "$POSTGRESQL_PID_FILE" ]]; then + info "Stopping PostgreSQL..." + if am_i_root; then + run_as_user "$POSTGRESQL_DAEMON_USER" "${cmd[@]}" + else + "${cmd[@]}" + fi + fi +} + +######################## +# Start PostgreSQL and wait until it is ready +# Globals: +# POSTGRESQL_* +# Arguments: +# $1 - Enable logs for PostgreSQL. Default: false +# Returns: +# None +######################### +postgresql_start_bg() { + local -r pg_logs=${1:-false} + local -r pg_ctl_flags=("-w" "-D" "$POSTGRESQL_DATA_DIR" "-l" "$POSTGRESQL_LOG_FILE" "-o" "--config-file=$POSTGRESQL_CONF_FILE --external_pid_file=$POSTGRESQL_PID_FILE --hba_file=$POSTGRESQL_PGHBA_FILE") + info "Starting PostgreSQL in background..." + if is_postgresql_running; then + return 0 + fi + local pg_ctl_cmd=() + if am_i_root; then + pg_ctl_cmd+=("run_as_user" "$POSTGRESQL_DAEMON_USER") + fi + pg_ctl_cmd+=("$POSTGRESQL_BIN_DIR"/pg_ctl) + if [[ "${BITNAMI_DEBUG:-false}" = true ]] || [[ $pg_logs = true ]]; then + "${pg_ctl_cmd[@]}" "start" "${pg_ctl_flags[@]}" + else + "${pg_ctl_cmd[@]}" "start" "${pg_ctl_flags[@]}" >/dev/null 2>&1 + fi + local pg_isready_args=("-U" "postgres" "-p" "$POSTGRESQL_PORT_NUMBER") + local counter=$POSTGRESQL_INIT_MAX_TIMEOUT + while ! 
"$POSTGRESQL_BIN_DIR"/pg_isready "${pg_isready_args[@]}" >/dev/null 2>&1; do + sleep 1 + counter=$((counter - 1)) + if ((counter <= 0)); then + error "PostgreSQL is not ready after $POSTGRESQL_INIT_MAX_TIMEOUT seconds" + exit 1 + fi + done +} + +######################## +# Check if PostgreSQL is running +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### +is_postgresql_running() { + local pid + pid="$(get_pid_from_file "$POSTGRESQL_PID_FILE")" + + if [[ -z "$pid" ]]; then + false + else + is_service_running "$pid" + fi +} + +######################## +# Check if PostgreSQL is not running +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### +is_postgresql_not_running() { + ! is_postgresql_running +} + +######################## +# Initialize master node database by running initdb +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### +postgresql_master_init_db() { + local envExtraFlags=() + local initdb_args=() + if [[ -n "${POSTGRESQL_INITDB_ARGS}" ]]; then + read -r -a envExtraFlags <<<"$POSTGRESQL_INITDB_ARGS" + initdb_args+=("${envExtraFlags[@]}") + fi + if [[ -n "$POSTGRESQL_INITDB_WAL_DIR" ]]; then + ensure_dir_exists "$POSTGRESQL_INITDB_WAL_DIR" + am_i_root && chown "$POSTGRESQL_DAEMON_USER:$POSTGRESQL_DAEMON_GROUP" "$POSTGRESQL_INITDB_WAL_DIR" + initdb_args+=("--waldir" "$POSTGRESQL_INITDB_WAL_DIR") + fi + local initdb_cmd=() + if am_i_root; then + initdb_cmd+=("run_as_user" "$POSTGRESQL_DAEMON_USER") + fi + initdb_cmd+=("$POSTGRESQL_BIN_DIR/initdb") + if [[ -n "${initdb_args[*]:-}" ]]; then + info "Initializing PostgreSQL with ${initdb_args[*]} extra initdb arguments" + if [[ "${BITNAMI_DEBUG:-false}" = true ]]; then + "${initdb_cmd[@]}" -E UTF8 -D "$POSTGRESQL_DATA_DIR" -U "postgres" "${initdb_args[@]}" + else + "${initdb_cmd[@]}" -E UTF8 -D "$POSTGRESQL_DATA_DIR" -U "postgres" "${initdb_args[@]}" >/dev/null 2>&1 + fi + 
elif [[ "${BITNAMI_DEBUG:-false}" = true ]]; then + "${initdb_cmd[@]}" -E UTF8 -D "$POSTGRESQL_DATA_DIR" -U "postgres" + else + "${initdb_cmd[@]}" -E UTF8 -D "$POSTGRESQL_DATA_DIR" -U "postgres" >/dev/null 2>&1 + fi +} + +######################## +# Initialize slave node by running pg_basebackup +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### +postgresql_slave_init_db() { + info "Waiting for replication master to accept connections (${POSTGRESQL_INIT_MAX_TIMEOUT} timeout)..." + local -r check_args=("-U" "$POSTGRESQL_REPLICATION_USER" "-h" "$POSTGRESQL_MASTER_HOST" "-p" "$POSTGRESQL_MASTER_PORT_NUMBER" "-d" "postgres") + local check_cmd=() + if am_i_root; then + check_cmd=("run_as_user" "$POSTGRESQL_DAEMON_USER") + fi + check_cmd+=("$POSTGRESQL_BIN_DIR"/pg_isready) + local ready_counter=$POSTGRESQL_INIT_MAX_TIMEOUT + + while ! PGPASSWORD=$POSTGRESQL_REPLICATION_PASSWORD "${check_cmd[@]}" "${check_args[@]}"; do + sleep 1 + ready_counter=$((ready_counter - 1)) + if ((ready_counter <= 0)); then + error "PostgreSQL master is not ready after $POSTGRESQL_INIT_MAX_TIMEOUT seconds" + exit 1 + fi + + done + info "Replicating the initial database" + local -r backup_args=("-D" "$POSTGRESQL_DATA_DIR" "-U" "$POSTGRESQL_REPLICATION_USER" "-h" "$POSTGRESQL_MASTER_HOST" "-p" "$POSTGRESQL_MASTER_PORT_NUMBER" "-X" "stream" "-w" "-v" "-P") + local backup_cmd=() + if am_i_root; then + backup_cmd+=("run_as_user" "$POSTGRESQL_DAEMON_USER") + fi + backup_cmd+=("$POSTGRESQL_BIN_DIR"/pg_basebackup) + local replication_counter=$POSTGRESQL_INIT_MAX_TIMEOUT + while ! PGPASSWORD=$POSTGRESQL_REPLICATION_PASSWORD "${backup_cmd[@]}" "${backup_args[@]}"; do + debug "Backup command failed. 
Sleeping and trying again" + sleep 1 + replication_counter=$((replication_counter - 1)) + if ((replication_counter <= 0)); then + error "Slave replication failed after trying for $POSTGRESQL_INIT_MAX_TIMEOUT seconds" + exit 1 + fi + done +} + +######################## +# Create recovery.conf in slave node +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### +postgresql_configure_recovery() { + info "Setting up streaming replication slave..." + + local -r escaped_password="${POSTGRESQL_REPLICATION_PASSWORD//\&/\\&}" + local -r psql_major_version="$(postgresql_get_major_version)" + if ((psql_major_version >= 12)); then + postgresql_set_property "primary_conninfo" "host=${POSTGRESQL_MASTER_HOST} port=${POSTGRESQL_MASTER_PORT_NUMBER} user=${POSTGRESQL_REPLICATION_USER} password=${escaped_password} application_name=${POSTGRESQL_CLUSTER_APP_NAME}" "$POSTGRESQL_CONF_FILE" + ((psql_major_version < 16)) && postgresql_set_property "promote_trigger_file" "/tmp/postgresql.trigger.${POSTGRESQL_MASTER_PORT_NUMBER}" "$POSTGRESQL_CONF_FILE" + touch "$POSTGRESQL_DATA_DIR"/standby.signal + else + cp -f "$POSTGRESQL_BASE_DIR/share/recovery.conf.sample" "$POSTGRESQL_RECOVERY_FILE" + chmod 600 "$POSTGRESQL_RECOVERY_FILE" + am_i_root && chown "$POSTGRESQL_DAEMON_USER:$POSTGRESQL_DAEMON_GROUP" "$POSTGRESQL_RECOVERY_FILE" + postgresql_set_property "standby_mode" "on" "$POSTGRESQL_RECOVERY_FILE" + postgresql_set_property "primary_conninfo" "host=${POSTGRESQL_MASTER_HOST} port=${POSTGRESQL_MASTER_PORT_NUMBER} user=${POSTGRESQL_REPLICATION_USER} password=${escaped_password} application_name=${POSTGRESQL_CLUSTER_APP_NAME}" "$POSTGRESQL_RECOVERY_FILE" + postgresql_set_property "trigger_file" "/tmp/postgresql.trigger.${POSTGRESQL_MASTER_PORT_NUMBER}" "$POSTGRESQL_RECOVERY_FILE" + fi +} + +######################## +# Configure logging parameters +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### 
+postgresql_configure_logging() { + [[ -n "$POSTGRESQL_PGAUDIT_LOG" ]] && postgresql_set_property "pgaudit.log" "$POSTGRESQL_PGAUDIT_LOG" + [[ -n "$POSTGRESQL_PGAUDIT_LOG_CATALOG" ]] && postgresql_set_property "pgaudit.log_catalog" "$POSTGRESQL_PGAUDIT_LOG_CATALOG" + [[ -n "$POSTGRESQL_PGAUDIT_LOG_PARAMETER" ]] && postgresql_set_property "pgaudit.log_parameter" "$POSTGRESQL_PGAUDIT_LOG_PARAMETER" + [[ -n "$POSTGRESQL_LOG_CONNECTIONS" ]] && postgresql_set_property "log_connections" "$POSTGRESQL_LOG_CONNECTIONS" + [[ -n "$POSTGRESQL_LOG_DISCONNECTIONS" ]] && postgresql_set_property "log_disconnections" "$POSTGRESQL_LOG_DISCONNECTIONS" + [[ -n "$POSTGRESQL_LOG_HOSTNAME" ]] && postgresql_set_property "log_hostname" "$POSTGRESQL_LOG_HOSTNAME" + [[ -n "$POSTGRESQL_CLIENT_MIN_MESSAGES" ]] && postgresql_set_property "client_min_messages" "$POSTGRESQL_CLIENT_MIN_MESSAGES" + [[ -n "$POSTGRESQL_LOG_LINE_PREFIX" ]] && postgresql_set_property "log_line_prefix" "$POSTGRESQL_LOG_LINE_PREFIX" + ([[ -n "$POSTGRESQL_LOG_TIMEZONE" ]] && postgresql_set_property "log_timezone" "$POSTGRESQL_LOG_TIMEZONE") || true +} + +######################## +# Configure connection parameters +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### +postgresql_configure_connections() { + [[ -n "$POSTGRESQL_MAX_CONNECTIONS" ]] && postgresql_set_property "max_connections" "$POSTGRESQL_MAX_CONNECTIONS" + [[ -n "$POSTGRESQL_TCP_KEEPALIVES_IDLE" ]] && postgresql_set_property "tcp_keepalives_idle" "$POSTGRESQL_TCP_KEEPALIVES_IDLE" + [[ -n "$POSTGRESQL_TCP_KEEPALIVES_INTERVAL" ]] && postgresql_set_property "tcp_keepalives_interval" "$POSTGRESQL_TCP_KEEPALIVES_INTERVAL" + [[ -n "$POSTGRESQL_TCP_KEEPALIVES_COUNT" ]] && postgresql_set_property "tcp_keepalives_count" "$POSTGRESQL_TCP_KEEPALIVES_COUNT" + ([[ -n "$POSTGRESQL_STATEMENT_TIMEOUT" ]] && postgresql_set_property "statement_timeout" "$POSTGRESQL_STATEMENT_TIMEOUT") || true +} + +######################## +# 
Configure timezone +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### +postgresql_configure_timezone() { + ([[ -n "$POSTGRESQL_TIMEZONE" ]] && postgresql_set_property "timezone" "$POSTGRESQL_TIMEZONE") || true +} + +######################## +# Remove pg_hba.conf lines based on filter +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# Boolean +######################### +postgresql_remove_pghba_lines() { + for filter in ${POSTGRESQL_PGHBA_REMOVE_FILTERS//,/ }; do + result="$(sed -E "/${filter}/d" "$POSTGRESQL_PGHBA_FILE")" + echo "$result" >"$POSTGRESQL_PGHBA_FILE" + done +} + +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 + +# shellcheck disable=SC2148 + +######################## +# Return PostgreSQL major version +# Globals: +# POSTGRESQL_* +# Arguments: +# None +# Returns: +# String +######################### +postgresql_get_major_version() { + psql --version | grep -oE "[0-9]+\.[0-9]+" | grep -oE "^[0-9]+" +} + +######################## +# Gets an environment variable name based on the suffix +# Arguments: +# $1 - environment variable suffix +# Returns: +# environment variable name +######################### +get_env_var_value() { + local env_var_suffix="${1:?missing suffix}" + local env_var_name + for env_var_prefix in POSTGRESQL POSTGRESQL_CLIENT; do + env_var_name="${env_var_prefix}_${env_var_suffix}" + if [[ -n "${!env_var_name:-}" ]]; then + echo "${!env_var_name}" + break + fi + done +} + +######################## +# Execute an arbitrary query/queries against the running PostgreSQL service and print the output +# Stdin: +# Query/queries to execute +# Globals: +# BITNAMI_DEBUG +# POSTGRESQL_* +# Arguments: +# $1 - Database where to run the queries +# $2 - User to run queries +# $3 - Password +# $4 - Extra options (eg. 
-tA) +# Returns: +# None +######################### +postgresql_execute_print_output() { + local -r db="${1:-}" + local -r user="${2:-postgres}" + local -r pass="${3:-}" + local opts + read -r -a opts <<<"${@:4}" + + local args=("-U" "$user" "-p" "${POSTGRESQL_PORT_NUMBER:-5432}") + [[ -n "$db" ]] && args+=("-d" "$db") + [[ "${#opts[@]}" -gt 0 ]] && args+=("${opts[@]}") + + # Execute the Query/queries from stdin + PGPASSWORD=$pass psql "${args[@]}" +} + +######################## +# Execute an arbitrary query/queries against the running PostgreSQL service +# Stdin: +# Query/queries to execute +# Globals: +# BITNAMI_DEBUG +# POSTGRESQL_* +# Arguments: +# $1 - Database where to run the queries +# $2 - User to run queries +# $3 - Password +# $4 - Extra options (eg. -tA) +# Returns: +# None +######################### +postgresql_execute() { + if [[ "${BITNAMI_DEBUG:-false}" = true ]]; then + "postgresql_execute_print_output" "$@" + elif [[ "${NO_ERRORS:-false}" = true ]]; then + "postgresql_execute_print_output" "$@" 2>/dev/null + else + "postgresql_execute_print_output" "$@" >/dev/null 2>&1 + fi +} + +######################## +# Execute an arbitrary query/queries against a remote PostgreSQL service and print to stdout +# Stdin: +# Query/queries to execute +# Globals: +# BITNAMI_DEBUG +# DB_* +# Arguments: +# $1 - Remote PostgreSQL service hostname +# $2 - Remote PostgreSQL service port +# $3 - Database where to run the queries +# $4 - User to run queries +# $5 - Password +# $6 - Extra options (eg. 
-tA) +# Returns: +# None +postgresql_remote_execute_print_output() { + local -r hostname="${1:?hostname is required}" + local -r port="${2:?port is required}" + local -a args=("-h" "$hostname" "-p" "$port") + shift 2 + "postgresql_execute_print_output" "$@" "${args[@]}" +} + +######################## +# Execute an arbitrary query/queries against a remote PostgreSQL service +# Stdin: +# Query/queries to execute +# Globals: +# BITNAMI_DEBUG +# DB_* +# Arguments: +# $1 - Remote PostgreSQL service hostname +# $2 - Remote PostgreSQL service port +# $3 - Database where to run the queries +# $4 - User to run queries +# $5 - Password +# $6 - Extra options (eg. -tA) +# Returns: +# None +postgresql_remote_execute() { + if [[ "${BITNAMI_DEBUG:-false}" = true ]]; then + "postgresql_remote_execute_print_output" "$@" + elif [[ "${NO_ERRORS:-false}" = true ]]; then + "postgresql_remote_execute_print_output" "$@" 2>/dev/null + else + "postgresql_remote_execute_print_output" "$@" >/dev/null 2>&1 + fi +} + +######################## +# Optionally create the given database user +# Flags: +# -p|--password - database password +# --host - database host +# --port - database port +# Arguments: +# $1 - user +# Returns: +# None +######################### +postgresql_ensure_user_exists() { + local -r user="${1:?user is missing}" + local password="" + # For accessing an external database + local db_host="" + local db_port="" + + # Validate arguments + shift 1 + while [ "$#" -gt 0 ]; do + case "$1" in + -p | --password) + shift + password="${1:?missing password}" + ;; + --host) + shift + db_host="${1:?missing database host}" + ;; + --port) + shift + db_port="${1:?missing database port}" + ;; + *) + echo "Invalid command line flag $1" >&2 + return 1 + ;; + esac + shift + done + + local -a postgresql_execute_cmd=("postgresql_execute") + [[ -n "$db_host" && -n "$db_port" ]] && postgresql_execute_cmd=("postgresql_remote_execute" "$db_host" "$db_port") + local -a postgresql_execute_flags=("postgres" 
"$(get_env_var_value POSTGRES_USER)" "$(get_env_var_value POSTGRES_PASSWORD)") + + "${postgresql_execute_cmd[@]}" "${postgresql_execute_flags[@]}" <&2 + return 1 + ;; + esac + shift + done + + local -a postgresql_execute_cmd=("postgresql_execute") + [[ -n "$db_host" && -n "$db_port" ]] && postgresql_execute_cmd=("postgresql_remote_execute" "$db_host" "$db_port") + local -a postgresql_execute_flags=("postgres" "$(get_env_var_value POSTGRES_USER)" "$(get_env_var_value POSTGRES_PASSWORD)") + + "${postgresql_execute_cmd[@]}" "${postgresql_execute_flags[@]}" <> "$LOCALES_FILE" + else + echo "Locale ${locale} is already enabled" + fi +} + +if [[ "$WITH_ALL_LOCALES" =~ ^(yes|true|1)$ ]]; then + echo "Enabling all locales" + cp "$SUPPORTED_LOCALES_FILE" "$LOCALES_FILE" +else + # shellcheck disable=SC2001 + LOCALES_TO_ADD="$(sed 's/[,;]\s*/\n/g' <<< "$EXTRA_LOCALES")" + while [[ -n "$LOCALES_TO_ADD" ]] && read -r locale; do + echo "Enabling locale ${locale}" + enable_locale "$locale" + done <<< "$LOCALES_TO_ADD" +fi + +locale-gen diff --git a/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql-env.sh b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql-env.sh new file mode 100644 index 000000000..e4fa8f525 --- /dev/null +++ b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql-env.sh @@ -0,0 +1,351 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Environment configuration for postgresql + +# The values for all environment variables will be set in the below order of precedence +# 1. Custom environment variables defined below after Bitnami defaults +# 2. Constants defined in this file (environment variables with no default), i.e. BITNAMI_ROOT_DIR +# 3. Environment variables overridden via external files using *_FILE variables (see below) +# 4. Environment variables set externally (i.e. current Bash context/Dockerfile/userdata) + +# Load logging library +# shellcheck disable=SC1090,SC1091 +. 
/opt/bitnami/scripts/liblog.sh + +export BITNAMI_ROOT_DIR="/opt/bitnami" +export BITNAMI_VOLUME_DIR="/bitnami" + +# Logging configuration +export MODULE="${MODULE:-postgresql}" +export BITNAMI_DEBUG="${BITNAMI_DEBUG:-false}" + +# By setting an environment variable matching *_FILE to a file path, the prefixed environment +# variable will be overridden with the value specified in that file +postgresql_env_vars=( + POSTGRESQL_VOLUME_DIR + POSTGRESQL_DATA_DIR + POSTGRESQL_EXTRA_FLAGS + POSTGRESQL_INIT_MAX_TIMEOUT + POSTGRESQL_PGCTLTIMEOUT + POSTGRESQL_SHUTDOWN_MODE + POSTGRESQL_CLUSTER_APP_NAME + POSTGRESQL_DATABASE + POSTGRESQL_INITDB_ARGS + ALLOW_EMPTY_PASSWORD + POSTGRESQL_INITDB_WAL_DIR + POSTGRESQL_MASTER_HOST + POSTGRESQL_MASTER_PORT_NUMBER + POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS + POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE + POSTGRESQL_PORT_NUMBER + POSTGRESQL_ALLOW_REMOTE_CONNECTIONS + POSTGRESQL_REPLICATION_MODE + POSTGRESQL_REPLICATION_USER + POSTGRESQL_SYNCHRONOUS_COMMIT_MODE + POSTGRESQL_FSYNC + POSTGRESQL_USERNAME + POSTGRESQL_ENABLE_LDAP + POSTGRESQL_LDAP_URL + POSTGRESQL_LDAP_PREFIX + POSTGRESQL_LDAP_SUFFIX + POSTGRESQL_LDAP_SERVER + POSTGRESQL_LDAP_PORT + POSTGRESQL_LDAP_SCHEME + POSTGRESQL_LDAP_TLS + POSTGRESQL_LDAP_BASE_DN + POSTGRESQL_LDAP_BIND_DN + POSTGRESQL_LDAP_BIND_PASSWORD + POSTGRESQL_LDAP_SEARCH_ATTR + POSTGRESQL_LDAP_SEARCH_FILTER + POSTGRESQL_INITSCRIPTS_USERNAME + POSTGRESQL_PASSWORD + POSTGRESQL_POSTGRES_PASSWORD + POSTGRESQL_REPLICATION_PASSWORD + POSTGRESQL_INITSCRIPTS_PASSWORD + POSTGRESQL_ENABLE_TLS + POSTGRESQL_TLS_CERT_FILE + POSTGRESQL_TLS_KEY_FILE + POSTGRESQL_TLS_CA_FILE + POSTGRESQL_TLS_CRL_FILE + POSTGRESQL_TLS_PREFER_SERVER_CIPHERS + POSTGRESQL_SHARED_PRELOAD_LIBRARIES + POSTGRESQL_PGAUDIT_LOG + POSTGRESQL_PGAUDIT_LOG_CATALOG + POSTGRESQL_PGAUDIT_LOG_PARAMETER + POSTGRESQL_LOG_CONNECTIONS + POSTGRESQL_LOG_DISCONNECTIONS + POSTGRESQL_LOG_HOSTNAME + POSTGRESQL_CLIENT_MIN_MESSAGES + POSTGRESQL_LOG_LINE_PREFIX + POSTGRESQL_LOG_TIMEZONE + 
POSTGRESQL_TIMEZONE + POSTGRESQL_MAX_CONNECTIONS + POSTGRESQL_TCP_KEEPALIVES_IDLE + POSTGRESQL_TCP_KEEPALIVES_INTERVAL + POSTGRESQL_TCP_KEEPALIVES_COUNT + POSTGRESQL_STATEMENT_TIMEOUT + POSTGRESQL_PGHBA_REMOVE_FILTERS + POSTGRESQL_USERNAME_CONNECTION_LIMIT + POSTGRESQL_POSTGRES_CONNECTION_LIMIT + POSTGRESQL_WAL_LEVEL + POSTGRESQL_DEFAULT_TOAST_COMPRESSION + POSTGRESQL_PASSWORD_ENCRYPTION + POSTGRESQL_DEFAULT_TRANSACTION_ISOLATION + POSTGRESQL_AUTOCTL_CONF_DIR + POSTGRESQL_AUTOCTL_MODE + POSTGRESQL_AUTOCTL_MONITOR_HOST + POSTGRESQL_AUTOCTL_HOSTNAME + POSTGRES_DATA_DIR + PGDATA + POSTGRES_SHUTDOWN_MODE + POSTGRES_CLUSTER_APP_NAME + POSTGRES_DATABASE + POSTGRES_DB + POSTGRES_INITDB_ARGS + POSTGRES_INITDB_WAL_DIR + POSTGRES_MASTER_HOST + POSTGRES_MASTER_PORT_NUMBER + POSTGRES_NUM_SYNCHRONOUS_REPLICAS + POSTGRES_SYNCHRONOUS_REPLICAS_MODE + POSTGRES_PORT_NUMBER + POSTGRES_ALLOW_REMOTE_CONNECTIONS + POSTGRES_REPLICATION_MODE + POSTGRES_REPLICATION_USER + POSTGRES_SYNCHRONOUS_COMMIT_MODE + POSTGRES_FSYNC + POSTGRES_USERNAME + POSTGRES_USER + POSTGRESQL_USER + POSTGRES_ENABLE_LDAP + POSTGRES_LDAP_URL + POSTGRES_LDAP_PREFIX + POSTGRES_LDAP_SUFFIX + POSTGRES_LDAP_SERVER + POSTGRES_LDAP_PORT + POSTGRES_LDAP_SCHEME + POSTGRES_LDAP_TLS + POSTGRES_LDAP_BASE_DN + POSTGRES_LDAP_BIND_DN + POSTGRES_LDAP_BIND_PASSWORD + POSTGRES_LDAP_SEARCH_ATTR + POSTGRES_LDAP_SEARCH_FILTER + POSTGRES_INITSCRIPTS_USERNAME + POSTGRES_PASSWORD + POSTGRES_POSTGRES_PASSWORD + POSTGRES_REPLICATION_PASSWORD + POSTGRES_INITSCRIPTS_PASSWORD + POSTGRES_ENABLE_TLS + POSTGRES_TLS_CERT_FILE + POSTGRES_TLS_KEY_FILE + POSTGRES_TLS_CA_FILE + POSTGRES_TLS_CRL_FILE + POSTGRES_TLS_PREFER_SERVER_CIPHERS + POSTGRES_SHARED_PRELOAD_LIBRARIES + POSTGRES_PGAUDIT_LOG + POSTGRES_PGAUDIT_LOG_CATALOG + POSTGRES_PGAUDIT_LOG_PARAMETER + POSTGRES_LOG_CONNECTIONS + POSTGRES_LOG_DISCONNECTIONS + POSTGRES_LOG_HOSTNAME + POSTGRES_CLIENT_MIN_MESSAGES + POSTGRES_LOG_LINE_PREFIX + POSTGRES_LOG_TIMEZONE + POSTGRES_TIMEZONE + 
POSTGRES_MAX_CONNECTIONS + POSTGRES_TCP_KEEPALIVES_IDLE + POSTGRES_TCP_KEEPALIVES_INTERVAL + POSTGRES_TCP_KEEPALIVES_COUNT + POSTGRES_STATEMENT_TIMEOUT + POSTGRES_PGHBA_REMOVE_FILTERS + POSTGRES_USER_CONNECTION_LIMIT + POSTGRES_POSTGRES_CONNECTION_LIMIT +) +for env_var in "${postgresql_env_vars[@]}"; do + file_env_var="${env_var}_FILE" + if [[ -n "${!file_env_var:-}" ]]; then + if [[ -r "${!file_env_var:-}" ]]; then + export "${env_var}=$(< "${!file_env_var}")" + unset "${file_env_var}" + else + warn "Skipping export of '${env_var}'. '${!file_env_var:-}' is not readable." + fi + fi +done +unset postgresql_env_vars + +# Paths +export POSTGRESQL_VOLUME_DIR="${POSTGRESQL_VOLUME_DIR:-/bitnami/postgresql}" +export POSTGRESQL_BASE_DIR="/opt/bitnami/postgresql" +POSTGRESQL_DATA_DIR="${POSTGRESQL_DATA_DIR:-"${POSTGRES_DATA_DIR:-}"}" +POSTGRESQL_DATA_DIR="${POSTGRESQL_DATA_DIR:-"${PGDATA:-}"}" +export POSTGRESQL_DATA_DIR="${POSTGRESQL_DATA_DIR:-${POSTGRESQL_VOLUME_DIR}/data}" +export POSTGRESQL_CONF_DIR="$POSTGRESQL_BASE_DIR/conf" +export POSTGRESQL_MOUNTED_CONF_DIR="$POSTGRESQL_VOLUME_DIR/conf" +export POSTGRESQL_CONF_FILE="$POSTGRESQL_CONF_DIR/postgresql.conf" +export POSTGRESQL_PGHBA_FILE="$POSTGRESQL_CONF_DIR/pg_hba.conf" +export POSTGRESQL_RECOVERY_FILE="$POSTGRESQL_DATA_DIR/recovery.conf" +export POSTGRESQL_LOG_DIR="$POSTGRESQL_BASE_DIR/logs" +export POSTGRESQL_LOG_FILE="$POSTGRESQL_LOG_DIR/postgresql.log" +export POSTGRESQL_TMP_DIR="$POSTGRESQL_BASE_DIR/tmp" +export POSTGRESQL_PID_FILE="$POSTGRESQL_TMP_DIR/postgresql.pid" +export POSTGRESQL_BIN_DIR="$POSTGRESQL_BASE_DIR/bin" +export POSTGRESQL_INITSCRIPTS_DIR="/docker-entrypoint-initdb.d" +export POSTGRESQL_PREINITSCRIPTS_DIR="/docker-entrypoint-preinitdb.d" +export PATH="${POSTGRESQL_BIN_DIR}:${BITNAMI_ROOT_DIR}/common/bin:${PATH}" + +# System users (when running with a privileged user) +export POSTGRESQL_DAEMON_USER="postgres" +export POSTGRESQL_DAEMON_GROUP="postgres" + +# PostgreSQL settings +export 
POSTGRESQL_EXTRA_FLAGS="${POSTGRESQL_EXTRA_FLAGS:-}" +export POSTGRESQL_INIT_MAX_TIMEOUT="${POSTGRESQL_INIT_MAX_TIMEOUT:-60}" +export POSTGRESQL_PGCTLTIMEOUT="${POSTGRESQL_PGCTLTIMEOUT:-60}" +POSTGRESQL_SHUTDOWN_MODE="${POSTGRESQL_SHUTDOWN_MODE:-"${POSTGRES_SHUTDOWN_MODE:-}"}" +export POSTGRESQL_SHUTDOWN_MODE="${POSTGRESQL_SHUTDOWN_MODE:-fast}" +POSTGRESQL_CLUSTER_APP_NAME="${POSTGRESQL_CLUSTER_APP_NAME:-"${POSTGRES_CLUSTER_APP_NAME:-}"}" +export POSTGRESQL_CLUSTER_APP_NAME="${POSTGRESQL_CLUSTER_APP_NAME:-walreceiver}" +POSTGRESQL_DATABASE="${POSTGRESQL_DATABASE:-"${POSTGRES_DATABASE:-}"}" +POSTGRESQL_DATABASE="${POSTGRESQL_DATABASE:-"${POSTGRES_DB:-}"}" +export POSTGRESQL_DATABASE="${POSTGRESQL_DATABASE:-postgres}" +POSTGRESQL_INITDB_ARGS="${POSTGRESQL_INITDB_ARGS:-"${POSTGRES_INITDB_ARGS:-}"}" +export POSTGRESQL_INITDB_ARGS="${POSTGRESQL_INITDB_ARGS:-}" +export ALLOW_EMPTY_PASSWORD="${ALLOW_EMPTY_PASSWORD:-no}" +POSTGRESQL_INITDB_WAL_DIR="${POSTGRESQL_INITDB_WAL_DIR:-"${POSTGRES_INITDB_WAL_DIR:-}"}" +export POSTGRESQL_INITDB_WAL_DIR="${POSTGRESQL_INITDB_WAL_DIR:-}" +POSTGRESQL_MASTER_HOST="${POSTGRESQL_MASTER_HOST:-"${POSTGRES_MASTER_HOST:-}"}" +export POSTGRESQL_MASTER_HOST="${POSTGRESQL_MASTER_HOST:-}" +POSTGRESQL_MASTER_PORT_NUMBER="${POSTGRESQL_MASTER_PORT_NUMBER:-"${POSTGRES_MASTER_PORT_NUMBER:-}"}" +export POSTGRESQL_MASTER_PORT_NUMBER="${POSTGRESQL_MASTER_PORT_NUMBER:-5432}" +POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS="${POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS:-"${POSTGRES_NUM_SYNCHRONOUS_REPLICAS:-}"}" +export POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS="${POSTGRESQL_NUM_SYNCHRONOUS_REPLICAS:-0}" +POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE="${POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE:-"${POSTGRES_SYNCHRONOUS_REPLICAS_MODE:-}"}" +export POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE="${POSTGRESQL_SYNCHRONOUS_REPLICAS_MODE:-}" +POSTGRESQL_PORT_NUMBER="${POSTGRESQL_PORT_NUMBER:-"${POSTGRES_PORT_NUMBER:-}"}" +export POSTGRESQL_PORT_NUMBER="${POSTGRESQL_PORT_NUMBER:-5432}" 
+POSTGRESQL_ALLOW_REMOTE_CONNECTIONS="${POSTGRESQL_ALLOW_REMOTE_CONNECTIONS:-"${POSTGRES_ALLOW_REMOTE_CONNECTIONS:-}"}" +export POSTGRESQL_ALLOW_REMOTE_CONNECTIONS="${POSTGRESQL_ALLOW_REMOTE_CONNECTIONS:-yes}" +POSTGRESQL_REPLICATION_MODE="${POSTGRESQL_REPLICATION_MODE:-"${POSTGRES_REPLICATION_MODE:-}"}" +export POSTGRESQL_REPLICATION_MODE="${POSTGRESQL_REPLICATION_MODE:-master}" +POSTGRESQL_REPLICATION_USER="${POSTGRESQL_REPLICATION_USER:-"${POSTGRES_REPLICATION_USER:-}"}" +export POSTGRESQL_REPLICATION_USER="${POSTGRESQL_REPLICATION_USER:-}" +POSTGRESQL_SYNCHRONOUS_COMMIT_MODE="${POSTGRESQL_SYNCHRONOUS_COMMIT_MODE:-"${POSTGRES_SYNCHRONOUS_COMMIT_MODE:-}"}" +export POSTGRESQL_SYNCHRONOUS_COMMIT_MODE="${POSTGRESQL_SYNCHRONOUS_COMMIT_MODE:-on}" +POSTGRESQL_FSYNC="${POSTGRESQL_FSYNC:-"${POSTGRES_FSYNC:-}"}" +export POSTGRESQL_FSYNC="${POSTGRESQL_FSYNC:-on}" +POSTGRESQL_USERNAME="${POSTGRESQL_USERNAME:-"${POSTGRES_USERNAME:-}"}" +POSTGRESQL_USERNAME="${POSTGRESQL_USERNAME:-"${POSTGRES_USER:-}"}" +POSTGRESQL_USERNAME="${POSTGRESQL_USERNAME:-"${POSTGRESQL_USER:-}"}" +export POSTGRESQL_USERNAME="${POSTGRESQL_USERNAME:-postgres}" +POSTGRESQL_ENABLE_LDAP="${POSTGRESQL_ENABLE_LDAP:-"${POSTGRES_ENABLE_LDAP:-}"}" +export POSTGRESQL_ENABLE_LDAP="${POSTGRESQL_ENABLE_LDAP:-no}" +POSTGRESQL_LDAP_URL="${POSTGRESQL_LDAP_URL:-"${POSTGRES_LDAP_URL:-}"}" +export POSTGRESQL_LDAP_URL="${POSTGRESQL_LDAP_URL:-}" +POSTGRESQL_LDAP_PREFIX="${POSTGRESQL_LDAP_PREFIX:-"${POSTGRES_LDAP_PREFIX:-}"}" +export POSTGRESQL_LDAP_PREFIX="${POSTGRESQL_LDAP_PREFIX:-}" +POSTGRESQL_LDAP_SUFFIX="${POSTGRESQL_LDAP_SUFFIX:-"${POSTGRES_LDAP_SUFFIX:-}"}" +export POSTGRESQL_LDAP_SUFFIX="${POSTGRESQL_LDAP_SUFFIX:-}" +POSTGRESQL_LDAP_SERVER="${POSTGRESQL_LDAP_SERVER:-"${POSTGRES_LDAP_SERVER:-}"}" +export POSTGRESQL_LDAP_SERVER="${POSTGRESQL_LDAP_SERVER:-}" +POSTGRESQL_LDAP_PORT="${POSTGRESQL_LDAP_PORT:-"${POSTGRES_LDAP_PORT:-}"}" +export POSTGRESQL_LDAP_PORT="${POSTGRESQL_LDAP_PORT:-}" 
+POSTGRESQL_LDAP_SCHEME="${POSTGRESQL_LDAP_SCHEME:-"${POSTGRES_LDAP_SCHEME:-}"}" +export POSTGRESQL_LDAP_SCHEME="${POSTGRESQL_LDAP_SCHEME:-}" +POSTGRESQL_LDAP_TLS="${POSTGRESQL_LDAP_TLS:-"${POSTGRES_LDAP_TLS:-}"}" +export POSTGRESQL_LDAP_TLS="${POSTGRESQL_LDAP_TLS:-}" +POSTGRESQL_LDAP_BASE_DN="${POSTGRESQL_LDAP_BASE_DN:-"${POSTGRES_LDAP_BASE_DN:-}"}" +export POSTGRESQL_LDAP_BASE_DN="${POSTGRESQL_LDAP_BASE_DN:-}" +POSTGRESQL_LDAP_BIND_DN="${POSTGRESQL_LDAP_BIND_DN:-"${POSTGRES_LDAP_BIND_DN:-}"}" +export POSTGRESQL_LDAP_BIND_DN="${POSTGRESQL_LDAP_BIND_DN:-}" +POSTGRESQL_LDAP_BIND_PASSWORD="${POSTGRESQL_LDAP_BIND_PASSWORD:-"${POSTGRES_LDAP_BIND_PASSWORD:-}"}" +export POSTGRESQL_LDAP_BIND_PASSWORD="${POSTGRESQL_LDAP_BIND_PASSWORD:-}" +POSTGRESQL_LDAP_SEARCH_ATTR="${POSTGRESQL_LDAP_SEARCH_ATTR:-"${POSTGRES_LDAP_SEARCH_ATTR:-}"}" +export POSTGRESQL_LDAP_SEARCH_ATTR="${POSTGRESQL_LDAP_SEARCH_ATTR:-}" +POSTGRESQL_LDAP_SEARCH_FILTER="${POSTGRESQL_LDAP_SEARCH_FILTER:-"${POSTGRES_LDAP_SEARCH_FILTER:-}"}" +export POSTGRESQL_LDAP_SEARCH_FILTER="${POSTGRESQL_LDAP_SEARCH_FILTER:-}" +POSTGRESQL_INITSCRIPTS_USERNAME="${POSTGRESQL_INITSCRIPTS_USERNAME:-"${POSTGRES_INITSCRIPTS_USERNAME:-}"}" +export POSTGRESQL_INITSCRIPTS_USERNAME="${POSTGRESQL_INITSCRIPTS_USERNAME:-$POSTGRESQL_USERNAME}" +POSTGRESQL_PASSWORD="${POSTGRESQL_PASSWORD:-"${POSTGRES_PASSWORD:-}"}" +export POSTGRESQL_PASSWORD="${POSTGRESQL_PASSWORD:-}" +POSTGRESQL_POSTGRES_PASSWORD="${POSTGRESQL_POSTGRES_PASSWORD:-"${POSTGRES_POSTGRES_PASSWORD:-}"}" +export POSTGRESQL_POSTGRES_PASSWORD="${POSTGRESQL_POSTGRES_PASSWORD:-}" +POSTGRESQL_REPLICATION_PASSWORD="${POSTGRESQL_REPLICATION_PASSWORD:-"${POSTGRES_REPLICATION_PASSWORD:-}"}" +export POSTGRESQL_REPLICATION_PASSWORD="${POSTGRESQL_REPLICATION_PASSWORD:-}" +POSTGRESQL_INITSCRIPTS_PASSWORD="${POSTGRESQL_INITSCRIPTS_PASSWORD:-"${POSTGRES_INITSCRIPTS_PASSWORD:-}"}" +export POSTGRESQL_INITSCRIPTS_PASSWORD="${POSTGRESQL_INITSCRIPTS_PASSWORD:-$POSTGRESQL_PASSWORD}" 
+POSTGRESQL_ENABLE_TLS="${POSTGRESQL_ENABLE_TLS:-"${POSTGRES_ENABLE_TLS:-}"}" +export POSTGRESQL_ENABLE_TLS="${POSTGRESQL_ENABLE_TLS:-no}" +POSTGRESQL_TLS_CERT_FILE="${POSTGRESQL_TLS_CERT_FILE:-"${POSTGRES_TLS_CERT_FILE:-}"}" +export POSTGRESQL_TLS_CERT_FILE="${POSTGRESQL_TLS_CERT_FILE:-}" +POSTGRESQL_TLS_KEY_FILE="${POSTGRESQL_TLS_KEY_FILE:-"${POSTGRES_TLS_KEY_FILE:-}"}" +export POSTGRESQL_TLS_KEY_FILE="${POSTGRESQL_TLS_KEY_FILE:-}" +POSTGRESQL_TLS_CA_FILE="${POSTGRESQL_TLS_CA_FILE:-"${POSTGRES_TLS_CA_FILE:-}"}" +export POSTGRESQL_TLS_CA_FILE="${POSTGRESQL_TLS_CA_FILE:-}" +POSTGRESQL_TLS_CRL_FILE="${POSTGRESQL_TLS_CRL_FILE:-"${POSTGRES_TLS_CRL_FILE:-}"}" +export POSTGRESQL_TLS_CRL_FILE="${POSTGRESQL_TLS_CRL_FILE:-}" +POSTGRESQL_TLS_PREFER_SERVER_CIPHERS="${POSTGRESQL_TLS_PREFER_SERVER_CIPHERS:-"${POSTGRES_TLS_PREFER_SERVER_CIPHERS:-}"}" +export POSTGRESQL_TLS_PREFER_SERVER_CIPHERS="${POSTGRESQL_TLS_PREFER_SERVER_CIPHERS:-yes}" +POSTGRESQL_SHARED_PRELOAD_LIBRARIES="${POSTGRESQL_SHARED_PRELOAD_LIBRARIES:-"${POSTGRES_SHARED_PRELOAD_LIBRARIES:-}"}" +export POSTGRESQL_SHARED_PRELOAD_LIBRARIES="${POSTGRESQL_SHARED_PRELOAD_LIBRARIES:-pgaudit}" +POSTGRESQL_PGAUDIT_LOG="${POSTGRESQL_PGAUDIT_LOG:-"${POSTGRES_PGAUDIT_LOG:-}"}" +export POSTGRESQL_PGAUDIT_LOG="${POSTGRESQL_PGAUDIT_LOG:-}" +POSTGRESQL_PGAUDIT_LOG_CATALOG="${POSTGRESQL_PGAUDIT_LOG_CATALOG:-"${POSTGRES_PGAUDIT_LOG_CATALOG:-}"}" +export POSTGRESQL_PGAUDIT_LOG_CATALOG="${POSTGRESQL_PGAUDIT_LOG_CATALOG:-}" +POSTGRESQL_PGAUDIT_LOG_PARAMETER="${POSTGRESQL_PGAUDIT_LOG_PARAMETER:-"${POSTGRES_PGAUDIT_LOG_PARAMETER:-}"}" +export POSTGRESQL_PGAUDIT_LOG_PARAMETER="${POSTGRESQL_PGAUDIT_LOG_PARAMETER:-}" +POSTGRESQL_LOG_CONNECTIONS="${POSTGRESQL_LOG_CONNECTIONS:-"${POSTGRES_LOG_CONNECTIONS:-}"}" +export POSTGRESQL_LOG_CONNECTIONS="${POSTGRESQL_LOG_CONNECTIONS:-}" +POSTGRESQL_LOG_DISCONNECTIONS="${POSTGRESQL_LOG_DISCONNECTIONS:-"${POSTGRES_LOG_DISCONNECTIONS:-}"}" +export 
POSTGRESQL_LOG_DISCONNECTIONS="${POSTGRESQL_LOG_DISCONNECTIONS:-}" +POSTGRESQL_LOG_HOSTNAME="${POSTGRESQL_LOG_HOSTNAME:-"${POSTGRES_LOG_HOSTNAME:-}"}" +export POSTGRESQL_LOG_HOSTNAME="${POSTGRESQL_LOG_HOSTNAME:-}" +POSTGRESQL_CLIENT_MIN_MESSAGES="${POSTGRESQL_CLIENT_MIN_MESSAGES:-"${POSTGRES_CLIENT_MIN_MESSAGES:-}"}" +export POSTGRESQL_CLIENT_MIN_MESSAGES="${POSTGRESQL_CLIENT_MIN_MESSAGES:-error}" +POSTGRESQL_LOG_LINE_PREFIX="${POSTGRESQL_LOG_LINE_PREFIX:-"${POSTGRES_LOG_LINE_PREFIX:-}"}" +export POSTGRESQL_LOG_LINE_PREFIX="${POSTGRESQL_LOG_LINE_PREFIX:-}" +POSTGRESQL_LOG_TIMEZONE="${POSTGRESQL_LOG_TIMEZONE:-"${POSTGRES_LOG_TIMEZONE:-}"}" +export POSTGRESQL_LOG_TIMEZONE="${POSTGRESQL_LOG_TIMEZONE:-}" +POSTGRESQL_TIMEZONE="${POSTGRESQL_TIMEZONE:-"${POSTGRES_TIMEZONE:-}"}" +export POSTGRESQL_TIMEZONE="${POSTGRESQL_TIMEZONE:-}" +POSTGRESQL_MAX_CONNECTIONS="${POSTGRESQL_MAX_CONNECTIONS:-"${POSTGRES_MAX_CONNECTIONS:-}"}" +export POSTGRESQL_MAX_CONNECTIONS="${POSTGRESQL_MAX_CONNECTIONS:-}" +POSTGRESQL_TCP_KEEPALIVES_IDLE="${POSTGRESQL_TCP_KEEPALIVES_IDLE:-"${POSTGRES_TCP_KEEPALIVES_IDLE:-}"}" +export POSTGRESQL_TCP_KEEPALIVES_IDLE="${POSTGRESQL_TCP_KEEPALIVES_IDLE:-}" +POSTGRESQL_TCP_KEEPALIVES_INTERVAL="${POSTGRESQL_TCP_KEEPALIVES_INTERVAL:-"${POSTGRES_TCP_KEEPALIVES_INTERVAL:-}"}" +export POSTGRESQL_TCP_KEEPALIVES_INTERVAL="${POSTGRESQL_TCP_KEEPALIVES_INTERVAL:-}" +POSTGRESQL_TCP_KEEPALIVES_COUNT="${POSTGRESQL_TCP_KEEPALIVES_COUNT:-"${POSTGRES_TCP_KEEPALIVES_COUNT:-}"}" +export POSTGRESQL_TCP_KEEPALIVES_COUNT="${POSTGRESQL_TCP_KEEPALIVES_COUNT:-}" +POSTGRESQL_STATEMENT_TIMEOUT="${POSTGRESQL_STATEMENT_TIMEOUT:-"${POSTGRES_STATEMENT_TIMEOUT:-}"}" +export POSTGRESQL_STATEMENT_TIMEOUT="${POSTGRESQL_STATEMENT_TIMEOUT:-}" +POSTGRESQL_PGHBA_REMOVE_FILTERS="${POSTGRESQL_PGHBA_REMOVE_FILTERS:-"${POSTGRES_PGHBA_REMOVE_FILTERS:-}"}" +export POSTGRESQL_PGHBA_REMOVE_FILTERS="${POSTGRESQL_PGHBA_REMOVE_FILTERS:-}" 
+POSTGRESQL_USERNAME_CONNECTION_LIMIT="${POSTGRESQL_USERNAME_CONNECTION_LIMIT:-"${POSTGRES_USER_CONNECTION_LIMIT:-}"}" +export POSTGRESQL_USERNAME_CONNECTION_LIMIT="${POSTGRESQL_USERNAME_CONNECTION_LIMIT:-}" +POSTGRESQL_POSTGRES_CONNECTION_LIMIT="${POSTGRESQL_POSTGRES_CONNECTION_LIMIT:-"${POSTGRES_POSTGRES_CONNECTION_LIMIT:-}"}" +export POSTGRESQL_POSTGRES_CONNECTION_LIMIT="${POSTGRESQL_POSTGRES_CONNECTION_LIMIT:-}" +export POSTGRESQL_WAL_LEVEL="${POSTGRESQL_WAL_LEVEL:-replica}" +export POSTGRESQL_DEFAULT_TOAST_COMPRESSION="${POSTGRESQL_DEFAULT_TOAST_COMPRESSION:-}" +export POSTGRESQL_PASSWORD_ENCRYPTION="${POSTGRESQL_PASSWORD_ENCRYPTION:-}" +export POSTGRESQL_DEFAULT_TRANSACTION_ISOLATION="${POSTGRESQL_DEFAULT_TRANSACTION_ISOLATION:-}" + +# pgAutoFailover settings +export POSTGRESQL_AUTOCTL_VOLUME_DIR="${POSTGRESQL_VOLUME_DIR}/pgautoctl" +export POSTGRESQL_AUTOCTL_CONF_DIR="${POSTGRESQL_AUTOCTL_CONF_DIR:-${POSTGRESQL_AUTOCTL_VOLUME_DIR}/.config}" +export POSTGRESQL_AUTOCTL_MODE="${POSTGRESQL_AUTOCTL_MODE:-postgres}" +export POSTGRESQL_AUTOCTL_MONITOR_HOST="${POSTGRESQL_AUTOCTL_MONITOR_HOST:-monitor}" +export POSTGRESQL_AUTOCTL_HOSTNAME="${POSTGRESQL_AUTOCTL_HOSTNAME:-$(hostname --fqdn)}" + +# pgBackRest settings +export POSTGRESQL_PGBACKREST_VOLUME_DIR="${POSTGRESQL_VOLUME_DIR}/pgbackrest" +export POSTGRESQL_PGBACKREST_LOGS_DIR="${POSTGRESQL_PGBACKREST_VOLUME_DIR}/logs" +export POSTGRESQL_PGBACKREST_BACKUPS_DIR="${POSTGRESQL_PGBACKREST_VOLUME_DIR}/backups" +export POSTGRESQL_PGBACKREST_SPOOL_DIR="${POSTGRESQL_PGBACKREST_VOLUME_DIR}/spool" +export POSTGRESQL_PGBACKREST_CONF_FILE="${POSTGRESQL_DATA_DIR}/pgbackrest.conf" + +# Internal +export POSTGRESQL_FIRST_BOOT="yes" +export NSS_WRAPPER_LIB="/opt/bitnami/common/lib/libnss_wrapper.so" + +# Custom environment variables may be defined below diff --git a/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/entrypoint.sh b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/entrypoint.sh new file mode 100755 index 
000000000..8ea9cb4f1 --- /dev/null +++ b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/entrypoint.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 + +# shellcheck disable=SC1091 + +set -o errexit +set -o nounset +set -o pipefail +#set -o xtrace + +# Load libraries +. /opt/bitnami/scripts/libbitnami.sh +. /opt/bitnami/scripts/libpostgresql.sh + +# Load PostgreSQL environment variables +. /opt/bitnami/scripts/postgresql-env.sh + +print_welcome_page + +# Enable the nss_wrapper settings +postgresql_enable_nss_wrapper + +if [[ "$*" = *"/opt/bitnami/scripts/postgresql/run.sh"* ]]; then + info "** Starting PostgreSQL setup **" + /opt/bitnami/scripts/postgresql/setup.sh + touch "$POSTGRESQL_TMP_DIR"/.initialized + info "** PostgreSQL setup finished! **" +fi + +echo "" +exec "$@" diff --git a/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/postunpack.sh b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/postunpack.sh new file mode 100755 index 000000000..544976a70 --- /dev/null +++ b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/postunpack.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 + +# shellcheck disable=SC1091 + +# Load libraries +. /opt/bitnami/scripts/libfs.sh +. /opt/bitnami/scripts/libpostgresql.sh + +# Load PostgreSQL environment variables +. 
/opt/bitnami/scripts/postgresql-env.sh + +for dir in "$POSTGRESQL_INITSCRIPTS_DIR" "$POSTGRESQL_TMP_DIR" "$POSTGRESQL_LOG_DIR" "$POSTGRESQL_CONF_DIR" "${POSTGRESQL_CONF_DIR}/conf.d" "$POSTGRESQL_MOUNTED_CONF_DIR" "${POSTGRESQL_MOUNTED_CONF_DIR}/conf.d" "$POSTGRESQL_VOLUME_DIR"; do + ensure_dir_exists "$dir" +done + +# Create basic pg_hba.conf for local connections +postgresql_allow_local_connection +# Create basic postgresql.conf +postgresql_create_config + +chmod -R g+rwX "$POSTGRESQL_INITSCRIPTS_DIR" "$POSTGRESQL_TMP_DIR" "$POSTGRESQL_LOG_DIR" "$POSTGRESQL_CONF_DIR" "${POSTGRESQL_CONF_DIR}/conf.d" "$POSTGRESQL_MOUNTED_CONF_DIR" "${POSTGRESQL_MOUNTED_CONF_DIR}/conf.d" "$POSTGRESQL_VOLUME_DIR" + +# Redirect all logging to stdout +ln -sf /dev/stdout "$POSTGRESQL_LOG_DIR/postgresql.log" diff --git a/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/run-autoctl.sh b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/run-autoctl.sh new file mode 100755 index 000000000..ade2e0df2 --- /dev/null +++ b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/run-autoctl.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 + +# shellcheck disable=SC1091 + +set -o errexit +set -o nounset +set -o pipefail +# set -o xtrace # Uncomment this line for debugging purposes + +# Load libraries +. /opt/bitnami/scripts/libpostgresql.sh +. /opt/bitnami/scripts/libautoctl.sh +. /opt/bitnami/scripts/libos.sh + +# Load PostgreSQL environment variables +. 
/opt/bitnami/scripts/postgresql-env.sh + +export HOME="$POSTGRESQL_AUTOCTL_VOLUME_DIR" + +autoctl_initialize + +flags=("run" "--pgdata" "$POSTGRESQL_DATA_DIR") +cmd=$(command -v pg_autoctl) + +info "** Starting PostgreSQL autoctl_node (Mode: $POSTGRESQL_AUTOCTL_MODE) **" +if am_i_root; then + exec_as_user "$POSTGRESQL_DAEMON_USER" "$cmd" "${flags[@]}" +else + PGPASSWORD=$POSTGRESQL_REPLICATION_PASSWORD exec "$cmd" "${flags[@]}" +fi diff --git a/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/run.sh b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/run.sh new file mode 100755 index 000000000..3273b4adf --- /dev/null +++ b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/run.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 + +# shellcheck disable=SC1091 + +set -o errexit +set -o nounset +set -o pipefail +# set -o xtrace # Uncomment this line for debugging purposes + +# Load libraries +. /opt/bitnami/scripts/libpostgresql.sh +. /opt/bitnami/scripts/libos.sh + +# Load PostgreSQL environment variables +. 
/opt/bitnami/scripts/postgresql-env.sh + +flags=("-D" "$POSTGRESQL_DATA_DIR" "--config-file=$POSTGRESQL_CONF_FILE" "--external_pid_file=$POSTGRESQL_PID_FILE" "--hba_file=$POSTGRESQL_PGHBA_FILE") + +if [[ -n "${POSTGRESQL_EXTRA_FLAGS:-}" ]]; then + read -r -a extra_flags <<< "$POSTGRESQL_EXTRA_FLAGS" + flags+=("${extra_flags[@]}") +fi + +if [[ -n "${POSTGRESQL_DEFAULT_TRANSACTION_ISOLATION:-}" ]]; then + flags+=("-c" "default_transaction_isolation=$POSTGRESQL_DEFAULT_TRANSACTION_ISOLATION") +fi + +flags+=("$@") + +cmd=$(command -v postgres) + +info "** Starting PostgreSQL **" +if am_i_root; then + exec_as_user "$POSTGRESQL_DAEMON_USER" "$cmd" "${flags[@]}" +else + exec "$cmd" "${flags[@]}" +fi diff --git a/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/setup.sh b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/setup.sh new file mode 100755 index 000000000..d5aa341ce --- /dev/null +++ b/deploy/pgvector/rootfs/opt/bitnami/scripts/postgresql/setup.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Copyright VMware, Inc. +# SPDX-License-Identifier: APACHE-2.0 +# +# Bitnami PostgreSQL setup + +# shellcheck disable=SC1091 + +set -o errexit +set -o nounset +set -o pipefail + +# Load libraries +. /opt/bitnami/scripts/liblog.sh +. /opt/bitnami/scripts/libos.sh +. /opt/bitnami/scripts/libvalidations.sh +. /opt/bitnami/scripts/libpostgresql.sh + +# Load PostgreSQL environment variables +. /opt/bitnami/scripts/postgresql-env.sh + +# Ensure PostgreSQL environment variables settings are valid +postgresql_validate +# Ensure PostgreSQL is stopped when this script ends. 
+trap "postgresql_stop" EXIT +# Ensure 'daemon' user exists when running as 'root' +am_i_root && ensure_user_exists "$POSTGRESQL_DAEMON_USER" --group "$POSTGRESQL_DAEMON_GROUP" +# Fix logging issue when running as root +am_i_root && chmod o+w "$(readlink /dev/stdout)" +# Remove flags and postmaster files from a previous run +postgresql_clean_from_restart +# Allow running custom pre-initialization scripts +postgresql_custom_pre_init_scripts +# Ensure PostgreSQL is initialized +postgresql_initialize +# Allow running custom initialization scripts +postgresql_custom_init_scripts + +# Allow remote connections once the initialization is finished +if ! postgresql_is_file_external "postgresql.conf" && is_boolean_yes "$POSTGRESQL_ALLOW_REMOTE_CONNECTIONS"; then + info "Enabling remote connections" + postgresql_enable_remote_connections +fi + +# Remove any pg_hba.conf lines that match the given filters +if ! postgresql_is_file_external "pg_hba.conf" && [[ -n "$POSTGRESQL_PGHBA_REMOVE_FILTERS" ]]; then + info "Removing lines that match these filters: ${POSTGRESQL_PGHBA_REMOVE_FILTERS}" + postgresql_remove_pghba_lines +fi diff --git a/deploy/pgvector/run.sh b/deploy/pgvector/run.sh new file mode 100755 index 000000000..8845fc874 --- /dev/null +++ b/deploy/pgvector/run.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -e + +# 0. remove old files +find . | grep -v run.sh | grep -v README.md | grep -v '^\.$' | xargs rm -r -f + +# 1. get base dockerfile and script from bitnami +git clone -n --depth=1 --filter=tree:0 https://github.com/bitnami/containers.git +cd containers +git sparse-checkout set --no-cone bitnami/postgresql/16/debian-11 +git checkout +mv bitnami/postgresql/16/debian-11/* .. +cd .. +rm -r -f containers + +# 2. 
add pgvector build script +cat >pgvector.sh < github.com/Abirdcfly/langchaingo v0.0.0-20240109042258-6b95089747c9 diff --git a/go.sum b/go.sum index e7de428d3..fe538d46a 100644 --- a/go.sum +++ b/go.sum @@ -18,27 +18,27 @@ cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmW cloud.google.com/go v0.78.0/go.mod h1:QjdrLG0uq+YwhjoVOLsS1t7TW8fs36kLs4XO5R5ECHg= cloud.google.com/go v0.79.0/go.mod h1:3bzgcEeQlzbuEAYu4mrWhKqWjmpprinYgKJLgKHnbb8= cloud.google.com/go v0.81.0/go.mod h1:mk/AM35KwGk/Nm2YSeZbxXdrNK3KZOYHmLkOqC2V6E0= -cloud.google.com/go v0.110.2 h1:sdFPBr6xG9/wkBbfhmUz/JmZC7X6LavQgcrVINrKiVA= -cloud.google.com/go v0.110.2/go.mod h1:k04UEeEtb6ZBRTv3dZz4CeJC3jKGxyhl0sAiVVquxiw= -cloud.google.com/go/aiplatform v1.42.0 h1:otuKi5bgONobl5+3bMSrapkTJGL8zNZqtr7M0tfXbt4= -cloud.google.com/go/aiplatform v1.42.0/go.mod h1:oLLeleZuSemfGDZqyX/Z2PXT5SBItSraRHqgYb2RgcI= +cloud.google.com/go v0.110.8 h1:tyNdfIxjzaWctIiLYOTalaLKZ17SI44SKFW26QbOhME= +cloud.google.com/go v0.110.8/go.mod h1:Iz8AkXJf1qmxC3Oxoep8R1T36w8B92yU29PcBhHO5fk= +cloud.google.com/go/aiplatform v1.51.1 h1:g+y03dll9HnX9U0oBKIqUOI+8VQWT1QJF12VGxkal0Q= +cloud.google.com/go/aiplatform v1.51.1/go.mod h1:kY3nIMAVQOK2XDqDPHaOuD9e+FdMA6OOpfBjsvaFSOo= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute v1.20.1 h1:6aKEtlUiwEpJzM001l0yFkpXmUVXaN8W+fbkb2AZNbg= -cloud.google.com/go/compute v1.20.1/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= +cloud.google.com/go/compute 
v1.23.1 h1:V97tBoDaZHb6leicZ1G6DLK2BAaZLJ/7+9BB/En3hR0= +cloud.google.com/go/compute v1.23.1/go.mod h1:CqB3xpmPKKt3OJpW2ndFIXnA9A4xAy/F3Xp1ixncW78= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= -cloud.google.com/go/iam v1.0.1 h1:lyeCAU6jpnVNrE9zGQkTl3WgNgK/X+uWwaw0kynZJMU= -cloud.google.com/go/iam v1.0.1/go.mod h1:yR3tmSL8BcZB4bxByRv2jkSIahVmCtfKZwLYGBalRE8= -cloud.google.com/go/longrunning v0.4.2 h1:WDKiiNXFTaQ6qz/G8FCOkuY9kJmOJGY67wPUC1M2RbE= -cloud.google.com/go/longrunning v0.4.2/go.mod h1:OHrnaYyLUV6oqwh0xiS7e5sLQhP1m0QU9R+WhGDMgIQ= +cloud.google.com/go/iam v1.1.3 h1:18tKG7DzydKWUnLjonWcJO6wjSCAtzh4GcRKlH/Hrzc= +cloud.google.com/go/iam v1.1.3/go.mod h1:3khUlaBXfPKKe7huYgEpDn6FtgRyMEqbkvBxrQyY5SE= +cloud.google.com/go/longrunning v0.5.2 h1:u+oFqfEwwU7F9dIELigxbe0XVnBAo9wqMuQLA50CZ5k= +cloud.google.com/go/longrunning v0.5.2/go.mod h1:nqo6DQbNV2pXhGDbDMoN2bWz68MjZUzqv2YttZiveCs= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -51,6 +51,8 @@ cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/99designs/gqlgen v0.17.40 h1:/l8JcEVQ93wqIfmH9VS1jsAkwm6eAF1NwQn3N+SDqBY= github.com/99designs/gqlgen v0.17.40/go.mod h1:b62q1USk82GYIVjC60h02YguAZLqYZtvWml8KkhJps4= 
+github.com/Abirdcfly/langchaingo v0.0.0-20240109042258-6b95089747c9 h1:v5OOO9PfmKxou/qoWOVc1wVgdJBh6ZAjBSqY9bsP+iA= +github.com/Abirdcfly/langchaingo v0.0.0-20240109042258-6b95089747c9/go.mod h1:vOFzX91wqTXvirejd6xjPXSmGn8yYKHt/FunAgrOBmI= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= @@ -255,6 +257,10 @@ github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-openapi/swag v0.22.6 h1:dnqg1XfHXL9aBxSbktBqFR5CxVyVI+7fYWhAf1JOeTw= github.com/go-openapi/swag v0.22.6/go.mod h1:Gl91UqO+btAM0plGGxHqJcQZ1ZTy6jbmridBTsDy8A0= +github.com/go-pg/pg/v10 v10.11.0 h1:CMKJqLgTrfpE/aOVeLdybezR2om071Vh38OLZjsyMI0= +github.com/go-pg/pg/v10 v10.11.0/go.mod h1:4BpHRoxE61y4Onpof3x1a2SQvi9c+q1dJnrNdMjsroA= +github.com/go-pg/zerochecker v0.2.0 h1:pp7f72c3DobMWOb2ErtZsnrPaSvHd2W4o9//8HtF4mU= +github.com/go-pg/zerochecker v0.2.0/go.mod h1:NJZ4wKL0NmTtz0GKCoJ8kym6Xn/EQzXRl2OnAe7MmDo= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= @@ -339,8 +345,8 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 
h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -361,19 +367,19 @@ github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/s2a-go v0.1.4 h1:1kZ/sQM3srePvKs3tXAvQzo66XfcReoqFpIpIccE7Oc= -github.com/google/s2a-go v0.1.4/go.mod h1:Ej+mSEMGRnqRzjc7VtF+jdBwYG5fuJfiZ8ELkjEwM0A= +github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= +github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.2.3 h1:yk9/cqRKtT9wXZSsRH9aurXEpJX+U6FLtpYTdC3R06k= -github.com/googleapis/enterprise-certificate-proxy v0.2.3/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= +github.com/google/uuid v1.4.0 
h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= +github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= +github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/googleapis/gax-go/v2 v2.11.0 h1:9V9PWXEsWnPpQhu/PeQIkS4eGzMlTLGgt80cUUI8Ki4= -github.com/googleapis/gax-go/v2 v2.11.0/go.mod h1:DxmR61SGKkGLa2xigwuZIQpkCI2S5iydzRfb3peWZJI= +github.com/googleapis/gax-go/v2 v2.12.0 h1:A+gCJKdRfqXkr+BIRGtZLibNXf0m1f9E4HG56etFpas= +github.com/googleapis/gax-go/v2 v2.12.0/go.mod h1:y+aIqrI5eb1YGMVJfuV3185Ts/D7qKpsEkdD5+I6QGU= github.com/goph/emperror v0.17.2 h1:yLapQcmEsO0ipe9p5TaN22djm3OFV/TfM/fcYP0/J18= github.com/goph/emperror v0.17.2/go.mod h1:+ZbQ+fUNO/6FNiUo0ujtMjhgad9Xa6fQL9KhH4LNHic= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -431,6 +437,8 @@ github.com/jackc/pgx/v5 v5.4.1/go.mod h1:q6iHT8uDNXWiFNOlRqJzBTaSH3+2xCXkokxHZC5 github.com/jackc/puddle/v2 v2.2.0 h1:RdcDk92EJBuBS55nQMMYFXTxwstHug4jkhT5pq8VxPk= github.com/jackc/puddle/v2 v2.2.0/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/jonboulle/clockwork v0.1.0/go.mod 
h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -568,6 +576,8 @@ github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZO github.com/pelletier/go-toml/v2 v2.1.1 h1:LWAJwfNvjQZCFIDKWYQaM62NcYeYViCmWIwmOStowAI= github.com/pelletier/go-toml/v2 v2.1.1/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= +github.com/pgvector/pgvector-go v0.1.1 h1:kqJigGctFnlWvskUiYIvJRNwUtQl/aMSUZVs0YWQe+g= +github.com/pgvector/pgvector-go v0.1.1/go.mod h1:wLJgD/ODkdtd2LJK4l6evHXTuG+8PxymYAVomKHOWac= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -685,14 +695,20 @@ github.com/swaggo/swag v1.16.2 h1:28Pp+8DkQoV+HLzLx8RGJZXNGKbFqnuvSbAAtoxiY04= github.com/swaggo/swag v1.16.2/go.mod h1:6YzXnDcpr0767iOejs318CwYkCQqyGer6BizOg03f+E= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/tmc/langchaingo v0.1.3 h1:QIzyhr5N2ZkE1z/9QJUBD+t9JndMrnXwIFw2I7FYtA8= -github.com/tmc/langchaingo v0.1.3/go.mod h1:Rm4WfxQR0WQLtcz5+zMGutlfgMuNY5QKZt8k3Y42gz0= +github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc h1:9lRDQMhESg+zvGYmW5DyG0UqvY96Bu5QYsTLvCHdrgo= +github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc/go.mod h1:bciPuU6GHm1iF1pBvUfxfsH0Wmnc2VbpgvbI9ZWuIRs= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go v1.2.7/go.mod 
h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/uptrace/bun v1.1.12 h1:sOjDVHxNTuM6dNGaba0wUuz7KvDE1BmNu9Gqs2gJSXQ= +github.com/uptrace/bun v1.1.12/go.mod h1:NPG6JGULBeQ9IU6yHp7YGELRa5Agmd7ATZdz4tGZ6z0= +github.com/uptrace/bun/dialect/pgdialect v1.1.12 h1:m/CM1UfOkoBTglGO5CUTKnIKKOApOYxkcP2qn0F9tJk= +github.com/uptrace/bun/dialect/pgdialect v1.1.12/go.mod h1:Ij6WIxQILxLlL2frUBxUBOZJtLElD2QQNDcu/PWDHTc= +github.com/uptrace/bun/driver/pgdriver v1.1.12 h1:3rRWB1GK0psTJrHwxzNfEij2MLibggiLdTqjTtfHc1w= +github.com/uptrace/bun/driver/pgdriver v1.1.12/go.mod h1:ssYUP+qwSEgeDDS1xm2XBip9el1y9Mi5mTAvLoiADLM= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e9M= @@ -701,6 +717,14 @@ github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVS github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc= github.com/vektah/gqlparser/v2 v2.5.10 h1:6zSM4azXC9u4Nxy5YmdmGu4uKamfwsdKTwp5zsEealU= github.com/vektah/gqlparser/v2 v2.5.10/go.mod h1:1rCcfwB2ekJofmluGWXMSEnPMZgbxzwj6FaZ/4OT8Cc= +github.com/vmihailenco/bufpool v0.1.11 h1:gOq2WmBrq0i2yW5QJ16ykccQ4wH9UyEsgLm6czKAd94= +github.com/vmihailenco/bufpool v0.1.11/go.mod h1:AFf/MOy3l2CFTKbxwt0mp2MwnqjNEs5H/UxrkA5jxTQ= +github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9znI5mJU= +github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= +github.com/vmihailenco/tagparser v0.1.2 h1:gnjoVuB/kljJ5wICEEOpx98oXMWPLj22G67Vbd1qPqc= 
+github.com/vmihailenco/tagparser v0.1.2/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/x-cray/logrus-prefixed-formatter v0.5.2 h1:00txxvfBM9muc0jiLIEAkAcIMJzfthRT6usrui8uGmg= github.com/x-cray/logrus-prefixed-formatter v0.5.2/go.mod h1:2duySbKsL6M18s5GU7VPsoEPHyzalCE06qoARUCeBBE= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= @@ -1119,8 +1143,8 @@ google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34q google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= google.golang.org/api v0.41.0/go.mod h1:RkxM5lITDfTzmyKFPt+wGrCJbVfniCr2ool8kTBzRTU= google.golang.org/api v0.43.0/go.mod h1:nQsDGjRXMo4lvh5hP0TKqF244gqhGcr/YSIykhUk/94= -google.golang.org/api v0.126.0 h1:q4GJq+cAdMAC7XP7njvQ4tvohGLiSlytuL4BQxbIZ+o= -google.golang.org/api v0.126.0/go.mod h1:mBwVAtz+87bEN6CbA1GtZPDOqY2R5ONPqJeIlvyo4Aw= +google.golang.org/api v0.149.0 h1:b2CqT6kG+zqJIVKRQ3ELJVLN1PwHZ6DJ3dW8yl82rgY= +google.golang.org/api v0.149.0/go.mod h1:Mwn1B7JTXrzXtnvmzQE2BD6bYZQ8DShKZDZbeN9I7qI= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -1176,12 +1200,12 @@ google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaE google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20220107163113-42d7afdf6368/go.mod 
h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= -google.golang.org/genproto v0.0.0-20230530153820-e85fd2cbaebc h1:8DyZCyvI8mE1IdLy/60bS+52xfymkE72wv1asokgtao= -google.golang.org/genproto v0.0.0-20230530153820-e85fd2cbaebc/go.mod h1:xZnkP7mREFX5MORlOPEzLMr+90PPZQ2QWzrVTWfAq64= -google.golang.org/genproto/googleapis/api v0.0.0-20230530153820-e85fd2cbaebc h1:kVKPf/IiYSBWEWtkIn6wZXwWGCnLKcC8oWfZvXjsGnM= -google.golang.org/genproto/googleapis/api v0.0.0-20230530153820-e85fd2cbaebc/go.mod h1:vHYtlOoi6TsQ3Uk2yxR7NI5z8uoV+3pZtR4jmHIkRig= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230530153820-e85fd2cbaebc h1:XSJ8Vk1SWuNr8S18z1NZSziL0CPIXLCCMDOEFtHBOFc= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230530153820-e85fd2cbaebc/go.mod h1:66JfowdXAEgad5O9NnYcsNPLCPZJD++2L9X0PCMODrA= +google.golang.org/genproto v0.0.0-20231016165738-49dd2c1f3d0b h1:+YaDE2r2OG8t/z5qmsh7Y+XXwCbvadxxZ0YY6mTdrVA= +google.golang.org/genproto v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:CgAqfJo+Xmu0GwA0411Ht3OU3OntXwsGmrmjI8ioGXI= +google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b h1:CIC2YMXmIhYw6evmhPxBKJ4fmLbOFtXQN/GV3XOZR8k= +google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:IBQ646DjkDkvUIsVq/cc03FUFQ9wbZu7yE396YcL870= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231016165738-49dd2c1f3d0b h1:ZlWIi1wSK56/8hn4QcBp/j9M7Gt3U/3hZw3mC7vDICo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231016165738-49dd2c1f3d0b/go.mod h1:swOH3j0KzcDDgGUWr+SNpyTen5YrXjS3eyPzFYKc6lc= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -1204,8 +1228,8 @@ google.golang.org/grpc v1.36.1/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAG google.golang.org/grpc v1.37.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= 
google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.57.1 h1:upNTNqv0ES+2ZOOqACwVtS3Il8M12/+Hz41RCPzAjQg= -google.golang.org/grpc v1.57.1/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= +google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= +google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -1290,6 +1314,8 @@ k8s.io/kube-openapi v0.0.0-20220328201542-3ee0da9b0b42/go.mod h1:Z/45zLw8lUo4wdi k8s.io/utils v0.0.0-20210802155522-efc7438f0176/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9 h1:HNSDgDCrr/6Ly3WEGKZftiE7IY19Vz2GdbOCyI4qqhc= k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +mellium.im/sasl v0.3.1 h1:wE0LW6g7U83vhvxjC1IY8DnXM+EU095yeo8XClvCdfo= +mellium.im/sasl v0.3.1/go.mod h1:xm59PUYpZHhgQ9ZqoJ5QaCqzWMi8IeS49dhp6plPCzw= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/pkg/application/retriever/knowledgebaseretriever.go b/pkg/application/retriever/knowledgebaseretriever.go index 74a1fac2a..0bc3b1a43 100644 --- a/pkg/application/retriever/knowledgebaseretriever.go +++ b/pkg/application/retriever/knowledgebaseretriever.go @@ -26,7 +26,6 @@ import ( "github.com/tmc/langchaingo/chains" 
langchaingoschema "github.com/tmc/langchaingo/schema" "github.com/tmc/langchaingo/vectorstores" - "github.com/tmc/langchaingo/vectorstores/chroma" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -37,6 +36,7 @@ import ( "github.com/kubeagi/arcadia/api/base/v1alpha1" "github.com/kubeagi/arcadia/pkg/application/base" "github.com/kubeagi/arcadia/pkg/langchainwrap" + pkgvectorstore "github.com/kubeagi/arcadia/pkg/vectorstore" ) type Reference struct { @@ -133,21 +133,12 @@ func (l *KnowledgeBaseRetriever) Run(ctx context.Context, cli dynamic.Interface, if err != nil { return nil, fmt.Errorf("can't convert the vectorstore in cluster: %w", err) } - switch vectorStore.Spec.Type() { // nolint: gocritic - case v1alpha1.VectorStoreTypeChroma: - s, err := chroma.New( - chroma.WithChromaURL(vectorStore.Spec.Endpoint.URL), - chroma.WithDistanceFunction(vectorStore.Spec.Chroma.DistanceFunction), - chroma.WithNameSpace(knowledgebase.VectorStoreCollectionName()), - chroma.WithEmbedder(em), - ) - if err != nil { - return nil, err - } - l.Retriever = vectorstores.ToRetriever(s, instance.Spec.NumDocuments, vectorstores.WithScoreThreshold(instance.Spec.ScoreThreshold)) - default: - return nil, fmt.Errorf("unknown vectorstore type: %s", vectorStore.Spec.Type()) + var s vectorstores.VectorStore + s, _, err = pkgvectorstore.NewVectorStore(ctx, vectorStore, em, knowledgebase.VectorStoreCollectionName(), nil, cli) + if err != nil { + return nil, err } + l.Retriever = vectorstores.ToRetriever(s, instance.Spec.NumDocuments, vectorstores.WithScoreThreshold(instance.Spec.ScoreThreshold)) args["retriever"] = l return args, nil } diff --git a/pkg/datasource/oss.go b/pkg/datasource/oss.go index 2d0d3b770..07da1c8c8 100644 --- a/pkg/datasource/oss.go +++ b/pkg/datasource/oss.go @@ -28,16 +28,10 @@ import ( "github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7/pkg/credentials" - corev1 "k8s.io/api/core/v1" - v1 
"k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/dynamic" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/kubeagi/arcadia/api/base/v1alpha1" - "github.com/kubeagi/arcadia/pkg/utils" ) var ( @@ -64,30 +58,12 @@ func NewOSS(ctx context.Context, c client.Client, dc dynamic.Interface, endpoint if endpoint.AuthSecret.Namespace == nil { return nil, errors.New("no namespace found for endpoint.authsecret") } - if err := utils.ValidateClient(c, dc); err != nil { + data, err := endpoint.AuthData(ctx, *endpoint.AuthSecret.Namespace, c, dc) + if err != nil { return nil, err } - if dc != nil { - secret, err := dc.Resource(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}). - Namespace(*endpoint.AuthSecret.Namespace).Get(ctx, endpoint.AuthSecret.Name, v1.GetOptions{}) - if err != nil { - return nil, err - } - data, _, _ := unstructured.NestedStringMap(secret.Object, "data") - accessKeyID = utils.DecodeBase64Str(data["rootUser"]) - secretAccessKey = utils.DecodeBase64Str(data["rootPassword"]) - } - if c != nil { - secret := corev1.Secret{} - if err := c.Get(ctx, types.NamespacedName{ - Namespace: *endpoint.AuthSecret.Namespace, - Name: endpoint.AuthSecret.Name, - }, &secret); err != nil { - return nil, err - } - accessKeyID = string(secret.Data["rootUser"]) - secretAccessKey = string(secret.Data["rootPassword"]) - } + accessKeyID = string(data["rootUser"]) + secretAccessKey = string(data["rootPassword"]) } mc, err := minio.New(endpoint.URL, &minio.Options{ diff --git a/pkg/datasource/postgresql.go b/pkg/datasource/postgresql.go index 9fdaa0c5a..6486ba0ff 100644 --- a/pkg/datasource/postgresql.go +++ b/pkg/datasource/postgresql.go @@ -23,63 +23,70 @@ import ( "sync" "github.com/jackc/pgx/v5/pgxpool" - corev1 "k8s.io/api/core/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - 
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/dynamic" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/kubeagi/arcadia/api/base/v1alpha1" - "github.com/kubeagi/arcadia/pkg/utils" ) var ( - _ Datasource = (*PostgreSQL)(nil) - locker sync.Mutex + _ Datasource = (*PostgreSQL)(nil) + locker sync.Mutex + poolsMutex sync.Mutex + pools = make(map[string]*PostgreSQL) ) +func GetPostgreSQLPool(ctx context.Context, c client.Client, dc dynamic.Interface, datasource *v1alpha1.Datasource) (*PostgreSQL, error) { + if datasource.Spec.Type() != v1alpha1.DatasourceTypePostgreSQL { + return nil, ErrUnknowDatasourceType + } + pg, ok := pools[string(datasource.GetUID())] + if ok && pg.Ref.GetGeneration() == datasource.GetGeneration() { + return pg, nil + } + pg, err := newPostgreSQL(ctx, c, dc, datasource.Spec.PostgreSQL, &datasource.Spec.Endpoint) + if err != nil { + return nil, err + } + pg.Ref = datasource.DeepCopy() + poolsMutex.Lock() + pools[string(datasource.GetUID())] = pg + poolsMutex.Unlock() + return pg, nil +} + +func RemovePostgreSQLPool(datasource v1alpha1.Datasource) { + pg, ok := pools[string(datasource.GetUID())] + if !ok { + return + } + pg.Pool.Close() + poolsMutex.Lock() + delete(pools, string(datasource.GetUID())) + poolsMutex.Unlock() +} + // PostgreSQL is a wrapper to PostgreSQL type PostgreSQL struct { *pgxpool.Pool + Ref *v1alpha1.Datasource } // NewPostgreSQL creates a new PostgreSQL pool -func NewPostgreSQL(ctx context.Context, c client.Client, dc dynamic.Interface, config *v1alpha1.PostgreSQL, endpoint *v1alpha1.Endpoint) (*PostgreSQL, error) { +func newPostgreSQL(ctx context.Context, c client.Client, dc dynamic.Interface, config *v1alpha1.PostgreSQL, endpoint *v1alpha1.Endpoint) (*PostgreSQL, error) { var pgUser, pgPassword, pgPassFile, pgSSLPassword string if endpoint.AuthSecret != nil { if endpoint.AuthSecret.Namespace == nil { return nil, 
errors.New("no namespace found for endpoint.authsecret") } - if err := utils.ValidateClient(c, dc); err != nil { + data, err := endpoint.AuthData(ctx, *endpoint.AuthSecret.Namespace, c, dc) + if err != nil { return nil, err } - if dc != nil { - secret, err := dc.Resource(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}). - Namespace(*endpoint.AuthSecret.Namespace).Get(ctx, endpoint.AuthSecret.Name, v1.GetOptions{}) - if err != nil { - return nil, err - } - data, _, _ := unstructured.NestedStringMap(secret.Object, "data") - pgUser = utils.DecodeBase64Str(data[v1alpha1.PGUSER]) - pgPassword = utils.DecodeBase64Str(data[v1alpha1.PGPASSWORD]) - pgPassFile = utils.DecodeBase64Str(data[v1alpha1.PGPASSFILE]) - pgSSLPassword = utils.DecodeBase64Str(data[v1alpha1.PGSSLPASSWORD]) - } - if c != nil { - secret := corev1.Secret{} - if err := c.Get(ctx, types.NamespacedName{ - Namespace: *endpoint.AuthSecret.Namespace, - Name: endpoint.AuthSecret.Name, - }, &secret); err != nil { - return nil, err - } - pgUser = string(secret.Data[v1alpha1.PGUSER]) - pgPassword = string(secret.Data[v1alpha1.PGPASSWORD]) - pgPassFile = string(secret.Data[v1alpha1.PGPASSFILE]) - pgSSLPassword = string(secret.Data[v1alpha1.PGSSLPASSWORD]) - } + pgUser = string(data[v1alpha1.PGUSER]) + pgPassword = string(data[v1alpha1.PGPASSWORD]) + pgPassFile = string(data[v1alpha1.PGPASSFILE]) + pgSSLPassword = string(data[v1alpha1.PGSSLPASSWORD]) } locker.Lock() defer locker.Unlock() @@ -191,7 +198,7 @@ func NewPostgreSQL(ctx context.Context, c client.Client, dc dynamic.Interface, c if err != nil { return nil, err } - return &PostgreSQL{pool}, nil + return &PostgreSQL{Pool: pool}, nil } func (p *PostgreSQL) Stat(ctx context.Context, _ any) error { diff --git a/pkg/embeddings/zhipuai/zhipuai.go b/pkg/embeddings/zhipuai/zhipuai.go index 848e0acc2..7565afbaf 100644 --- a/pkg/embeddings/zhipuai/zhipuai.go +++ b/pkg/embeddings/zhipuai/zhipuai.go @@ -21,6 +21,7 @@ import ( "strings" 
"github.com/tmc/langchaingo/embeddings" + "k8s.io/klog/v2" llmzhipuai "github.com/kubeagi/arcadia/pkg/llms/zhipuai" ) @@ -53,10 +54,12 @@ func (e ZhiPuAI) EmbedDocuments(ctx context.Context, texts []string) ([][]float3 emb := make([][]float32, 0, len(texts)) for _, batch := range batchedTexts { + klog.V(5).Infoln("try to create an embedding batch:", batch) curTextEmbeddings, err := e.client.CreateEmbedding(ctx, batch) if err != nil { return nil, err } + klog.V(5).Infoln("create an embedding batch done") emb = append(emb, curTextEmbeddings...) } diff --git a/pkg/llms/zhipuai/api.go b/pkg/llms/zhipuai/api.go index 5456c5f2e..7539cec6b 100644 --- a/pkg/llms/zhipuai/api.go +++ b/pkg/llms/zhipuai/api.go @@ -33,7 +33,7 @@ import ( const ( ZhipuaiModelAPIURL = "https://open.bigmodel.cn/api/paas/v3/model-api" - ZhipuaiModelDefaultTimeout = 300 * time.Second + ZhipuaiModelDefaultTimeout = 30 * time.Second RetryLimit = 3 ) @@ -198,7 +198,7 @@ func (z *ZhiPuAI) CreateEmbedding(ctx context.Context, inputTexts []string) ([][ retry++ if retry > 1 { time.Sleep(100 * time.Millisecond) - klog.Warning("retry embedding post quest:", retry) + klog.Warning("retry embedding post request:", retry) } postResponse, err = EmbeddingPost(url, token, EmbeddingText{ Prompt: text, diff --git a/pkg/utils/structured.go b/pkg/utils/structured.go index 361192c51..2dadef192 100644 --- a/pkg/utils/structured.go +++ b/pkg/utils/structured.go @@ -17,7 +17,6 @@ limitations under the License. 
package utils import ( - "encoding/base64" "encoding/json" "fmt" "reflect" @@ -58,11 +57,3 @@ func ValidateClient(c client.Client, cli dynamic.Interface) error { } return nil } - -func DecodeBase64Str(s string) string { - ds, err := base64.StdEncoding.DecodeString(s) - if err == nil { - return string(ds) - } - return "" -} diff --git a/pkg/vectorstore/vectorstore.go b/pkg/vectorstore/vectorstore.go new file mode 100644 index 000000000..4aaaa368f --- /dev/null +++ b/pkg/vectorstore/vectorstore.go @@ -0,0 +1,180 @@ +/* +Copyright 2024 KubeAGI. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package vectorstore + +import ( + "context" + "errors" + + "github.com/go-logr/logr" + "github.com/tmc/langchaingo/embeddings" + "github.com/tmc/langchaingo/llms/openai" + "github.com/tmc/langchaingo/vectorstores" + "github.com/tmc/langchaingo/vectorstores/chroma" + "github.com/tmc/langchaingo/vectorstores/pgvector" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + + arcadiav1alpha1 "github.com/kubeagi/arcadia/api/base/v1alpha1" + "github.com/kubeagi/arcadia/pkg/datasource" + "github.com/kubeagi/arcadia/pkg/utils" +) + +var ( + ErrUnsupportedVectorStoreType = errors.New("unsupported vectorstore type") +) + +func NewVectorStore(ctx context.Context, vs *arcadiav1alpha1.VectorStore, embedder embeddings.Embedder, collectionName string, c client.Client, dc dynamic.Interface) (v vectorstores.VectorStore, finish func(), err error) { + switch vs.Spec.Type() { + case arcadiav1alpha1.VectorStoreTypeChroma: + ops := []chroma.Option{ + chroma.WithChromaURL(vs.Spec.Endpoint.URL), + chroma.WithDistanceFunction(vs.Spec.Chroma.DistanceFunction), + } + if embedder != nil { + ops = append(ops, chroma.WithEmbedder(embedder)) + } else { + ops = append(ops, chroma.WithOpenAiAPIKey("fake_key_just_for_chroma_heartbeat")) + } + if collectionName != "" { + ops = append(ops, chroma.WithNameSpace(collectionName)) + } + v, err = chroma.New(ops...) 
+ case arcadiav1alpha1.VectorStoreTypePGVector: + ops := []pgvector.Option{ + pgvector.WithPreDeleteCollection(vs.Spec.PGVector.PreDeleteCollection), + } + if vs.Spec.PGVector.CollectionTableName != "" { + ops = append(ops, pgvector.WithCollectionTableName(vs.Spec.PGVector.CollectionTableName)) + } + if vs.Spec.PGVector.EmbeddingTableName != "" { + ops = append(ops, pgvector.WithEmbeddingTableName(vs.Spec.PGVector.EmbeddingTableName)) + } + if ref := vs.Spec.PGVector.DataSourceRef; ref != nil { + if err := utils.ValidateClient(c, dc); err != nil { + return nil, nil, err + } + ds := &arcadiav1alpha1.Datasource{} + if c != nil { + if err := c.Get(ctx, types.NamespacedName{Name: ref.Name, Namespace: ref.GetNamespace(vs.GetNamespace())}, ds); err != nil { + return nil, nil, err + } + } else { + obj, err := dc.Resource(schema.GroupVersionResource{Group: "arcadia.kubeagi.k8s.com.cn", Version: "v1alpha1", Resource: "datasources"}). + Namespace(ref.GetNamespace(vs.GetNamespace())).Get(ctx, ref.Name, metav1.GetOptions{}) + if err != nil { + return nil, nil, err + } + err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.UnstructuredContent(), ds) + if err != nil { + return nil, nil, err + } + } + vs.Spec.Endpoint = ds.Spec.Endpoint.DeepCopy() + pool, err := datasource.GetPostgreSQLPool(ctx, c, dc, ds) + if err != nil { + return nil, nil, err + } + conn, err := pool.Acquire(ctx) + if err != nil { + return nil, nil, err + } + klog.V(5).Info("acquire pg conn from pool") + finish = func() { + if conn != nil { + conn.Release() + klog.V(5).Info("release pg conn to pool") + } + } + ops = append(ops, pgvector.WithConn(conn.Conn())) + } else { + ops = append(ops, pgvector.WithConnectionURL(vs.Spec.Endpoint.URL)) + } + if embedder != nil { + ops = append(ops, pgvector.WithEmbedder(embedder)) + } else { + llm, _ := openai.New() + embedder, _ = embeddings.NewEmbedder(llm) + } + ops = append(ops, pgvector.WithEmbedder(embedder)) + if collectionName != "" { + ops = append(ops, 
pgvector.WithCollectionName(collectionName)) + } else { + ops = append(ops, pgvector.WithCollectionName(vs.Spec.PGVector.CollectionName)) + } + v, err = pgvector.New(ctx, ops...) + case arcadiav1alpha1.VectorStoreTypeUnknown: + fallthrough + default: + err = ErrUnsupportedVectorStoreType + } + return v, finish, err +} + +func RemoveCollection(ctx context.Context, log logr.Logger, vs *arcadiav1alpha1.VectorStore, collectionName string) (err error) { + switch vs.Spec.Type() { + case arcadiav1alpha1.VectorStoreTypeChroma: + ops := []chroma.Option{ + chroma.WithChromaURL(vs.Spec.Endpoint.URL), + chroma.WithDistanceFunction(vs.Spec.Chroma.DistanceFunction), + chroma.WithOpenAiAPIKey("fake_key_just_for_chroma_heartbeat"), + } + if collectionName != "" { + ops = append(ops, chroma.WithNameSpace(collectionName)) + } + v, err := chroma.New(ops...) + if err != nil { + log.Error(err, "reconcile delete: init vector store error, may leave garbage data") + return err + } + if err = v.RemoveCollection(); err != nil { + log.Error(err, "reconcile delete: remove vector store error, may leave garbage data") + return err + } + case arcadiav1alpha1.VectorStoreTypePGVector: + ops := []pgvector.Option{ + pgvector.WithConnectionURL(vs.Spec.Endpoint.URL), + pgvector.WithPreDeleteCollection(vs.Spec.PGVector.PreDeleteCollection), + pgvector.WithCollectionTableName(vs.Spec.PGVector.CollectionTableName), + } + if collectionName != "" { + ops = append(ops, pgvector.WithCollectionName(collectionName)) + } else { + ops = append(ops, pgvector.WithCollectionName(vs.Spec.PGVector.CollectionName)) + } + v, err := pgvector.New(ctx, ops...) 
+ if err != nil { + log.Error(err, "reconcile delete: init vector store error, may leave garbage data") + return err + } + if err = v.RemoveCollection(ctx); err != nil { + log.Error(err, "reconcile delete: remove vector store error, may leave garbage data") + return err + } + + case arcadiav1alpha1.VectorStoreTypeUnknown: + fallthrough + default: + err = ErrUnsupportedVectorStoreType + } + return err +} diff --git a/tests/deploy-values.yaml b/tests/deploy-values.yaml index 81c5e62ac..f591f58e1 100644 --- a/tests/deploy-values.yaml +++ b/tests/deploy-values.yaml @@ -3,6 +3,7 @@ # @param imagePullPolcy ImagePullPolicy # @param resources Resources to be used controller: + loglevel: 5 image: kubeagi/arcadia:latest imagePullPolicy: IfNotPresent resources: @@ -119,5 +120,5 @@ postgresql: image: registry: docker.io repository: kubeagi/postgresql - tag: 16.1.0-debian-11-r15 + tag: 16.1.0-debian-11-r18-pgvector-v0.5.1 pullPolicy: IfNotPresent diff --git a/tests/example-test.sh b/tests/example-test.sh index bbcd765a9..100af1d31 100755 --- a/tests/example-test.sh +++ b/tests/example-test.sh @@ -24,7 +24,7 @@ fi export TERM=xterm-color KindName="kubeagi" -TimeoutSeconds=${TimeoutSeconds:-"600"} +TimeoutSeconds=${TimeoutSeconds:-"300"} HelmTimeout=${HelmTimeout:-"1800s"} KindVersion=${KindVersion:-"v1.24.4"} TempFilePath=${TempFilePath:-"/tmp/kubeagi-example-test"} @@ -244,6 +244,9 @@ fi info "6. verify default vectorstore" waitCRDStatusReady "VectorStore" "arcadia" "arcadia-vectorstore" +info "6.2 verify PGVector vectorstore" +kubectl apply -f config/samples/arcadia_v1alpha1_vectorstore_pgvector.yaml +waitCRDStatusReady "VectorStore" "arcadia" "pgvector-sample" info "7. 
create and verify knowledgebase" @@ -270,10 +273,15 @@ kubectl apply -f config/samples/arcadia_v1alpha1_embedders.yaml waitCRDStatusReady "Embedders" "arcadia" "zhipuai-embedders-sample" info "7.4 create knowledgebase and wait it ready" +info "7.4.1 create knowledgebase based on chroma and wait it ready" kubectl apply -f config/samples/arcadia_v1alpha1_knowledgebase.yaml waitCRDStatusReady "KnowledgeBase" "arcadia" "knowledgebase-sample" +sleep 3 +info "7.4.2 create knowledgebase based on pgvector and wait it ready" +kubectl apply -f config/samples/arcadia_v1alpha1_knowledgebase_pgvector.yaml +waitCRDStatusReady "KnowledgeBase" "arcadia" "knowledgebase-sample-pgvector" -info "7.5 check this vectorstore has data" +info "7.5 check chroma vectorstore has data" kubectl port-forward -n arcadia svc/arcadia-chromadb 8000:8000 >/dev/null 2>&1 & chroma_pid=$! info "port-forward chroma in pid: $chroma_pid" @@ -300,12 +308,19 @@ info "port-forward portal in pid: $portal_pid" sleep 3 getRespInAppChat "base-chat-english-teacher" "arcadia" "hi how are you?" "" "true" -info "8.2 QA app using knowledgebase" +info "8.2 QA app using knowledgebase base" +info "8.2.1 QA app using knowledgebase base on chroma" kubectl apply -f config/samples/app_retrievalqachain_knowledgebase.yaml waitCRDStatusReady "Application" "arcadia" "base-chat-with-knowledgebase" sleep 3 getRespInAppChat "base-chat-with-knowledgebase" "arcadia" "旷工最小计算单位为多少天?" "" "true" +info "8.2.2 QA app using knowledgebase base on pgvector" +kubectl apply -f config/samples/app_retrievalqachain_knowledgebase_pgvector.yaml +waitCRDStatusReady "Application" "arcadia" "base-chat-with-knowledgebase-pgvector" +sleep 3 +getRespInAppChat "base-chat-with-knowledgebase" "arcadia" "旷工最小计算单位为多少天?" "" "true" + info "8.3 conversation chat app" kubectl apply -f config/samples/app_llmchain_chat_with_bot.yaml waitCRDStatusReady "Application" "arcadia" "base-chat-with-bot"