Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add pgvector as vectorstore #522

Merged
merged 1 commit into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/codespell.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ jobs:
with:
ignore_words_file: .github/.codespellignore
check_filenames: true
skip: go.*,**/*.drawio,./deploy/charts/*,./config/crd/*,./deploy/llms/*
skip: go.*,**/*.drawio,./deploy/charts/*,./config/crd/*,./deploy/llms/*,./deploy/pgvector

1 change: 1 addition & 0 deletions .github/workflows/example_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
# see https://github.com/actions/runner/issues/662
if: steps.cache.outputs.cache-hit != 'true'
- name: Setup Golang
if: steps.cache.outputs.cache-hit != 'true'
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
Expand Down
58 changes: 58 additions & 0 deletions .github/workflows/pgvector_image_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: Build pgvector images

on:
pull_request:
branches: [main]
push:
branches: [main]
paths:
- 'deploy/pgvector/Dockerfile'
workflow_dispatch:

# see deploy/pgvector/README.md first
env:
TAG: 16.1.0-debian-11-r18-pgvector-v0.5.1
Abirdcfly marked this conversation as resolved.
Show resolved Hide resolved

jobs:
image:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
buildkitd-flags: --debug
config-inline: |
[worker.oci]
max-parallelism = 1
- name: Login to the Docker Hub registry (push only)
if: github.event_name == 'push'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Build image only (no push)
if: github.event_name != 'push'
uses: docker/build-push-action@v5
with:
context: deploy/pgvector
file: deploy/pgvector/Dockerfile
platforms: linux/amd64
tags: |
kubeagi/postgresql:latest
kubeagi/postgresql:${{ env.TAG }}
push: false
load: true
- name: Build and push image
if: github.event_name == 'push'
uses: docker/build-push-action@v5
with:
context: deploy/pgvector
file: deploy/pgvector/Dockerfile
platforms: linux/amd64,linux/arm64
tags: |
kubeagi/postgresql:latest
kubeagi/postgresql:${{ env.TAG }}
push: true
22 changes: 15 additions & 7 deletions api/base/v1alpha1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,28 +183,36 @@ type CommonSpec struct {
Description string `json:"description,omitempty"`
}

func (endpoint Endpoint) AuthAPIKey(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (string, error) {
func (endpoint Endpoint) AuthData(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (map[string][]byte, error) {
if endpoint.AuthSecret == nil {
return "", nil
return nil, nil
}
if err := utils.ValidateClient(c, cli); err != nil {
return "", err
return nil, err
}
authSecret := &corev1.Secret{}
if c != nil {
if err := c.Get(ctx, types.NamespacedName{Name: endpoint.AuthSecret.Name, Namespace: endpoint.AuthSecret.GetNamespace(ns)}, authSecret); err != nil {
return "", err
return nil, err
}
} else {
obj, err := cli.Resource(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}).
Namespace(endpoint.AuthSecret.GetNamespace(ns)).Get(ctx, endpoint.AuthSecret.Name, metav1.GetOptions{})
if err != nil {
return "", err
return nil, err
}
err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.UnstructuredContent(), authSecret)
if err != nil {
return "", err
return nil, err
}
}
return string(authSecret.Data["apiKey"]), nil
return authSecret.Data, nil
}

// AuthAPIKey returns the "apiKey" entry of the endpoint's auth secret as a string.
//
// The secret data is loaded through AuthData with the same ctx, ns, c and cli
// arguments. If AuthData fails, its error is returned unchanged together with
// an empty string. If the loaded data has no "apiKey" entry (including the case
// where AuthData yields no data at all), the result is "" with a nil error.
func (endpoint Endpoint) AuthAPIKey(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (string, error) {
	secretData, authErr := endpoint.AuthData(ctx, ns, c, cli)
	if authErr != nil {
		return "", authErr
	}
	// Indexing a nil or key-less map yields a nil slice, which converts to "".
	apiKey := secretData["apiKey"]
	return string(apiKey), nil
}
18 changes: 9 additions & 9 deletions api/base/v1alpha1/vectorstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ const (
type VectorStoreType string

const (
VectorStoreTypeChroma VectorStoreType = "chroma"
VectorStoreTypeUnknown VectorStoreType = "unknown"
VectorStoreTypeChroma VectorStoreType = "chroma"
VectorStoreTypePGVector VectorStoreType = "pgvector"
VectorStoreTypeUnknown VectorStoreType = "unknown"
)

func (vs VectorStoreSpec) Type() VectorStoreType {
if vs.Endpoint == nil {
return VectorStoreTypeUnknown
}

if vs.Chroma != nil {
switch {
case vs.Chroma != nil:
return VectorStoreTypeChroma
case vs.PGVector != nil:
return VectorStoreTypePGVector
default:
return VectorStoreTypeUnknown
}

return VectorStoreTypeUnknown
}

func (vs *VectorStore) InitCondition() Condition {
Expand Down
15 changes: 15 additions & 0 deletions api/base/v1alpha1/vectorstore_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,28 @@ type VectorStoreSpec struct {
Endpoint *Endpoint `json:"endpoint,omitempty"`

Chroma *Chroma `json:"chroma,omitempty"`

PGVector *PGVector `json:"pgvector,omitempty"`
}

// Chroma defines the configuration of Chroma
type Chroma struct {
// DistanceFunction is the chromago distance function configured for this store.
// It is serialized as `distanceFunction` and omitted when empty.
DistanceFunction chromago.DistanceFunction `json:"distanceFunction,omitempty"`
}

// PGVector defines the configuration of PGVector
type PGVector struct {
// PreDeleteCollection defines if the collection should be deleted before creating.
PreDeleteCollection bool `json:"preDeleteCollection,omitempty"`
// CollectionName defines the name of the collection
CollectionName string `json:"collectionName,omitempty"`
bjwswang marked this conversation as resolved.
Show resolved Hide resolved
// EmbeddingTableName defines the name of the embedding table. if empty, use `langchain_pg_embedding`
EmbeddingTableName string `json:"embeddingTableName,omitempty"`
// CollectionTableName defines the name of the collection table. if empty, use `langchain_pg_collection`
CollectionTableName string `json:"collectionTableName,omitempty"`
// DataSourceRef defines the reference of the data source
DataSourceRef *TypedObjectReference `json:"dataSourceRef,omitempty"`
}

// VectorStoreStatus defines the observed state of VectorStore
type VectorStoreStatus struct {
// ConditionedStatus is the current status
Expand Down
25 changes: 25 additions & 0 deletions api/base/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,47 @@ spec:
description: URL for this endpoint
type: string
type: object
pgvector:
properties:
collectionName:
description: CollectionName defines the name of the collection
type: string
collectionTableName:
description: CollectionTableName defines the name of the collection
table. if empty, use `langchain_pg_collection`
type: string
dataSourceRef:
description: DataSourceRef defines the reference of the data source
properties:
apiGroup:
description: APIGroup is the group for the resource being
referenced. If APIGroup is not specified, the specified
Kind must be in the core API group. For any other third-party
types, APIGroup is required.
type: string
kind:
description: Kind is the type of resource being referenced
type: string
name:
description: Name is the name of resource being referenced
type: string
namespace:
description: Namespace is the namespace of resource being
referenced
type: string
required:
- kind
- name
type: object
embeddingTableName:
description: EmbeddingTableName defines the name of the embedding
table. if empty, use `langchain_pg_embedding`
type: string
preDeleteCollection:
description: PreDeleteCollection defines if the collection should
be deleted before creating.
type: boolean
type: object
type: object
status:
description: VectorStoreStatus defines the observed state of VectorStore
Expand Down
63 changes: 63 additions & 0 deletions config/samples/app_retrievalqachain_knowledgebase_pgvector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: Application
metadata:
name: base-chat-with-knowledgebase-pgvector
namespace: arcadia
spec:
displayName: "知识库应用"
description: "最简单的和知识库对话的应用"
prologue: "Welcome to talk to the KnowledgeBase!🤖"
nodes:
- name: Input
displayName: "用户输入"
description: "用户输入节点,必须"
ref:
kind: Input
name: Input
nextNodeName: ["prompt-node"]
- name: prompt-node
displayName: "prompt"
description: "设定prompt,template中可以使用{{xx}}来替换变量"
ref:
apiGroup: prompt.arcadia.kubeagi.k8s.com.cn
kind: Prompt
name: base-chat-with-knowledgebase
nextNodeName: ["chain-node"]
- name: llm-node
displayName: "zhipu大模型服务"
description: "设定大模型的访问信息"
ref:
apiGroup: arcadia.kubeagi.k8s.com.cn
kind: LLM
name: app-shared-llm-service
nextNodeName: ["chain-node"]
- name: knowledgebase-node
displayName: "使用的知识库"
description: "要用哪个知识库"
ref:
apiGroup: arcadia.kubeagi.k8s.com.cn
kind: KnowledgeBase
name: knowledgebase-sample-pgvector
nextNodeName: ["retriever-node"]
- name: retriever-node
displayName: "从知识库提取信息的retriever"
description: "连接应用和知识库"
ref:
apiGroup: retriever.arcadia.kubeagi.k8s.com.cn
kind: KnowledgeBaseRetriever
name: base-chat-with-knowledgebase
nextNodeName: ["chain-node"]
- name: chain-node
displayName: "RetrievalQA chain"
description: "chain是langchain的核心概念,RetrievalQAChain用于从 retriever 中提取信息,供llm调用"
ref:
apiGroup: chain.arcadia.kubeagi.k8s.com.cn
kind: RetrievalQAChain
name: base-chat-with-knowledgebase
nextNodeName: ["Output"]
- name: Output
displayName: "最终输出"
description: "最终输出节点,必须"
ref:
kind: Output
name: Output
3 changes: 2 additions & 1 deletion config/samples/arcadia_v1alpha1_datasource_postgresql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
name: datasource-postgresql-sample-authsecret
namespace: arcadia
data:
PGUSER: YWRtaW4=
PGUSER: cG9zdGdyZXM=
PGPASSWORD: UGFzc3cwcmQh
---
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
Expand All @@ -20,5 +20,6 @@ spec:
authSecret:
kind: Secret
name: datasource-postgresql-sample-authsecret
namespace: arcadia
postgresql:
PGDATABASE: arcadia
23 changes: 23 additions & 0 deletions config/samples/arcadia_v1alpha1_knowledgebase_pgvector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: KnowledgeBase
metadata:
name: knowledgebase-sample-pgvector
namespace: arcadia
spec:
displayName: "测试 KnowledgeBase"
description: "测试 KnowledgeBase"
embedder:
kind: Embedders
name: zhipuai-embedders-sample
namespace: arcadia
vectorStore:
kind: VectorStores
name: pgvector-sample
namespace: arcadia
fileGroups:
- source:
kind: VersionedDataset
name: dataset-playground-v1
namespace: arcadia
paths:
- qa.csv
14 changes: 14 additions & 0 deletions config/samples/arcadia_v1alpha1_vectorstore_pgvector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: VectorStore
metadata:
name: pgvector-sample
namespace: arcadia
spec:
displayName: "测试 PGVector VectorStore"
description: "测试 PGVector VectorStore"
pgvector:
dataSourceRef:
apiGroup: arcadia.kubeagi.k8s.com.cn
kind: Datasource
name: arcadia-postgresql
namespace: arcadia
Loading