Skip to content

Commit

Permalink
feat: add pgvector as vectorstore
Browse files Browse the repository at this point in the history
Signed-off-by: Abirdcfly <fp544037857@gmail.com>
  • Loading branch information
Abirdcfly committed Jan 8, 2024
1 parent c206d1d commit 1c12849
Show file tree
Hide file tree
Showing 27 changed files with 614 additions and 196 deletions.
1 change: 1 addition & 0 deletions .github/workflows/example_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
# see https://github.com/actions/runner/issues/662
if: steps.cache.outputs.cache-hit != 'true'
- name: Setup Golang
if: steps.cache.outputs.cache-hit != 'true'
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
Expand Down
22 changes: 15 additions & 7 deletions api/base/v1alpha1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,28 +183,36 @@ type CommonSpec struct {
Description string `json:"description,omitempty"`
}

func (endpoint Endpoint) AuthAPIKey(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (string, error) {
func (endpoint Endpoint) AuthData(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (map[string][]byte, error) {
if endpoint.AuthSecret == nil {
return "", nil
return nil, nil
}
if err := utils.ValidateClient(c, cli); err != nil {
return "", err
return nil, err
}
authSecret := &corev1.Secret{}
if c != nil {
if err := c.Get(ctx, types.NamespacedName{Name: endpoint.AuthSecret.Name, Namespace: endpoint.AuthSecret.GetNamespace(ns)}, authSecret); err != nil {
return "", err
return nil, err
}
} else {
obj, err := cli.Resource(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "secrets"}).
Namespace(endpoint.AuthSecret.GetNamespace(ns)).Get(ctx, endpoint.AuthSecret.Name, metav1.GetOptions{})
if err != nil {
return "", err
return nil, err
}
err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.UnstructuredContent(), authSecret)
if err != nil {
return "", err
return nil, err
}
}
return string(authSecret.Data["apiKey"]), nil
return authSecret.Data, nil
}

// AuthAPIKey returns the value stored under the "apiKey" key of the data
// obtained from AuthData, converted to a string.
//
// Any error reported by AuthData is returned unchanged together with an empty
// string. When AuthData yields no data, or the data has no "apiKey" entry, the
// result is an empty string and a nil error.
func (endpoint Endpoint) AuthAPIKey(ctx context.Context, ns string, c client.Client, cli dynamic.Interface) (string, error) {
	secretData, err := endpoint.AuthData(ctx, ns, c, cli)
	if err != nil {
		return "", err
	}
	// Indexing a nil or key-less map yields a nil slice, which converts to "".
	apiKey := secretData["apiKey"]
	return string(apiKey), nil
}
18 changes: 9 additions & 9 deletions api/base/v1alpha1/vectorstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ const (
type VectorStoreType string

const (
VectorStoreTypeChroma VectorStoreType = "chroma"
VectorStoreTypeUnknown VectorStoreType = "unknown"
VectorStoreTypeChroma VectorStoreType = "chroma"
VectorStoreTypePGVector VectorStoreType = "pgvector"
VectorStoreTypeUnknown VectorStoreType = "unknown"
)

func (vs VectorStoreSpec) Type() VectorStoreType {
if vs.Endpoint == nil {
return VectorStoreTypeUnknown
}

if vs.Chroma != nil {
switch {
case vs.Chroma != nil:
return VectorStoreTypeChroma
case vs.PGVector != nil:
return VectorStoreTypePGVector
default:
return VectorStoreTypeUnknown
}

return VectorStoreTypeUnknown
}

func (vs *VectorStore) InitCondition() Condition {
Expand Down
15 changes: 15 additions & 0 deletions api/base/v1alpha1/vectorstore_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,28 @@ type VectorStoreSpec struct {
Endpoint *Endpoint `json:"endpoint,omitempty"`

Chroma *Chroma `json:"chroma,omitempty"`

PGVector *PGVector `json:"pgvector,omitempty"`
}

// Chroma defines the configuration of Chroma
type Chroma struct {
// DistanceFunction holds a chromago.DistanceFunction value; it is serialized
// as `distanceFunction` and omitted from JSON when empty.
DistanceFunction chromago.DistanceFunction `json:"distanceFunction,omitempty"`
}

// PGVector defines the configuration of PGVector
type PGVector struct {
// PreDeleteCollection defines if the collection should be deleted before creating.
PreDeleteCollection bool `json:"preDeleteCollection,omitempty"`
// CollectionName defines the name of the collection
CollectionName string `json:"collectionName,omitempty"`
// EmbeddingTableName defines the name of the embedding table. if empty, use `langchain_pg_embedding`
EmbeddingTableName string `json:"embeddingTableName,omitempty"`
// CollectionTableName defines the name of the collection table. if empty, use `langchain_pg_collection`
CollectionTableName string `json:"collectionTableName,omitempty"`
// DataSourceRef defines the reference of the data source
DataSourceRef *TypedObjectReference `json:"dataSourceRef,omitempty"`
}

// VectorStoreStatus defines the observed state of VectorStore
type VectorStoreStatus struct {
// ConditionedStatus is the current status
Expand Down
25 changes: 25 additions & 0 deletions api/base/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions config/crd/bases/arcadia.kubeagi.k8s.com.cn_vectorstores.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,47 @@ spec:
description: URL for this endpoint
type: string
type: object
pgvector:
properties:
collectionName:
description: CollectionName defines the name of the collection
type: string
collectionTableName:
description: CollectionTableName defines the name of the collection
table. if empty, use `langchain_pg_collection`
type: string
dataSourceRef:
description: DataSourceRef defines the reference of the data source
properties:
apiGroup:
description: APIGroup is the group for the resource being
referenced. If APIGroup is not specified, the specified
Kind must be in the core API group. For any other third-party
types, APIGroup is required.
type: string
kind:
description: Kind is the type of resource being referenced
type: string
name:
description: Name is the name of resource being referenced
type: string
namespace:
description: Namespace is the namespace of resource being
referenced
type: string
required:
- kind
- name
type: object
embeddingTableName:
description: EmbeddingTableName defines the name of the embedding
table. if empty, use `langchain_pg_embedding`
type: string
preDeleteCollection:
description: PreDeleteCollection defines if the collection should
be deleted before creating.
type: boolean
type: object
type: object
status:
description: VectorStoreStatus defines the observed state of VectorStore
Expand Down
63 changes: 63 additions & 0 deletions config/samples/app_retrievalqachain_knowledgebase_pgvector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: Application
metadata:
name: base-chat-with-knowledgebase-pgvector
namespace: arcadia
spec:
displayName: "知识库应用"
description: "最简单的和知识库对话的应用"
prologue: "Welcome to talk to the KnowledgeBase!🤖"
nodes:
- name: Input
displayName: "用户输入"
description: "用户输入节点,必须"
ref:
kind: Input
name: Input
nextNodeName: ["prompt-node"]
- name: prompt-node
displayName: "prompt"
description: "设定prompt,template中可以使用{{xx}}来替换变量"
ref:
apiGroup: prompt.arcadia.kubeagi.k8s.com.cn
kind: Prompt
name: base-chat-with-knowledgebase
nextNodeName: ["chain-node"]
- name: llm-node
displayName: "zhipu大模型服务"
description: "设定大模型的访问信息"
ref:
apiGroup: arcadia.kubeagi.k8s.com.cn
kind: LLM
name: app-shared-llm-service
nextNodeName: ["chain-node"]
- name: knowledgebase-node
displayName: "使用的知识库"
description: "要用哪个知识库"
ref:
apiGroup: arcadia.kubeagi.k8s.com.cn
kind: KnowledgeBase
name: knowledgebase-sample-pgvector
nextNodeName: ["retriever-node"]
- name: retriever-node
displayName: "从知识库提取信息的retriever"
description: "连接应用和知识库"
ref:
apiGroup: retriever.arcadia.kubeagi.k8s.com.cn
kind: KnowledgeBaseRetriever
name: base-chat-with-knowledgebase
nextNodeName: ["chain-node"]
- name: chain-node
displayName: "RetrievalQA chain"
description: "chain是langchain的核心概念,RetrievalQAChain用于从 retriever 中提取信息,供llm调用"
ref:
apiGroup: chain.arcadia.kubeagi.k8s.com.cn
kind: RetrievalQAChain
name: base-chat-with-knowledgebase
nextNodeName: ["Output"]
- name: Output
displayName: "最终输出"
description: "最终输出节点,必须"
ref:
kind: Output
name: Output
3 changes: 2 additions & 1 deletion config/samples/arcadia_v1alpha1_datasource_postgresql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ spec:
authSecret:
kind: Secret
name: datasource-postgresql-sample-authsecret
namespace: arcadia
postgresql:
PGDATABASE: arcadia
PGDATABASE: admin
23 changes: 23 additions & 0 deletions config/samples/arcadia_v1alpha1_knowledgebase_pgvector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: KnowledgeBase
metadata:
name: knowledgebase-sample-pgvector
namespace: arcadia
spec:
displayName: "测试 KnowledgeBase"
description: "测试 KnowledgeBase"
embedder:
kind: Embedders
name: zhipuai-embedders-sample
namespace: arcadia
vectorStore:
kind: VectorStores
name: pgvector-sample
namespace: arcadia
fileGroups:
- source:
kind: VersionedDataset
name: dataset-playground-v1
namespace: arcadia
paths:
- qa.csv
14 changes: 14 additions & 0 deletions config/samples/arcadia_v1alpha1_vectorstore_pgvector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: VectorStore
metadata:
name: pgvector-sample
namespace: arcadia
spec:
displayName: "测试 PGVector VectorStore"
description: "测试 PGvector VectorStore"
pgvector:
dataSourceRef:
apiGroup: arcadia.kubeagi.k8s.com.cn
kind: Datasource
name: arcadia-postgresql
namespace: arcadia
15 changes: 13 additions & 2 deletions controllers/datasource_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (r *DatasourceReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// indicated by the deletion timestamp being set.
if instance.GetDeletionTimestamp() != nil && controllerutil.ContainsFinalizer(instance, arcadiav1alpha1.Finalizer) {
logger.Info("Performing Finalizer Operations for Datasource before delete CR")
// TODO perform the finalizer operations here, for example: remove data?
r.RemoveDatasource(logger, instance)
logger.Info("Removing Finalizer for Datasource after successfully performing the operations")
controllerutil.RemoveFinalizer(instance, arcadiav1alpha1.Finalizer)
if err := r.Update(ctx, instance); err != nil {
Expand Down Expand Up @@ -172,7 +172,7 @@ func (r *DatasourceReconciler) Checkdatasource(ctx context.Context, logger logr.
case arcadiav1alpha1.DatasourceTypeRDMA:
return r.UpdateStatus(ctx, instance, nil)
case arcadiav1alpha1.DatasourceTypePostgreSQL:
ds, err = datasource.NewPostgreSQL(ctx, r.Client, nil, instance.Spec.PostgreSQL, endpoint)
ds, err = datasource.GetPostgreSQLPool(ctx, r.Client, nil, instance)
if err != nil {
return r.UpdateStatus(ctx, instance, err)
}
Expand Down Expand Up @@ -219,3 +219,14 @@ func (r *DatasourceReconciler) UpdateStatus(ctx context.Context, instance *arcad
instanceCopy.Status.SetConditions(newCondition)
return r.Client.Status().Update(ctx, instanceCopy)
}

// RemoveDatasource performs per-type cleanup for a Datasource.
//
// It logs "remove datasource" at verbosity 5 and then, for PostgreSQL
// datasources only, passes a copy of the instance to
// datasource.RemovePostgreSQLPool. OSS, RDMA and any other datasource type
// require no action here.
func (r *DatasourceReconciler) RemoveDatasource(logger logr.Logger, instance *arcadiav1alpha1.Datasource) {
	logger.V(5).Info("remove datasource")
	if instance.Spec.Type() != arcadiav1alpha1.DatasourceTypePostgreSQL {
		// Nothing to clean up for non-PostgreSQL datasources.
		return
	}
	datasource.RemovePostgreSQLPool(*instance)
}
Loading

0 comments on commit 1c12849

Please sign in to comment.