Skip to content

Commit

Permalink
feat(worker): add RunnerKubeAGI(developed by kubeagi team) to host re…
Browse files Browse the repository at this point in the history
…ranking models

Signed-off-by: bjwswang <bjwswang@gmail.com>
  • Loading branch information
bjwswang committed Mar 4, 2024
1 parent ca933d5 commit 65aa753
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 1 deletion.
1 change: 1 addition & 0 deletions api/base/v1alpha1/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type WorkerType string
const (
	// WorkerTypeFastchatNormal runs the model with the standard FastChat worker image.
	WorkerTypeFastchatNormal WorkerType = "fastchat"
	// WorkerTypeFastchatVLLM runs the model with the FastChat worker image built with vLLM.
	WorkerTypeFastchatVLLM WorkerType = "fastchat-vllm"
	// WorkerTypeKubeAGI runs the model with the kubeagi core-library-cli runner;
	// it is the only worker type allowed to host reranking models.
	WorkerTypeKubeAGI WorkerType = "kubeagi"
	// WorkerTypeUnknown marks a worker whose type could not be determined.
	WorkerTypeUnknown WorkerType = "unknown"
)

Expand Down
32 changes: 32 additions & 0 deletions config/samples/arcadia_v1alpha1_worker_reranking.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: Model
metadata:
name: bge-reranker-large
namespace: arcadia
spec:
displayName: "bge-reranker-large"
description: |
bge-reranker-large 是一个通用reranking,由北京智源人工智能研究院(BAAI)推出,同时支持中英文
官网: https://www.baai.ac.cn/
Github: https://github.com/FlagOpen/FlagEmbedding
HuggingFace: https://huggingface.co/BAAI/bge-reranker-large
arXiv: https://arxiv.org/pdf/2309.07597
北京智源人工智能研究院是北京大学的直属研究机构,主要从事人工智能的数理基础、机器学习、智能信息检索与挖掘、智能体系架构与芯片、自然语言处理等领域研究。
types: "reranking"
huggingFaceRepo: BAAI/bge-reranker-large
---
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: Worker
metadata:
name: bge-reranker-large
namespace: arcadia
spec:
displayName: BGE Reranking模型
description: "bge-reranker-large 是一个通用reranking,由北京智源人工智能研究院(BAAI)推出"
type: "kubeagi"
model:
kind: "Models"
name: "bge-reranker-large"
replicas: 1
67 changes: 67 additions & 0 deletions pkg/worker/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,16 @@ const (
// tag is the same version as fastchat
defaultFastChatImage = "kubeagi/arcadia-fastchat-worker:v0.2.36"
defaultFastchatVLLMImage = "kubeagi/arcadia-fastchat-worker:vllm-v0.2.36"
// defaultKubeAGIImage for RunnerKubeAGI
defaultKubeAGIImage = "kubeagi/core-library-cli:v0.0.1"
)

// ModelRunner abstracts a runnable model service: it reports the compute
// requirements of the worker and builds the runtime object that hosts the model.
type ModelRunner interface {
	// Device reports the device (CPU/GPU) used when running the model.
	Device() Device
	// NumberOfGPUs reports the GPU count used when running the model,
	// rendered as a string.
	NumberOfGPUs() string
	// Build constructs a runner instance for the given model reference.
	// Implementations in this file return a *corev1.Container as the `any` value.
	Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}
Expand All @@ -63,6 +67,7 @@ func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker, modelFileFrom
}, nil
}

// Device reports the compute device this runner targets, derived from the
// worker's resource limits.
func (runner *RunnerFastchat) Device() Device {
	limits := runner.w.Spec.Resources.Limits
	return DeviceBasedOnResource(limits)
}
Expand Down Expand Up @@ -255,3 +260,65 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp

return container, nil
}

// Compile-time assertion that *KubeAGIRunner implements ModelRunner.
var _ ModelRunner = (*KubeAGIRunner)(nil)

// KubeAGIRunner utilizes core-library-cli (https://github.com/kubeagi/core-library/tree/main/libs/cli)
// to run model services. Mainly for reranking, whisper, etc.
// Note: only reranking is wired up so far (see the env vars set in Build).
type KubeAGIRunner struct {
	// c is the Kubernetes client used by this runner.
	c client.Client
	// w is the Worker resource this runner serves.
	w *arcadiav1alpha1.Worker
}

// NewKubeAGIRunner constructs a ModelRunner backed by the kubeagi
// core-library-cli for the given worker. It never returns an error today;
// the error result keeps the signature parallel with the other runner
// constructors.
func NewKubeAGIRunner(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) {
	runner := &KubeAGIRunner{c: c, w: w}
	return runner, nil
}

// Device reports the compute device used when running the model, derived
// from the worker's resource limits.
func (runner *KubeAGIRunner) Device() Device {
	limits := runner.w.Spec.Resources.Limits
	return DeviceBasedOnResource(limits)
}

// NumberOfGPUs reports how many GPUs this runner will use, derived from
// the worker's resource limits.
func (runner *KubeAGIRunner) NumberOfGPUs() string {
	limits := runner.w.Spec.Resources.Limits
	return NumberOfGPUs(limits)
}

// Build assembles the corev1.Container that serves the given model with the
// kubeagi core-library-cli. The model files are expected under
// /data/models/<model name> on the "models" volume (mounted elsewhere by the
// pod worker). The service listens on port 21002.
//
// Returns an error when model is nil; ctx is currently unused.
func (runner *KubeAGIRunner) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) {
	if model == nil {
		return nil, errors.New("nil model")
	}

	// The worker spec may override the default runner image.
	image := defaultKubeAGIImage
	if override := runner.w.Spec.Runner.Image; override != "" {
		image = override
	}

	// Where the "models" volume is mounted inside the container.
	const modelsRoot = "/data/models"
	modelPath := fmt.Sprintf("%s/%s", modelsRoot, model.Name)

	container := &corev1.Container{
		Name:            "runner",
		Image:           image,
		ImagePullPolicy: runner.w.Spec.Runner.ImagePullPolicy,
		Command: []string{
			"python", "kubeagi_cli/cli.py", "serve", "--host", "0.0.0.0", "--port", "21002",
		},
		Env: []corev1.EnvVar{
			// Only reranking supported for now
			{Name: "RERANKING_MODEL_PATH", Value: modelPath},
		},
		Ports: []corev1.ContainerPort{
			{Name: "http", ContainerPort: 21002},
		},
		VolumeMounts: []corev1.VolumeMount{
			{Name: "models", MountPath: modelsRoot},
		},
		Resources: runner.w.Spec.Resources,
	}

	return container, nil
}
13 changes: 12 additions & 1 deletion pkg/worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,6 @@ func NewPodWorker(ctx context.Context, c client.Client, s *runtime.Scheme, w *ar
return nil, fmt.Errorf("datasource %s with type %s not supported in worker", d.Name, d.Spec.Type())
}

// init runner
return podWorker, nil
}

Expand All @@ -245,6 +244,12 @@ func (podWorker *PodWorker) Model() *arcadiav1alpha1.Model {
// Now we have a pvc(if configured), service, LLM(if a llm model), Embedder(if a embedding model)
func (podWorker *PodWorker) BeforeStart(ctx context.Context) error {
var err error

// Capability Checks
if podWorker.Model().IsRerankingModel() && podWorker.Worker().Type() != arcadiav1alpha1.WorkerTypeKubeAGI {
return errors.New("only kubeagi runner can host reranking models")
}

// If the local directory is mounted, there is no need to create the pvc
if podWorker.Worker().Spec.Storage != nil && podWorker.storage.HostPath == nil {
pvc := &corev1.PersistentVolumeClaim{
Expand Down Expand Up @@ -387,6 +392,12 @@ func (podWorker *PodWorker) Start(ctx context.Context) error {
return fmt.Errorf("failed to new a runner with %w", err)
}
podWorker.r = r
case arcadiav1alpha1.WorkerTypeKubeAGI:
r, err := NewKubeAGIRunner(podWorker.c, podWorker.w.DeepCopy())
if err != nil {
return fmt.Errorf("failed to new a runner with %w", err)
}
podWorker.r = r
default:
return fmt.Errorf("worker %s with type %s not supported in worker", podWorker.w.Name, podWorker.w.Type())
}
Expand Down

0 comments on commit 65aa753

Please sign in to comment.