Skip to content

Commit

Permalink
feat: #427 support to run models using ray cluster
Browse files Browse the repository at this point in the history
  • Loading branch information
nkwangleiGIT committed Jan 5, 2024
1 parent 2833016 commit 8d83804
Show file tree
Hide file tree
Showing 17 changed files with 321 additions and 56 deletions.
4 changes: 2 additions & 2 deletions ROADMAP.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Roadmap of KubeAGI

### v0.1.0 - 2023 Q4, will release soon
### v0.1.0 - 2023 Q4 Released

* Dataset Management - manage data, including local files, integrate with object storage(s3), data editing, version control, and file download
* Data Processing - data cleaning, text splitting (e.g., text segmentation, QA splitting), file labeling
Expand All @@ -14,7 +14,7 @@
* LLM Applications - prompt engineering, initial implementation of LLM application orchestration capabilities. Manage and orchestrate Prompt, LLM/Retriever Chain nodes, and provide relevant example applications (based on streamlit)
* Guided walkthroughs and example scenarios - help users get started building LLM applications quickly, and add more built-in chat example applications

### v0.2.0 - 2024 Feb.
### v0.2.0 - 2024 Feb. Ongoing
* Support evaluation of Prompts under different LLMs and generate test reports.
* RAG evaluation and RAG Question Generation
- Optimize question generation, analyze question quality, filter out low-similarity questions
Expand Down
4 changes: 2 additions & 2 deletions ROADMAP_cn.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Roadmap of KubeAGI

### v0.1.0 - 2023 Q4, 即将发布
### v0.1.0 - 2023 Q4, 已发布

* 数据集管理 - 对数据进行管理,包括本地上传、对接对象存储、数据编辑、版本控制、下载等
* 数据处理 - 数据清洗、文本拆分(文本分段、QA拆分)、文件标签
Expand All @@ -14,7 +14,7 @@
* 模型应用 - Prompt Engineering,初步实现 LLM 应用的编排能力,支持 Prompt、LLM/Retriever Chain节点的管理及编排,提供相关示例应用(基于 streamlit)
* 引导、示例场景 - 通过向导方式引导用户平台使用流程;内置 chat 示例应用

### v0.2.0 - 2024 Feb.
### v0.2.0 - 2024 Feb. 进行中...
* 支持 Prompt 在不同 LLM 下的评估,生成测试报告
* RAG 评估、RAG Question Generation
- 优化问题自动生成,分析问题质量,过滤掉相似度不高的问题
Expand Down
6 changes: 6 additions & 0 deletions api/base/v1alpha1/worker_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ type WorkerSpec struct {
// - Memory
Resources corev1.ResourceRequirements `json:"resources,omitempty"`

// NodeSelectorRequirement to schedule this worker
MatchExpressions []corev1.NodeSelectorRequirement `json:"matchExpressions,omitempty"`

// Additional env to use
AdditionalEnvs []corev1.EnvVar `json:"additionalEnvs,omitempty"`

// Storage claimed to store model files
Storage *corev1.PersistentVolumeClaimSpec `json:"storage,omitempty"`
}
Expand Down
14 changes: 14 additions & 0 deletions api/base/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

138 changes: 138 additions & 0 deletions config/crd/bases/arcadia.kubeagi.k8s.com.cn_workers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,115 @@ spec:
spec:
description: WorkerSpec defines the desired state of Worker
properties:
additionalEnvs:
description: Additional env to use
items:
description: EnvVar represents an environment variable present in
a Container.
properties:
name:
description: Name of the environment variable. Must be a C_IDENTIFIER.
type: string
value:
description: 'Variable references $(VAR_NAME) are expanded using
the previously defined environment variables in the container
and any service environment variables. If a variable cannot
be resolved, the reference in the input string will be unchanged.
Double $$ are reduced to a single $, which allows for escaping
the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will produce the
string literal "$(VAR_NAME)". Escaped references will never
be expanded, regardless of whether the variable exists or
not. Defaults to "".'
type: string
valueFrom:
description: Source for the environment variable's value. Cannot
be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the ConfigMap or its key
must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
description: 'Selects a field of the pod: supports metadata.name,
metadata.namespace, `metadata.labels[''<KEY>'']`, `metadata.annotations[''<KEY>'']`,
spec.nodeName, spec.serviceAccountName, status.hostIP,
status.podIP, status.podIPs.'
properties:
apiVersion:
description: Version of the schema the FieldPath is
written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the specified
API version.
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
description: 'Selects a resource of the container: only
resources limits and requests (limits.cpu, limits.memory,
limits.ephemeral-storage, requests.cpu, requests.memory
and requests.ephemeral-storage) are currently supported.'
properties:
containerName:
description: 'Container name: required for volumes,
optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the exposed
resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
description: Selects a key of a secret in the pod's namespace
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
creator:
description: Creator defines datasource creator (AUTO-FILLED by webhook)
type: string
Expand All @@ -51,6 +160,35 @@ spec:
displayName:
description: DisplayName defines datasource display name
type: string
matchExpressions:
description: NodeSelectorRequirement to schedule this worker
items:
description: A node selector requirement is a selector that contains
values, a key, and an operator that relates the key and values.
properties:
key:
description: The label key that the selector applies to.
type: string
operator:
description: Represents a key's relationship to a set of values.
Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and
Lt.
type: string
values:
description: An array of string values. If the operator is In
or NotIn, the values array must be non-empty. If the operator
is Exists or DoesNotExist, the values array must be empty.
If the operator is Gt or Lt, the values array must have a
single element, which will be interpreted as an integer. This
array is replaced during a strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
model:
description: Model this worker wants to use
properties:
Expand Down
2 changes: 1 addition & 1 deletion deploy/charts/arcadia/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: arcadia
description: A Helm chart(KubeBB Component) for KubeAGI Arcadia
type: application
version: 0.2.2
version: 0.2.3
appVersion: "0.1.0"

keywords:
Expand Down
9 changes: 4 additions & 5 deletions deploy/charts/arcadia/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ global:
# @param imagePullPolcy ImagePullPolicy
# @param resources Resources to be used
controller:
image: kubeagi/arcadia:v0.1.0-20231230-c8518fb
image: kubeagi/arcadia:latest
imagePullPolicy: IfNotPresent
resources:
limits:
Expand All @@ -20,7 +20,7 @@ controller:
# @section graphql and bff server
# related project: https://github.com/kubeagi/arcadia/tree/main/apiserver
apiserver:
image: kubeagi/arcadia:v0.1.0-20231230-c8518fb
image: kubeagi/arcadia:latest
enableplayground: false
port: 8081
ingress:
Expand All @@ -38,7 +38,7 @@ apiserver:
portal:
enabled: true
kubebbEnabled: true
image: kubeagi/portal:v0.1.0-20231229-f7b210e
image: kubeagi/ops-console:v0.1.0-20240105-6658717
port: 80
ingress:
path: kubeagi-portal-public
Expand All @@ -48,7 +48,6 @@ portal:
fastchat:
image:
repository: kubeagi/arcadia-fastchat
# Use either v0.1.0 or vllm-v0.1.0 (with vllm enabled)
tag: v0.1.0
ingress:
enabled: true
Expand Down Expand Up @@ -113,7 +112,7 @@ chromadb:
# Related project: https://github.com/kubeagi/arcadia/tree/main/data-processing
dataprocess:
enabled: true
image: kubeagi/data-processing:v0.1.0-20231230-c8518fb
image: kubeagi/data-processing:latest
port: 28888
config:
llm:
Expand Down
2 changes: 1 addition & 1 deletion deploy/charts/llm-worker/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ image:
repository: kubeagi/arcadia-fastchat-worker
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "v0.1.0"
tag: "v0.2.0"
env:
- name: FASTCHAT_MODEL_NAME
value: "baichuan2-7b"
Expand Down
3 changes: 3 additions & 0 deletions deploy/llms/Dockerfile.fastchat-worker
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ RUN python3.9 -m pip install einops scipy transformers_stream_generator==0.0.4 d

# Install requirements for quantization with auto-gptq
RUN python3.9 -m pip install auto-gptq optimum -i ${PYTHON_INDEX_URL}

COPY deploy/llms/start-worker.sh /
ENTRYPOINT ["/start-worker.sh"]
8 changes: 0 additions & 8 deletions deploy/llms/Dockerfile.fastchat-worker-ray

This file was deleted.

7 changes: 6 additions & 1 deletion deploy/llms/Dockerfile.fastchat-worker-vllm
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
ARG BASE_IMAGE_VERSION="v0.1.0"
ARG BASE_IMAGE_VERSION="v0.2.0"
FROM kubeagi/arcadia-fastchat-worker:${BASE_IMAGE_VERSION}

# Official: https://pypi.org/simple
ARG PYTHON_INDEX_URL="https://pypi.mirrors.ustc.edu.cn/simple/"
# Install requirements for vllm worker
RUN python3.9 -m pip install vllm -i ${PYTHON_INDEX_URL}

# Allow environment variables to set the ray & python versions so that the version check passes.
# For now: ray 2.9.0, python 3.9.x.
# This utils.py file is taken from the ray 2.9.0 ray-ml image;
# search for 'KubeAGI' in utils.py to see what was changed.
COPY utils.py /usr/local/lib/python3.9/dist-packages/ray/_private/utils.py
34 changes: 34 additions & 0 deletions deploy/llms/start-worker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
#
# Copyright contributors to the KubeAGI project
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Start a ray worker first when RAY_ADDRESS is set to a non-empty value.
if [[ -n "${RAY_ADDRESS:-}" ]]; then
    echo "Run Ray worker..."
    # Quote the address so it is always passed to ray as a single argument.
    # A failure is reported but does not abort the script: the model worker
    # is still started afterwards, as before.
    if ! ray start --address="${RAY_ADDRESS}"; then
        echo "WARNING: 'ray start --address=${RAY_ADDRESS}' failed" >&2
    fi
    # wait for ray worker's resource to be available
    # TODO: poll for readiness instead of sleeping for a fixed time
    sleep 5
fi

echo "Run model worker..."
# 'exec' replaces this shell with the python process, so signals sent to the
# container entrypoint (e.g. SIGTERM on stop) are delivered to the worker
# directly instead of to an intermediate shell.
# EXTRA_ARGS is intentionally left unquoted: it may carry several
# whitespace-separated arguments that must be split into separate words.
exec python3.9 -m "${FASTCHAT_WORKER_NAME}" --model-names "${FASTCHAT_REGISTRATION_MODEL_NAME}" \
    --model-path "/data/models/${FASTCHAT_MODEL_NAME}" --worker-address "${FASTCHAT_WORKER_ADDRESS}" \
    --controller-address "${FASTCHAT_CONTROLLER_ADDRESS}" \
    --num-gpus "${NUMBER_GPUS}" \
    --host 0.0.0.0 --port 21002 ${EXTRA_ARGS}
13 changes: 13 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ var (
ErrNoConfigMinIO = fmt.Errorf("config MinIO in comfigmap is not found")
ErrNoConfigVectorstore = fmt.Errorf("config Vectorstore in comfigmap is not found")
ErrNoConfigStreamlit = fmt.Errorf("config Streamlit in comfigmap is not found")
ErrNoConfigRayClusters = fmt.Errorf("config RayClusters in comfigmap is not found")
)

func GetSystemDatasource(ctx context.Context, c client.Client, cli dynamic.Interface) (*arcadiav1alpha1.Datasource, error) {
Expand Down Expand Up @@ -146,3 +147,15 @@ func GetStreamlit(ctx context.Context, c client.Client) (*Streamlit, error) {
}
return config.Streamlit, nil
}

// GetRayClusters returns the RayClusters entry of the configuration loaded
// through GetConfig; these are the ray clusters that can be used as a
// resource pool.
//
// Any error from GetConfig is returned unchanged. When the configuration has
// no RayClusters entry (the field is nil), ErrNoConfigRayClusters is returned.
func GetRayClusters(ctx context.Context, c client.Client) ([]RayCluster, error) {
	cfg, err := GetConfig(ctx, c, nil)
	switch {
	case err != nil:
		// Loading the configuration failed; pass the error through.
		return nil, err
	case cfg.RayClusters == nil:
		// Configuration is present but declares no ray clusters.
		return nil, ErrNoConfigRayClusters
	default:
		return cfg.RayClusters, nil
	}
}
Loading

0 comments on commit 8d83804

Please sign in to comment.