Skip to content

Commit

Permalink
Merge pull request #520 from bjwswang/ray
Browse files Browse the repository at this point in the history
feat: able to list rayclusters configured in arcadia config
  • Loading branch information
bjwswang committed Jan 8, 2024
2 parents 5b76328 + 01f1700 commit c206d1d
Show file tree
Hide file tree
Showing 12 changed files with 782 additions and 12 deletions.
600 changes: 599 additions & 1 deletion apiserver/graph/generated/generated.go

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions apiserver/graph/generated/models_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apiserver/graph/impl/knowledgebase.resolvers.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions apiserver/graph/impl/raycluster.resolvers.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apiserver/graph/schema/entrypoint.graphqls
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,4 @@ type TypedObjectReference {
namespace: String
}

union PageNode = Datasource | Model | Embedder | KnowledgeBase | Dataset | VersionedDataset | F | Worker | ApplicationMetadata | LLM | ModelService
union PageNode = Datasource | Model | Embedder | KnowledgeBase | Dataset | VersionedDataset | F | Worker | ApplicationMetadata | LLM | ModelService | RayCluster
19 changes: 19 additions & 0 deletions apiserver/graph/schema/raycluster.gql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# list
query listRayClusters($input: ListCommonInput!){
RayCluster {
listRayClusters(input: $input) {
totalCount
hasNextPage
nodes {
__typename
... on RayCluster {
index
name
headAddress
dashboardHost
pythonVersion
}
}
}
}
}
35 changes: 35 additions & 0 deletions apiserver/graph/schema/raycluster.graphqls
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

"""RayCluster集群"""
type RayCluster {
"""
Ray集群的索引
"""
index: Int!
"""
名称
规则: 遵循k8s命名
"""
name: String!
"""
Ray集群head节点的地址
规则: 遵循k8s命名
"""
headAddress: String
"""
Ray集群dashboard的地址
"""
dashboardHost: String
"""
Ray集群应用要求的python版本
"""
pythonVersion: String
}


type RayClusterQuery {
listRayClusters(input: ListCommonInput!): PaginatedResult!
}

extend type Query {
RayCluster: RayClusterQuery
}
57 changes: 57 additions & 0 deletions apiserver/pkg/ray/raycluster.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
Copyright 2023 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ray

import (
"context"
"strings"

"k8s.io/client-go/dynamic"

"github.com/kubeagi/arcadia/apiserver/graph/generated"
"github.com/kubeagi/arcadia/pkg/config"
)

func ListRayClusters(ctx context.Context, c dynamic.Interface, input generated.ListCommonInput) (*generated.PaginatedResult, error) {
clusters, err := config.GetRayClusters(ctx, nil, c)
if err != nil {
return nil, err
}

var results = make([]generated.PageNode, 0, len(clusters))
for index, cluster := range clusters {
// skip if keyword not in cluster name
if input.Keyword != nil {
if !strings.Contains(cluster.Name, *input.Keyword) {
continue
}
}

results = append(results, &generated.RayCluster{
Index: index,
Name: cluster.Name,
HeadAddress: &cluster.HeadAddress,
DashboardHost: &cluster.DashboardHost,
PythonVersion: &cluster.PythonVersion,
})
}

return &generated.PaginatedResult{
TotalCount: len(results),
Nodes: results,
}, nil
}
4 changes: 4 additions & 0 deletions gqlgen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -258,3 +258,7 @@ models:
resolver: true
checkModelService:
resolver: true
RayClusterQuery:
fields:
listRayClusters:
resolver: true
12 changes: 6 additions & 6 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ func GetConfig(ctx context.Context, c client.Client, cli dynamic.Interface) (con
return config, nil
}

func GetVectorStore(ctx context.Context, c dynamic.Interface) (*arcadiav1alpha1.TypedObjectReference, error) {
config, err := GetConfig(ctx, nil, c)
func GetVectorStore(ctx context.Context, c client.Client, cli dynamic.Interface) (*arcadiav1alpha1.TypedObjectReference, error) {
config, err := GetConfig(ctx, c, cli)
if err != nil {
return nil, err
}
Expand All @@ -137,8 +137,8 @@ func GetVectorStore(ctx context.Context, c dynamic.Interface) (*arcadiav1alpha1.
}

// Get the configuration of streamlit tool
func GetStreamlit(ctx context.Context, c client.Client) (*Streamlit, error) {
config, err := GetConfig(ctx, c, nil)
func GetStreamlit(ctx context.Context, c client.Client, cli dynamic.Interface) (*Streamlit, error) {
config, err := GetConfig(ctx, c, cli)
if err != nil {
return nil, err
}
Expand All @@ -149,8 +149,8 @@ func GetStreamlit(ctx context.Context, c client.Client) (*Streamlit, error) {
}

// Get the ray cluster that can be used a resource pool
func GetRayClusters(ctx context.Context, c client.Client) ([]RayCluster, error) {
config, err := GetConfig(ctx, c, nil)
func GetRayClusters(ctx context.Context, c client.Client, cli dynamic.Interface) ([]RayCluster, error) {
config, err := GetConfig(ctx, c, cli)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/streamlit/deployer.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (st *StreamlitDeployer) Install() error {

namespace := st.namespace.Name
// lookup streamlit image from config
streamlitConfig, err := config.GetStreamlit(st.ctx, st.client)
streamlitConfig, err := config.GetStreamlit(st.ctx, st.client, nil)
if err != nil {
klog.Errorln("failed to get streamlit config", err)
return err
Expand Down
4 changes: 2 additions & 2 deletions pkg/worker/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,9 @@ func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alp

// Get ray config from configMap
if gpuCount > 1 {
rayClusters, err := config.GetRayClusters(ctx, runner.c)
rayClusters, err := config.GetRayClusters(ctx, runner.c, nil)
if err != nil || len(rayClusters) == 0 {
klog.Warningln("no ray cluster configured, fallback to local resoue: ", err)
klog.Warningln("no ray cluster configured, fallback to local resource: ", err)
} else {
// Use the 1st ray cluster for now
// TODO: let user to select with ray cluster to use
Expand Down

0 comments on commit c206d1d

Please sign in to comment.