Skip to content

Commit

Permalink
--story=118271864 自研云支持K8S原生集群 (merge request !1918)
Browse files Browse the repository at this point in the history
Squash merge branch 'bcs-cluster-manager-bluekingcloud' into 'master'
--story=118271864 自研云支持K8S原生集群


TAPD: --story=118271864
  • Loading branch information
evanlixin authored and evanxinli committed Jul 23, 2024
1 parent aa17916 commit a581ce2
Show file tree
Hide file tree
Showing 11 changed files with 5,266 additions and 4,962 deletions.
9,963 changes: 5,023 additions & 4,940 deletions bcs-services/bcs-cluster-manager/api/clustermanager/clustermanager.pb.go

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -1770,7 +1770,7 @@ message NetworkSetting {
}];
SubnetSource subnetSource = 7[(grpc.gateway.protoc_gen_swagger.options.openapiv2_field) = {
title: "subnetSource",
description: "集群VPC-CNI模式是否为非固定IP,默认: FALSE 非固定IP"
description: "集群VPC-CNI模式下的使用子网情况"
}];
bool isStaticIpMode = 8[(grpc.gateway.protoc_gen_swagger.options.openapiv2_field) = {
title: "isStaticIpMode",
Expand Down Expand Up @@ -1804,6 +1804,10 @@ message NetworkSetting {
title: "status",
description: "集群网络状态, 主要是变更网络组件过程中记录网络状态"
}];
string networkMode = 16 [(grpc.gateway.protoc_gen_swagger.options.openapiv2_field) = {
title: "networkMode",
description: "集群VpcCni网络模式(目前支持共享网卡模式tke-route-eni和独占网卡模式tke-direct-eni, 默认情况下是共享网卡模式)"
}];
}

message SubnetSource {
Expand Down Expand Up @@ -3383,13 +3387,24 @@ message CloudNetworkInfo {
title: "underlayAutoSteps",
description: "underlay网络模式下自动分配子网的步长"
}];
repeated NetworkMode vpcCniModes = 8 [
(grpc.gateway.protoc_gen_swagger.options.openapiv2_field) = {
title: "vpcCniModes",
description: "vpc-cni插件下支持的网络模式"
}];
}

// EnvCidrStep pairs an environment identifier with an unsigned step value.
// NOTE(review): presumably the per-environment subnet auto-allocation step
// referenced by CloudNetworkInfo.underlayAutoSteps — confirm against its usage.
message EnvCidrStep {
// env: presumably the name of the environment the step applies to — the
// accepted values are not visible here, TODO confirm.
string env=1;
// step: the step size associated with env.
uint32 step=2;
}

// NetworkMode is the element type of CloudNetworkInfo.vpcCniModes, which lists
// the network modes supported under the vpc-cni plugin.
message NetworkMode {
// mode: the mode identifier.
// NOTE(review): presumably values such as "tke-route-eni" (shared ENI) or
// "tke-direct-eni" (exclusive ENI), matching NetworkSetting.networkMode — confirm.
string mode=1;
// default: presumably marks the mode applied when none is chosen explicitly —
// TODO confirm with the consumer of vpcCniModes.
bool default=2;
// name: presumably a human-readable display name for the mode — TODO confirm.
string name=3;
}

// NodeGroup pool for kubernetes cluster-autoscaling
message NodeGroup {
option (grpc.gateway.protoc_gen_swagger.options.openapiv2_schema) = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7855,6 +7855,14 @@
"format": "int64",
"description": "underlay网络模式下自动分配子网的步长",
"title": "underlayAutoSteps"
},
"vpcCniModes": {
"type": "array",
"items": {
"$ref": "#/definitions/clustermanagerNetworkMode"
},
"description": "vpc-cni插件下支持的网络模式",
"title": "vpcCniModes"
}
},
"description": "用于记录公共网络配置信息",
Expand Down Expand Up @@ -15109,6 +15117,21 @@
}
}
},
"clustermanagerNetworkMode": {
"type": "object",
"properties": {
"mode": {
"type": "string"
},
"default": {
"type": "boolean",
"format": "boolean"
},
"name": {
"type": "string"
}
}
},
"clustermanagerNetworkSetting": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -15150,7 +15173,7 @@
},
"subnetSource": {
"$ref": "#/definitions/clustermanagerSubnetSource",
"description": "集群VPC-CNI模式是否为非固定IP,默认: FALSE 非固定IP",
"description": "集群VPC-CNI模式下的使用子网情况",
"title": "subnetSource"
},
"isStaticIpMode": {
Expand Down Expand Up @@ -15198,6 +15221,11 @@
"type": "string",
"description": "集群网络状态, 主要是变更网络组件过程中记录网络状态",
"title": "status"
},
"networkMode": {
"type": "string",
"description": "集群VpcCni网络模式(目前支持共享网卡模式tke-route-eni和独占网卡模式tke-direct-eni, 默认情况下是共享网卡模式)",
"title": "networkMode"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,26 +113,46 @@ func (t *Task) BuildCreateClusterTask(cls *proto.Cluster, opt *cloudprovider.Cre
taskName := fmt.Sprintf(createClusterTaskTemplate, cls.ClusterID)
task.CommonParams[cloudprovider.TaskNameKey.String()] = taskName

// step1: call bkops preAction operation
// postAction bkops, platform run default steps
// step1: call bkops preAction operation 创建集群
if opt.Cloud != nil && opt.Cloud.ClusterManagement != nil && opt.Cloud.ClusterManagement.CreateCluster != nil {
step := &template.BkSopsStepAction{
TaskName: template.SystemInit,
Actions: opt.Cloud.ClusterManagement.CreateCluster.PreActions,
Plugins: opt.Cloud.ClusterManagement.CreateCluster.Plugins,
}
err := step.BuildBkSopsStepAction(task, cls, template.ExtraInfo{
NodeIPList: strings.Join(opt.WorkerNodes, ","),
BusinessID: cls.BusinessID,
NodeOperator: opt.Operator,
Operator: opt.Operator,
NodeIPList: strings.Join(opt.WorkerNodes, ","),
BusinessID: cls.BusinessID,
NodeOperator: opt.Operator,
Operator: opt.Operator,
TranslateMethod: createClusterStep.StepMethod,
})
if err != nil {
return nil, fmt.Errorf("BuildCreateClusterTask BuildBkSopsStepAction failed: %v", err)
}
}

// step2: update cluster DB info and associated data
// step2: call bksops add nodes to cluster 上架节点
if len(opt.WorkerNodes) > 0 && opt.Cloud != nil && opt.Cloud.ClusterManagement != nil &&
opt.Cloud.ClusterManagement.AddNodesToCluster != nil {
step := &template.BkSopsStepAction{
TaskName: template.SystemInit,
Actions: opt.Cloud.ClusterManagement.AddNodesToCluster.PreActions,
Plugins: opt.Cloud.ClusterManagement.AddNodesToCluster.Plugins,
}
err := step.BuildBkSopsStepAction(task, cls, template.ExtraInfo{
NodeIPList: strings.Join(opt.WorkerNodes, ","),
NodeOperator: opt.Operator,
BusinessID: cls.BusinessID,
Operator: opt.Operator,
TranslateMethod: addNodesToClusterStep.StepMethod,
})
if err != nil {
return nil, fmt.Errorf("BuildCreateClusterTask BuildBkSopsStepAction failed: %v", err)
}
}

// step3: update cluster DB info and associated data
createClusterTask := &CreateClusterTaskOption{Cluster: cls, WorkerNodes: opt.WorkerNodes}
createClusterTask.BuildUpdateClusterDbInfoStep(task)

Expand Down Expand Up @@ -258,8 +278,9 @@ func (t *Task) BuildDeleteClusterTask(cls *proto.Cluster, opt *cloudprovider.Del
Plugins: opt.Cloud.ClusterManagement.DeleteCluster.Plugins,
}
err := step.BuildBkSopsStepAction(task, cls, template.ExtraInfo{
BusinessID: cls.BusinessID,
Operator: opt.Operator,
BusinessID: cls.BusinessID,
Operator: opt.Operator,
TranslateMethod: deleteClusterStep.StepMethod,
})
if err != nil {
return nil, fmt.Errorf("BuildDeleteClusterTask BuildBkSopsStepAction failed: %v", err)
Expand Down Expand Up @@ -339,10 +360,11 @@ func (t *Task) BuildAddNodesToClusterTask(cls *proto.Cluster, nodes []*proto.Nod
Plugins: opt.Cloud.ClusterManagement.AddNodesToCluster.Plugins,
}
err := step.BuildBkSopsStepAction(task, cls, template.ExtraInfo{
NodeIPList: strings.Join(nodeIPs, ","),
NodeOperator: opt.Operator,
BusinessID: cls.BusinessID,
Operator: opt.Operator,
NodeIPList: strings.Join(nodeIPs, ","),
NodeOperator: opt.Operator,
BusinessID: cls.BusinessID,
Operator: opt.Operator,
TranslateMethod: addNodesToClusterStep.StepMethod,
})
if err != nil {
return nil, fmt.Errorf("BuildAddNodesToClusterTask BuildBkSopsStepAction failed: %v", err)
Expand Down Expand Up @@ -429,10 +451,11 @@ func (t *Task) BuildRemoveNodesFromClusterTask(cls *proto.Cluster, nodes []*prot
Plugins: opt.Cloud.ClusterManagement.DeleteNodesFromCluster.Plugins,
}
err := step.BuildBkSopsStepAction(task, cls, template.ExtraInfo{
NodeIPList: strings.Join(nodeIPs, ","),
NodeOperator: opt.Operator,
BusinessID: cls.BusinessID,
Operator: opt.Operator,
NodeIPList: strings.Join(nodeIPs, ","),
NodeOperator: opt.Operator,
BusinessID: cls.BusinessID,
Operator: opt.Operator,
TranslateMethod: removeNodesFromClusterStep.StepMethod,
})
if err != nil {
return nil, fmt.Errorf("BuildAddNodesToClusterTask BuildBkSopsStepAction failed: %v", err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,30 @@ const (
)

var (
// create cluster task steps
createClusterStep = cloudprovider.StepInfo{
StepMethod: fmt.Sprintf("%s-CreateCluster", cloudName),
StepName: "创建集群",
}

// delete cluster task steps
deleteClusterStep = cloudprovider.StepInfo{
StepMethod: fmt.Sprintf("%s-DeleteCluster", cloudName),
StepName: "删除集群",
}

// cluster add nodes task steps
addNodesToClusterStep = cloudprovider.StepInfo{
StepMethod: fmt.Sprintf("%s-AddNodesToCluster", cloudName),
StepName: "集群上架节点",
}

// cluster remove nodes task steps
removeNodesFromClusterStep = cloudprovider.StepInfo{
StepMethod: fmt.Sprintf("%s-RemoveNodesFromCluster", cloudName),
StepName: "集群下架节点",
}

// import cluster task steps
importClusterNodesStep = cloudprovider.StepInfo{
StepMethod: fmt.Sprintf("%s-ImportClusterNodesTask", cloudName),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,13 @@ func updateClusterInfo(cloudID string, opt *cloudprovider.GetClusterOption) (*pr
}

// 集群VPC-CNI模式子网信息
opt.Cluster.NetworkSettings.EnableVPCCni = business.GetClusterVpcCniStatus(cls)
if !utils.StringInSlice(opt.Cluster.GetNetworkSettings().GetStatus(),
[]string{icommon.StatusInitialization, icommon.TaskStatusFailure}) {
opt.Cluster.NetworkSettings.EnableVPCCni = business.GetClusterVpcCniStatus(cls)
}
if opt.Cluster.NetworkSettings.GetNetworkMode() == "" {
opt.Cluster.NetworkSettings.NetworkMode = api.TKERouteEni
}
opt.Cluster.NetworkSettings.EniSubnetIDs = business.GetClusterVpcCniSubnets(cls)

return opt.Cluster, nil
Expand Down Expand Up @@ -918,6 +924,7 @@ func (c *Cluster) CheckClusterNetworkStatus(clusterId string,

switch opt.Disable {
case true:
// 底层集群已经关闭
if !business.GetClusterVpcCniStatus(cls) {
opt.Cluster.NetworkSettings.EnableVPCCni = false
opt.Cluster.NetworkSettings.EniSubnetIDs = nil
Expand All @@ -927,6 +934,12 @@ func (c *Cluster) CheckClusterNetworkStatus(clusterId string,
return false, nil
}

if !opt.Cluster.GetNetworkSettings().GetEnableVPCCni() &&
opt.Cluster.GetNetworkSettings().GetStatus() != icommon.TaskStatusFailure {
return false,
fmt.Errorf("cluster %s/%s already close vpc-cni", opt.Cluster.ClusterID, opt.Cluster.ClusterName)
}

// check subnets usage when close vpc-cni
opt.Cluster.NetworkSettings.EniSubnetIDs = nil
opt.Cluster.NetworkSettings.SubnetSource.New = nil
Expand Down Expand Up @@ -957,7 +970,8 @@ func (c *Cluster) CheckClusterNetworkStatus(clusterId string,
return false, nil
}

if opt.Cluster.GetNetworkSettings().GetEnableVPCCni() {
if opt.Cluster.GetNetworkSettings().GetEnableVPCCni() &&
opt.Cluster.GetNetworkSettings().GetStatus() != icommon.TaskStatusFailure {
return false,
fmt.Errorf("cluster %s/%s already open vpc-cni", opt.Cluster.ClusterID, opt.Cluster.ClusterName)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func (sjr *SyncJobResult) UpdateJobResultStatus(isSuccess bool) error {
common.StatusAutoScalingOptionUpdateFailed)
return sjr.updateAutoScalingStatus(isSuccess)
case SwitchClusterNetworkJob:
sjr.Status = generateStatusResult(common.TaskStatusSuccess, common.TaskStatusFailure)
sjr.Status = generateStatusResult("", common.TaskStatusFailure)
return sjr.updateClusterNetworkStatus(isSuccess)
}

Expand Down
Loading

0 comments on commit a581ce2

Please sign in to comment.