From 3304545b7f665f2ff84520830a15b8ff26877b17 Mon Sep 17 00:00:00 2001 From: "wangjianyu.wjy" Date: Mon, 30 Sep 2024 12:31:26 +0800 Subject: [PATCH] scheduler: recover gang check in preFilter Signed-off-by: wangjianyu.wjy --- .../plugins/coscheduling/core/core.go | 26 +++++- .../plugins/coscheduling/core/core_test.go | 89 +++++++++++++++---- 2 files changed, 96 insertions(+), 19 deletions(-) diff --git a/pkg/scheduler/plugins/coscheduling/core/core.go b/pkg/scheduler/plugins/coscheduling/core/core.go index 028898323..b19274ef6 100644 --- a/pkg/scheduler/plugins/coscheduling/core/core.go +++ b/pkg/scheduler/plugins/coscheduling/core/core.go @@ -259,9 +259,11 @@ func (pgMgr *PodGroupManager) PreEnqueue(ctx context.Context, pod *corev1.Pod) ( } // PreFilter -// i.Check whether the Gang has met the scheduleCycleValid check, and reject the pod if negative(only Strict mode ). -// ii.Check whether the Gang is OnceResourceSatisfied -// iii.Check whether the Gang has met the scheduleCycleValid check, and reject the pod if negative(only Strict mode ). +// i.Check whether children in Gang has met the requirements of minimum number under each Gang, and reject the pod if negative. +// ii.Check whether the Gang is inited, and reject the pod if negative. +// iii.Check whether the Gang is OnceResourceSatisfied +// iv.Check whether the Gang has met the scheduleCycleValid check, and reject the pod if negative(only Strict mode ). +// v.Try update scheduleCycle, scheduleCycleValid, childrenScheduleRoundMap as mentioned above. 
func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, state *framework.CycleState, pod *corev1.Pod) (err error) { if !util.IsPodNeedGang(pod) { return nil @@ -269,12 +271,30 @@ func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, state *framework.Cy preFilterState := &stateData{skipReject: false, skipSetCycleInvalid: false} state.Write(stateKey, preFilterState) gang := pgMgr.GetGangByPod(pod) + if gang == nil { + preFilterState.skipSetCycleInvalid = true + return fmt.Errorf("can't find gang, gangName: %v, podName: %v", util.GetId(pod.Namespace, util.GetGangNameByPod(pod)), + util.GetId(pod.Namespace, pod.Name)) + } + // check if gang is initialized + if !gang.HasGangInit { + preFilterState.skipSetCycleInvalid = true + return fmt.Errorf("gang has not init, gangName: %v, podName: %v", gang.Name, + util.GetId(pod.Namespace, pod.Name)) + } // resourceSatisfied means pod will directly pass the PreFilter if gang.getGangMatchPolicy() == extension.GangMatchPolicyOnceSatisfied && gang.isGangOnceResourceSatisfied() { return nil } + // check minNum + if gang.getChildrenNum() < gang.getGangMinNum() { + preFilterState.skipSetCycleInvalid = true + return fmt.Errorf("gang child pod not collect enough, gangName: %v, podName: %v", gang.Name, + util.GetId(pod.Namespace, pod.Name)) + } + if pgMgr.args != nil && pgMgr.args.SkipCheckScheduleCycle { return nil } diff --git a/pkg/scheduler/plugins/coscheduling/core/core_test.go b/pkg/scheduler/plugins/coscheduling/core/core_test.go index 30ec3cf74..f1c57b1ad 100644 --- a/pkg/scheduler/plugins/coscheduling/core/core_test.go +++ b/pkg/scheduler/plugins/coscheduling/core/core_test.go @@ -399,6 +399,19 @@ func TestPlugin_PreFilter(t *testing.T) { pods: []*corev1.Pod{}, expectedErrorMessage: "", }, + { + name: "pod belongs to a non-existing pg", + pod: st.MakePod().Name("pod2").UID("pod2").Namespace("gangA_ns").Label(v1alpha1.PodGroupLabel, "wenshiqi222").Obj(), + expectedErrorMessage: "gang has not init, gangName: 
gangA_ns/wenshiqi222, podName: gangA_ns/pod2", + expectedChildCycleMap: map[string]int{ + "gangA_ns/pod2": 1, + }, + expectedScheduleCycleValid: true, + expectedScheduleCycle: 1, + expectStateData: &stateData{ + skipSetCycleInvalid: true, + }, + }, { name: "gang ResourceSatisfied", pod: st.MakePod().Name("podq").UID("podq").Namespace("gangq_ns").Label(v1alpha1.PodGroupLabel, "gangq").Obj(), @@ -409,6 +422,21 @@ func TestPlugin_PreFilter(t *testing.T) { resourceSatisfied: true, expectStateData: &stateData{}, }, + { + name: "pod count less than minMember", + pod: st.MakePod().Name("pod3").UID("pod3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(), + pods: []*corev1.Pod{ + st.MakePod().Name("pod3-1").UID("pod3-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(), + }, + pgs: makePg("ganga", "ganga_ns", 4, &gangACreatedTime, nil), + expectedErrorMessage: "gang child pod not collect enough, gangName: ganga_ns/ganga, podName: ganga_ns/pod3", + expectedScheduleCycle: 1, + expectedChildCycleMap: map[string]int{}, + expectedScheduleCycleValid: true, + expectStateData: &stateData{ + skipSetCycleInvalid: true, + }, + }, { name: "pods count equal with minMember,but is NonStrictMode", pod: st.MakePod().Name("pod5").UID("pod5").Namespace("gangb_ns").Label(v1alpha1.PodGroupLabel, "gangb").Obj(), @@ -423,10 +451,14 @@ func TestPlugin_PreFilter(t *testing.T) { expectStateData: &stateData{}, }, { - name: "due to reschedule pod6's podScheduleCycle is equal with the gangScheduleCycle", - pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(), - pods: []*corev1.Pod{}, - pgs: makePg("gangc", "ganga_ns", 1, &gangACreatedTime, nil), + name: "due to reschedule pod6's podScheduleCycle is equal with the gangScheduleCycle", + pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(), + pods: []*corev1.Pod{ + 
st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(), + st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(), + st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(), + }, + pgs: makePg("gangc", "ganga_ns", 4, &gangACreatedTime, nil), shouldSetCycleEqualWithGlobal: true, totalNum: 5, expectedScheduleCycle: 1, @@ -442,9 +474,9 @@ func TestPlugin_PreFilter(t *testing.T) { pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc"). NominatedNodeName("N1").Obj(), pods: []*corev1.Pod{ - st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(), - st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(), - st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(), + st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(), + st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(), + st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(), }, pgs: makePg("gangc", "ganga_ns", 4, &gangACreatedTime, nil), shouldSetCycleEqualWithGlobal: true, @@ -461,10 +493,14 @@ func TestPlugin_PreFilter(t *testing.T) { expectStateData: &stateData{}, }, { - name: "pods count equal with minMember,is StrictMode,but the gang's scheduleCycle is not valid due to pre pod Filter Failed", - pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), - pods: []*corev1.Pod{}, - pgs: makePg("gangd", "ganga_ns", 1, &gangACreatedTime, nil), + name: "pods count equal 
with minMember,is StrictMode,but the gang's scheduleCycle is not valid due to pre pod Filter Failed", + pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), + pods: []*corev1.Pod{ + st.MakePod().Name("pod7-1").UID("pod7-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), + st.MakePod().Name("pod7-2").UID("pod7-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), + st.MakePod().Name("pod7-3").UID("pod7-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), + }, + pgs: makePg("gangd", "ganga_ns", 4, &gangACreatedTime, nil), expectedScheduleCycle: 1, expectedChildCycleMap: map[string]int{ "ganga_ns/pod7": 1, @@ -477,10 +513,14 @@ func TestPlugin_PreFilter(t *testing.T) { }, }, { - name: "pods count equal with minMember,is StrictMode, disable check scheduleCycle even if the gang's scheduleCycle is not valid", - pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), - pods: []*corev1.Pod{}, - pgs: makePg("gangd", "ganga_ns", 1, &gangACreatedTime, nil), + name: "pods count equal with minMember,is StrictMode, disable check scheduleCycle even if the gang's scheduleCycle is not valid", + pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), + pods: []*corev1.Pod{ + st.MakePod().Name("pod7-1").UID("pod7-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), + st.MakePod().Name("pod7-2").UID("pod7-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), + st.MakePod().Name("pod7-3").UID("pod7-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(), + }, + pgs: makePg("gangd", "ganga_ns", 4, &gangACreatedTime, nil), expectedScheduleCycle: 1, expectedChildCycleMap: map[string]int{ "ganga_ns/pod7": 1, @@ -491,6 +531,24 @@ func TestPlugin_PreFilter(t *testing.T) { shouldSkipCheckScheduleCycle: true, 
expectStateData: &stateData{}, }, + { + name: "pods count equal with minMember,is StrictMode,scheduleCycle valid,but childrenNum is not reach to total num", + pod: st.MakePod().Name("pod8").UID("pod8").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(), + pods: []*corev1.Pod{ + st.MakePod().Name("pod8-1").UID("pod8-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(), + st.MakePod().Name("pod8-2").UID("pod8-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(), + st.MakePod().Name("pod8-3").UID("pod8-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(), + }, + pgs: makePg("gange", "ganga_ns", 4, &gangACreatedTime, nil), + totalNum: 5, + expectedScheduleCycle: 1, + expectedChildCycleMap: map[string]int{ + "ganga_ns/pod8": 1, + }, + expectedScheduleCycleValid: true, + expectedErrorMessage: "", + expectStateData: &stateData{}, + }, { name: "pods count more than minMember,is StrictMode,scheduleCycle valid,and childrenNum reach to total num", pod: st.MakePod().Name("pod9").UID("pod9").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(), @@ -500,7 +558,6 @@ func TestPlugin_PreFilter(t *testing.T) { st.MakePod().Name("pod9-3").UID("pod9-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(), st.MakePod().Name("pod9-4").UID("pod9-4").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(), }, - pgs: makePg("ganga", "ganga_ns", 1, &gangACreatedTime, nil), totalNum: 5, expectedScheduleCycle: 1, expectedChildCycleMap: map[string]int{