Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scheduler: recover gang check in preFilter #2217

Merged
merged 1 commit into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions pkg/scheduler/plugins/coscheduling/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,22 +259,42 @@ func (pgMgr *PodGroupManager) PreEnqueue(ctx context.Context, pod *corev1.Pod) (
}

// PreFilter
// i.Check whether the Gang has met the scheduleCycleValid check, and reject the pod if negative(only Strict mode ).
// ii.Check whether the Gang is OnceResourceSatisfied
// iii.Check whether the Gang has met the scheduleCycleValid check, and reject the pod if negative(only Strict mode ).
// i.Check whether the number of children in the Gang meets the Gang's minimum number requirement, and reject the pod if it does not.
// ii.Check whether the Gang has been initialized, and reject the pod if it has not.
// iii.Check whether the Gang is OnceResourceSatisfied, and let the pod pass PreFilter directly if it is.
// iv.Check whether the Gang passes the scheduleCycleValid check, and reject the pod if it does not (Strict mode only).
// v.Try to update scheduleCycle, scheduleCycleValid, childrenScheduleRoundMap as mentioned above.
func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, state *framework.CycleState, pod *corev1.Pod) (err error) {
if !util.IsPodNeedGang(pod) {
return nil
}
preFilterState := &stateData{skipReject: false, skipSetCycleInvalid: false}
state.Write(stateKey, preFilterState)
gang := pgMgr.GetGangByPod(pod)
if gang == nil {
preFilterState.skipSetCycleInvalid = true
return fmt.Errorf("can't find gang, gangName: %v, podName: %v", util.GetId(pod.Namespace, util.GetGangNameByPod(pod)),
util.GetId(pod.Namespace, pod.Name))
}

// check if gang is initialized
if !gang.HasGangInit {
preFilterState.skipSetCycleInvalid = true
return fmt.Errorf("gang has not init, gangName: %v, podName: %v", gang.Name,
util.GetId(pod.Namespace, pod.Name))
}
// resourceSatisfied means pod will directly pass the PreFilter
if gang.getGangMatchPolicy() == extension.GangMatchPolicyOnceSatisfied && gang.isGangOnceResourceSatisfied() {
return nil
}

// check minNum
if gang.getChildrenNum() < gang.getGangMinNum() {
preFilterState.skipSetCycleInvalid = true
return fmt.Errorf("gang child pod not collect enough, gangName: %v, podName: %v", gang.Name,
util.GetId(pod.Namespace, pod.Name))
}

if pgMgr.args != nil && pgMgr.args.SkipCheckScheduleCycle {
return nil
}
Expand Down
89 changes: 73 additions & 16 deletions pkg/scheduler/plugins/coscheduling/core/core_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,19 @@ func TestPlugin_PreFilter(t *testing.T) {
pods: []*corev1.Pod{},
expectedErrorMessage: "",
},
{
name: "pod belongs to a non-existing pg",
pod: st.MakePod().Name("pod2").UID("pod2").Namespace("gangA_ns").Label(v1alpha1.PodGroupLabel, "wenshiqi222").Obj(),
expectedErrorMessage: "gang has not init, gangName: gangA_ns/wenshiqi222, podName: gangA_ns/pod2",
expectedChildCycleMap: map[string]int{
"gangA_ns/pod2": 1,
},
expectedScheduleCycleValid: true,
expectedScheduleCycle: 1,
expectStateData: &stateData{
skipSetCycleInvalid: true,
},
},
{
name: "gang ResourceSatisfied",
pod: st.MakePod().Name("podq").UID("podq").Namespace("gangq_ns").Label(v1alpha1.PodGroupLabel, "gangq").Obj(),
Expand All @@ -409,6 +422,21 @@ func TestPlugin_PreFilter(t *testing.T) {
resourceSatisfied: true,
expectStateData: &stateData{},
},
{
name: "pod count less than minMember",
pod: st.MakePod().Name("pod3").UID("pod3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod3-1").UID("pod3-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
},
pgs: makePg("ganga", "ganga_ns", 4, &gangACreatedTime, nil),
expectedErrorMessage: "gang child pod not collect enough, gangName: ganga_ns/ganga, podName: ganga_ns/pod3",
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{},
expectedScheduleCycleValid: true,
expectStateData: &stateData{
skipSetCycleInvalid: true,
},
},
{
name: "pods count equal with minMember,but is NonStrictMode",
pod: st.MakePod().Name("pod5").UID("pod5").Namespace("gangb_ns").Label(v1alpha1.PodGroupLabel, "gangb").Obj(),
Expand All @@ -423,10 +451,14 @@ func TestPlugin_PreFilter(t *testing.T) {
expectStateData: &stateData{},
},
{
name: "due to reschedule pod6's podScheduleCycle is equal with the gangScheduleCycle",
pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
pods: []*corev1.Pod{},
pgs: makePg("gangc", "ganga_ns", 1, &gangACreatedTime, nil),
name: "due to reschedule pod6's podScheduleCycle is equal with the gangScheduleCycle",
pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
},
pgs: makePg("gangc", "ganga_ns", 4, &gangACreatedTime, nil),
shouldSetCycleEqualWithGlobal: true,
totalNum: 5,
expectedScheduleCycle: 1,
Expand All @@ -442,9 +474,9 @@ func TestPlugin_PreFilter(t *testing.T) {
pod: st.MakePod().Name("pod6").UID("pod6").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").
NominatedNodeName("N1").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(),
st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(),
st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").NominatedNodeName("N1").Obj(),
st.MakePod().Name("pod6-1").UID("pod6-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
st.MakePod().Name("pod6-2").UID("pod6-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
st.MakePod().Name("pod6-3").UID("pod6-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangc").Obj(),
},
pgs: makePg("gangc", "ganga_ns", 4, &gangACreatedTime, nil),
shouldSetCycleEqualWithGlobal: true,
Expand All @@ -461,10 +493,14 @@ func TestPlugin_PreFilter(t *testing.T) {
expectStateData: &stateData{},
},
{
name: "pods count equal with minMember,is StrictMode,but the gang's scheduleCycle is not valid due to pre pod Filter Failed",
pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
pods: []*corev1.Pod{},
pgs: makePg("gangd", "ganga_ns", 1, &gangACreatedTime, nil),
name: "pods count equal with minMember,is StrictMode,but the gang's scheduleCycle is not valid due to pre pod Filter Failed",
pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod7-1").UID("pod7-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
st.MakePod().Name("pod7-2").UID("pod7-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
st.MakePod().Name("pod7-3").UID("pod7-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
},
pgs: makePg("gangd", "ganga_ns", 4, &gangACreatedTime, nil),
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{
"ganga_ns/pod7": 1,
Expand All @@ -477,10 +513,14 @@ func TestPlugin_PreFilter(t *testing.T) {
},
},
{
name: "pods count equal with minMember,is StrictMode, disable check scheduleCycle even if the gang's scheduleCycle is not valid",
pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
pods: []*corev1.Pod{},
pgs: makePg("gangd", "ganga_ns", 1, &gangACreatedTime, nil),
name: "pods count equal with minMember,is StrictMode, disable check scheduleCycle even if the gang's scheduleCycle is not valid",
pod: st.MakePod().Name("pod7").UID("pod7").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod7-1").UID("pod7-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
st.MakePod().Name("pod7-2").UID("pod7-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
st.MakePod().Name("pod7-3").UID("pod7-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gangd").Obj(),
},
pgs: makePg("gangd", "ganga_ns", 4, &gangACreatedTime, nil),
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{
"ganga_ns/pod7": 1,
Expand All @@ -491,6 +531,24 @@ func TestPlugin_PreFilter(t *testing.T) {
shouldSkipCheckScheduleCycle: true,
expectStateData: &stateData{},
},
{
name: "pods count equal with minMember,is StrictMode,scheduleCycle valid,but childrenNum is not reach to total num",
pod: st.MakePod().Name("pod8").UID("pod8").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(),
pods: []*corev1.Pod{
st.MakePod().Name("pod8-1").UID("pod8-1").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(),
st.MakePod().Name("pod8-2").UID("pod8-2").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(),
st.MakePod().Name("pod8-3").UID("pod8-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "gange").Obj(),
},
pgs: makePg("gange", "ganga_ns", 4, &gangACreatedTime, nil),
totalNum: 5,
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{
"ganga_ns/pod8": 1,
},
expectedScheduleCycleValid: true,
expectedErrorMessage: "",
expectStateData: &stateData{},
},
{
name: "pods count more than minMember,is StrictMode,scheduleCycle valid,and childrenNum reach to total num",
pod: st.MakePod().Name("pod9").UID("pod9").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
Expand All @@ -500,7 +558,6 @@ func TestPlugin_PreFilter(t *testing.T) {
st.MakePod().Name("pod9-3").UID("pod9-3").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
st.MakePod().Name("pod9-4").UID("pod9-4").Namespace("ganga_ns").Label(v1alpha1.PodGroupLabel, "ganga").Obj(),
},
pgs: makePg("ganga", "ganga_ns", 1, &gangACreatedTime, nil),
totalNum: 5,
expectedScheduleCycle: 1,
expectedChildCycleMap: map[string]int{
Expand Down