Skip to content

Commit

Permalink
Merge pull request #197 from jescarri/add_force_option
Browse files Browse the repository at this point in the history
Node Drain Logic: Allow users to force node drain
  • Loading branch information
invidian committed Jul 14, 2023
2 parents e0efc35 + 296c2b6 commit 7a1b0ff
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 3 deletions.
2 changes: 2 additions & 0 deletions cmd/update-agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ var (

reapTimeout = flag.Int("grace-period", defaultGracePeriodSeconds,
"Period of time in seconds given to a pod to terminate when rebooting for an update")
forceNodeDrain = flag.Bool("force-drain", false, "Force removal of pods with custom or no owners while draining node")
)

func main() {
Expand Down Expand Up @@ -74,6 +75,7 @@ func main() {
Clientset: clientset,
StatusReceiver: updateEngineClient,
Rebooter: rebooter,
ForceNodeDrain: *forceNodeDrain,
}

agent, err := agent.New(config)
Expand Down
9 changes: 6 additions & 3 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
type Config struct {
NodeName string
PodDeletionGracePeriod time.Duration
ForceNodeDrain bool
Clientset kubernetes.Interface
StatusReceiver StatusReceiver
Rebooter Rebooter
Expand Down Expand Up @@ -65,6 +66,7 @@ type klocksmith struct {
ue StatusReceiver
lc Rebooter
reapTimeout time.Duration
forceNodeDrain bool
hostFilesPrefix string
pollInterval time.Duration
maxOperatorResponseTime time.Duration
Expand Down Expand Up @@ -114,6 +116,7 @@ func New(config *Config) (Klocksmith, error) {
ue: config.StatusReceiver,
lc: config.Rebooter,
reapTimeout: config.PodDeletionGracePeriod,
forceNodeDrain: config.ForceNodeDrain,
hostFilesPrefix: config.HostFilesPrefix,
pollInterval: pollInterval,
maxOperatorResponseTime: maxOperatorResponseTime,
Expand Down Expand Up @@ -269,7 +272,7 @@ func (k *klocksmith) process(ctx context.Context) error {
klog.Info("Node already marked as unschedulable")
}

drainer := newDrainer(ctx, k.clientset, k.reapTimeout)
drainer := newDrainer(ctx, k.clientset, k.reapTimeout, k.forceNodeDrain)

klog.Info("Getting pod list for deletion")

Expand Down Expand Up @@ -461,11 +464,11 @@ type drainer interface {
DeleteOrEvictPods([]corev1.Pod) error
}

func newDrainer(ctx context.Context, cs kubernetes.Interface, timeout time.Duration) drainer {
func newDrainer(ctx context.Context, cs kubernetes.Interface, timeout time.Duration, forceNodeDrain bool) drainer {
return &drain.Helper{
Ctx: ctx,
Client: cs,
Force: false,
Force: forceNodeDrain,
GracePeriodSeconds: -1,
Timeout: timeout,
// Explicitly don't terminate self? we'll probably just be a
Expand Down
46 changes: 46 additions & 0 deletions pkg/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,52 @@ func Test_Running_agent(t *testing.T) {
})
})

// Subtest: with ForceNodeDrain enabled in the agent configuration, a pod that
// declares no owner references and is scheduled on the test node must not
// prevent the agent from reaching the reboot step.
t.Run("removes_pod_without_owner_when_force_drain_is_configured", func(t *testing.T) {
t.Parallel()

// Unbuffered channel: the mock rebooter blocks on send until the select at
// the end of this subtest receives the value.
rebootTriggerred := make(chan bool)

// A single pod with only a name and namespace in its metadata (no
// OwnerReferences are set), bound to the test node via Spec.NodeName.
podsToCreate := []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: corev1.PodSpec{
NodeName: testNode().Name,
},
},
}

// Seed the fake clientset with both the pod and the node object.
fakeClient := fake.NewSimpleClientset(podsToCreate[0], testNode())
// NOTE(review): presumably registers eviction API support on the fake
// client so the drain helper can evict pods; confirm against the helper.
addEvictionSupport(t, fakeClient)

// Start from the valid baseline configuration, then override the pieces under
// test: force-drain on, the seeded fake client, and a rebooter that reports
// the reboot call back to this test through the channel.
testConfig, node, _ := validTestConfig(t, testNode())
testConfig.ForceNodeDrain = true
testConfig.Clientset = fakeClient
testConfig.Rebooter = &mockRebooter{
rebootF: func(auth bool) {
rebootTriggerred <- auth
},
}

ctx := contextWithTimeout(t, agentRunTimeLimit)

// Run the agent and check that the node's reboot-needed annotation has the
// value constants.True before proceeding.
assertNodeProperty(ctx, t, &assertNodePropertyContext{
done: runAgent(ctx, t, testConfig),
config: testConfig,
testF: assertNodeAnnotationValue(constants.AnnotationRebootNeeded, constants.True),
})

// NOTE(review): presumably marks the node as allowed to reboot (the operator's
// side of the handshake); confirm against the helper.
okToReboot(ctx, t, testConfig.Clientset.CoreV1().Nodes(), node.Name)

// The subtest passes once the mock rebooter is invoked; it fails if the
// context is done first.
select {
case <-ctx.Done():
t.Fatal("Timed out waiting for reboot to be triggered")
case <-rebootTriggerred:
}
})

t.Run("after_draining_node", func(t *testing.T) {
t.Parallel()

Expand Down

0 comments on commit 7a1b0ff

Please sign in to comment.