Skip to content

Commit

Permalink
ebs br: mark volume backup complete only when all data planes are dur…
Browse files Browse the repository at this point in the history
…ing waiting snapshots creation time

Signed-off-by: BornChanger <dawn_catcher@126.com>
  • Loading branch information
BornChanger committed Sep 4, 2024
1 parent b6f34b7 commit b29b12a
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 6 deletions.
24 changes: 18 additions & 6 deletions pkg/fedvolumebackup/backup/backup_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,20 +381,32 @@ func (bm *backupManager) waitBackupMemberInitializeComplete(volumeBackup *v1alph
}

// waitVolumeSnapshotsComplete inspects each backup member for failure and for
// volume-backup completion.
//
// NOTE(review): this text comes from a rendered diff hunk without +/- markers,
// so statements from the pre-change and post-change versions presumably appear
// side by side below (for example the "is not volume snapshots complete" check
// occurs twice, and the braces do not balance as written). Confirm against the
// real file before relying on the exact control flow.
//
// Reading only the statements that use someMemberFailed: a failed member is
// logged and remembered instead of ending the loop, a non-failed member that
// is not yet volume-backup complete returns controller.IgnoreErrorf, and after
// the loop a *fedvolumebackup.BRDataPlaneFailedError is returned if any member
// was recorded as failed; otherwise nil is returned.
func (bm *backupManager) waitVolumeSnapshotsComplete(backupMembers []*volumeBackupMember) error {
// Tracks whether any member failed, plus the name/cluster of the most
// recently seen failed member (later failures overwrite earlier ones).
someMemberFailed := false
var failMemberName, failClusterName string

for _, backupMember := range backupMembers {
// A member counts as failed if any of the three failure predicates holds.
if pingcapv1alpha1.IsVolumeBackupInitializeFailed(backupMember.backup) ||
pingcapv1alpha1.IsVolumeBackupFailed(backupMember.backup) ||
pingcapv1alpha1.IsBackupFailed(backupMember.backup) {
// NOTE(review): the next four lines build and return the failure error
// immediately; presumably they are the removed side of the diff.
errMsg := fmt.Sprintf("backup member %s of cluster %s failed", backupMember.backup.Name, backupMember.k8sClusterName)
return &fedvolumebackup.BRDataPlaneFailedError{
Reason: reasonVolumeBackupMemberFailed,
Message: errMsg,
// Record and log the failure, then keep iterating over the other members.
failMemberName = backupMember.backup.Name
failClusterName = backupMember.k8sClusterName
klog.Errorf("backup member %s of cluster %s failed", failMemberName, failClusterName)
someMemberFailed = true
} else {
// Non-failed member: stop with an ignorable error while it is not complete.
if !pingcapv1alpha1.IsVolumeBackupComplete(backupMember.backup) {
return controller.IgnoreErrorf("backup member %s of cluster %s is not volume snapshots complete", backupMember.backup.Name, backupMember.k8sClusterName)
}
}
// NOTE(review): duplicate of the completeness check above; presumably the
// removed side of the diff.
if !pingcapv1alpha1.IsVolumeBackupComplete(backupMember.backup) {
return controller.IgnoreErrorf("backup member %s of cluster %s is not volume snapshots complete", backupMember.backup.Name, backupMember.k8sClusterName)
}

// Report the recorded failure only after the loop has finished.
if someMemberFailed {
errMsg := fmt.Sprintf("backup member %s of cluster %s failed", failMemberName, failClusterName)
return &fedvolumebackup.BRDataPlaneFailedError{
Reason: reasonVolumeBackupMemberFailed,
Message: errMsg,
}
}

return nil
}

Expand Down
98 changes: 98 additions & 0 deletions pkg/fedvolumebackup/backup/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,37 @@ func (h *helper) setDataPlaneVolumeComplete(ctx context.Context) {
h.g.Expect(err).To(gomega.BeNil())
}

// setSomeDataPlaneBackupFailed fetches the backup member named
// h.backupMemberName1 through h.dataPlaneClient1, sets its VolumeBackupFailed
// condition to true, and writes the status back. Any client error fails the
// test via the helper's gomega instance.
func (h *helper) setSomeDataPlaneBackupFailed(ctx context.Context) {
	backups := h.dataPlaneClient1.PingcapV1alpha1().Backups(fakeTcNamespace1)

	member, err := backups.Get(ctx, h.backupMemberName1, metav1.GetOptions{})
	h.g.Expect(err).To(gomega.BeNil())

	failedCond := &pingcapv1alpha1.BackupCondition{
		Type:   pingcapv1alpha1.VolumeBackupFailed,
		Status: corev1.ConditionTrue,
	}
	pingcapv1alpha1.UpdateBackupCondition(&member.Status, failedCond)

	_, err = backups.UpdateStatus(ctx, member, metav1.UpdateOptions{})
	h.g.Expect(err).To(gomega.BeNil())
}

// setOtherDataPlaneVolumeComplete sets the VolumeBackupComplete condition to
// true on the backup members reached through h.dataPlaneClient2 and
// h.dataPlaneClient3 (in that order) and writes each status back. Any client
// error fails the test via the helper's gomega instance.
func (h *helper) setOtherDataPlaneVolumeComplete(ctx context.Context) {
	// Each member receives its own condition value, so build a fresh one per use.
	newCompleteCond := func() *pingcapv1alpha1.BackupCondition {
		return &pingcapv1alpha1.BackupCondition{
			Type:   pingcapv1alpha1.VolumeBackupComplete,
			Status: corev1.ConditionTrue,
		}
	}

	// Member behind data plane client 2.
	backups2 := h.dataPlaneClient2.PingcapV1alpha1().Backups(fakeTcNamespace2)
	member2, err := backups2.Get(ctx, h.backupMemberName2, metav1.GetOptions{})
	h.g.Expect(err).To(gomega.BeNil())
	pingcapv1alpha1.UpdateBackupCondition(&member2.Status, newCompleteCond())
	_, err = backups2.UpdateStatus(ctx, member2, metav1.UpdateOptions{})
	h.g.Expect(err).To(gomega.BeNil())

	// Member behind data plane client 3.
	backups3 := h.dataPlaneClient3.PingcapV1alpha1().Backups(fakeTcNamespace3)
	member3, err := backups3.Get(ctx, h.backupMemberName3, metav1.GetOptions{})
	h.g.Expect(err).To(gomega.BeNil())
	pingcapv1alpha1.UpdateBackupCondition(&member3.Status, newCompleteCond())
	_, err = backups3.UpdateStatus(ctx, member3, metav1.UpdateOptions{})
	h.g.Expect(err).To(gomega.BeNil())
}

func (h *helper) setDataPlaneComplete(ctx context.Context) {
backupMember1, err := h.dataPlaneClient1.PingcapV1alpha1().Backups(fakeTcNamespace1).Get(ctx, h.backupMemberName1, metav1.GetOptions{})
h.g.Expect(err).To(gomega.BeNil())
Expand Down Expand Up @@ -410,6 +441,73 @@ func TestVolumeBackupVolumeFailed(t *testing.T) {
h.assertFailed(volumeBackup)
}

// TestVolumeBackupVolumeMemberFailed drives a volume backup through its sync
// steps, marks one data plane member as volume-backup failed while the others
// later become volume-backup complete, and asserts that teardown runs and the
// volume backup ends up failed.
func TestVolumeBackupVolumeMemberFailed(t *testing.T) {
	ctx := context.Background()
	h := newHelper(t, "backup-4", "ns-4")

	// Create the volume backup that every Sync call below operates on.
	vb := h.createVolumeBackup(ctx)

	// One Sync round that must return nil.
	syncSucceeds := func() {
		h.g.Expect(h.bm.Sync(vb)).To(gomega.BeNil())
	}
	// One Sync round that must return an error.
	syncReturnsError := func() {
		h.g.Expect(h.bm.Sync(vb)).To(gomega.HaveOccurred())
	}

	// Initialize phase runs.
	syncSucceeds()
	h.assertRunInitialize(ctx, vb)

	// Data planes are not initialized yet, so Sync reports an error.
	syncReturnsError()

	// Data planes initialized: execute phase runs.
	h.setDataPlaneInitialized(ctx)
	syncSucceeds()
	h.assertRunExecute(ctx, vb)

	// Snapshots are not created yet, so Sync reports an error.
	syncReturnsError()

	// Snapshots created: GC and scheduler are resumed.
	h.setDataPlaneSnapshotCreated(ctx)
	syncSucceeds()
	h.assertRunResumeGcSchedule(ctx)

	// Initialize is not complete yet, so Sync reports an error.
	syncReturnsError()

	// Initialize complete: Sync still reports an error, and the control plane
	// shows snapshots created.
	h.setDataPlaneInitializeComplete(ctx)
	syncReturnsError()
	h.assertControlPlaneSnapshotsCreated(vb)

	// Volume backup is not complete on the data planes yet.
	syncReturnsError()

	// One data plane member fails its volume backup; Sync returns nil and the
	// control plane still shows snapshots created.
	h.setSomeDataPlaneBackupFailed(ctx)
	syncSucceeds()
	h.assertControlPlaneSnapshotsCreated(vb)

	// The other data plane members finish their volume backup: teardown runs.
	h.setOtherDataPlaneVolumeComplete(ctx)
	syncSucceeds()
	h.assertRunTeardown(ctx, vb, false)

	// Data planes report failure: the volume backup is marked failed.
	h.setDataPlaneFailed(ctx)
	syncSucceeds()
	h.assertFailed(vb)
}

func generateVolumeBackup(backupName, backupNamespace string) *v1alpha1.VolumeBackup {
return &v1alpha1.VolumeBackup{
ObjectMeta: metav1.ObjectMeta{
Expand Down

0 comments on commit b29b12a

Please sign in to comment.