Skip to content

Commit f9ee8d1

Browse files
authored
During deploy time operator should check for CSI Driver and switch to etcd (#1354)
Summary: We currently choose between an etcd-backed and a PV-backed metadata store based only on whether the user has a default storage class set. However, we've been seeing instances where the storage class exists, but persistent volumes still can't be used unless the CSI driver is enabled. This leads to many failed deployments. Since this case is fairly common, we can autodetect it and switch to etcd instead. Relevant Issues: N/A Type of change: /kind bug Test Plan: Skaffold deploy operator to a cluster --------- Signed-off-by: Michelle Nguyen <michellenguyen@pixielabs.ai>
1 parent 8111b1c commit f9ee8d1

1 file changed

Lines changed: 32 additions & 2 deletions

File tree

src/operator/controllers/vizier_controller.go

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"strings"
3030
"time"
3131

32+
"github.com/blang/semver"
3233
"github.com/cenkalti/backoff/v4"
3334
log "github.com/sirupsen/logrus"
3435
"google.golang.org/grpc"
@@ -126,6 +127,34 @@ func getLatestVizierVersion(ctx context.Context, client cloudpb.ArtifactTrackerC
126127
return resp.Artifact[0].VersionStr, nil
127128
}
128129

130+
// missingNecessaryCSIDriver checks if the user is running an EKS cluster, and if so, whether they are
131+
// missing the CSIDriver. Without the CSI driver, persistent volumes may not be able to be deployed.
132+
func missingNecessaryCSIDriver(clientset *kubernetes.Clientset, k8sVersion string) bool {
133+
// This check only needs to be done for eks clusters with K8s version > 1.22.0.
134+
if !strings.Contains(k8sVersion, "-eks-") {
135+
return false
136+
}
137+
138+
parsedVersion, err := semver.ParseTolerant(k8sVersion)
139+
if err != nil {
140+
log.WithError(err).Error("Failed to parse K8s cluster version")
141+
return false
142+
}
143+
driverVersionRange, _ := semver.ParseRange("<=1.22.0")
144+
if driverVersionRange(parsedVersion) {
145+
return false
146+
}
147+
148+
_, err = clientset.AppsV1().Deployments("kube-system").Get(context.Background(), "ebs-csi-controller", metav1.GetOptions{})
149+
if err != nil && !k8serrors.IsNotFound(err) {
150+
log.WithError(err).Error("Error trying to check for ebs-csi-controller")
151+
return false
152+
} else if k8serrors.IsNotFound(err) {
153+
return true
154+
}
155+
return false
156+
}
157+
129158
// validateNumDefaultStorageClasses returns a boolean whether there is exactly
130159
// 1 default storage class or not.
131160
func validateNumDefaultStorageClasses(clientset *kubernetes.Clientset) (bool, error) {
@@ -154,7 +183,6 @@ func validateNumDefaultStorageClasses(clientset *kubernetes.Clientset) (bool, er
154183
// Reconcile updates the Vizier running in the cluster to match the expected state.
155184
func (r *VizierReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
156185
log.WithField("req", req).Info("Reconciling Vizier...")
157-
158186
// Fetch vizier CRD to determine what operation should be performed.
159187
var vizier v1alpha1.Vizier
160188
if err := r.Get(ctx, req.NamespacedName, &vizier); err != nil {
@@ -374,7 +402,9 @@ func (r *VizierReconciler) deployVizier(ctx context.Context, req ctrl.Request, v
374402
if err != nil {
375403
log.WithError(err).Error("Error checking default storage classes")
376404
}
377-
if !defaultStorageExists {
405+
missingCSIDriver := missingNecessaryCSIDriver(r.Clientset, r.K8sVersion)
406+
407+
if !defaultStorageExists || missingCSIDriver {
378408
log.Warn("No default storage class detected for cluster. Deploying etcd operator instead of statefulset for metadata backend.")
379409
vz.Spec.UseEtcdOperator = true
380410
}

0 commit comments

Comments
 (0)