Skip to content

Commit

Permalink
[controller] Checking parent controller region state before handling …
Browse files Browse the repository at this point in the history
…requests for all routes (#1075)

* Resolving merge conflicts

* Implemented active parent controller check for all routes with VeniceParentControllerRegionStateHandler

* Fixed CreateVersionTest

* Added ParentControllerRegionState properties to tests

* Revert "Implemented active parent controller check for all routes with VeniceParentControllerRegionStateHandler"

This reverts commit 1c03f5b.

* Implemented active checks, reverted test case, minor javadoc nit

- Added ACTIVE checks to all routes in AdminSparkServer using VeniceParentControllerRegionStateHandler wrapper class
- Reverted CreateVersionTest to previous commit without ParentControllerRegionState.ACTIVE
- Removed some minor parentheses in ParentControllerRegionState

* Reverted TestAdminSparkServer and TestBackupControllerResponse without ParentControllerRegionState.ACTIVE
  • Loading branch information
bonytoni authored Jul 29, 2024
1 parent decc833 commit cdbd176
Show file tree
Hide file tree
Showing 11 changed files with 519 additions and 114 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,17 @@ private ConfigKeys() {
/** Whether current controller is parent or not */
public static final String CONTROLLER_PARENT_MODE = "controller.parent.mode";

/**
* This config specifies the state of the region of the parent controller.
*
* The region can be in one of the following states:
* ACTIVE: the parent controller in the region is serving requests.
* PASSIVE: the parent controller in the region is rejecting requests.
*
* By default, this is set to ACTIVE.
*/
public static final String CONTROLLER_PARENT_REGION_STATE = "controller.parent.region.state";

/**
* This config is used to control how many errored topics we are going to keep in parent cluster.
* This is mostly used to investigate the Kafka missing message issue.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.eq;

import com.linkedin.venice.controller.ParentControllerRegionState;
import com.linkedin.venice.controller.VeniceHelixAdmin;
import com.linkedin.venice.controllerapi.ControllerApiConstants;
import com.linkedin.venice.controllerapi.ControllerRoute;
Expand Down Expand Up @@ -205,6 +206,7 @@ public void testAAIncrementalPushRTSourceRegion(boolean sourceGridFabricPresent,
mockStore.setIncrementalPushEnabled(true);
doReturn(mockStore).when(admin).getStore(anyString(), anyString());
doReturn(true).when(admin).isParent();
doReturn(ParentControllerRegionState.ACTIVE).when(admin).getParentControllerRegionState();
doReturn(true).when(admin).isLeaderControllerFor(anyString());
doReturn(1).when(admin).getReplicationFactor(anyString(), anyString());
doReturn(1).when(admin).calculateNumberOfPartitions(anyString(), anyString());
Expand Down Expand Up @@ -330,6 +332,7 @@ public void testSamzaReplicationPolicyMode(boolean samzaPolicy, boolean storePol
doReturn("kafka-bootstrap").when(admin).getKafkaBootstrapServers(anyBoolean());
doReturn("store_rt").when(admin).getRealTimeTopic(anyString(), anyString());
doReturn(samzaPolicy).when(admin).isParent();
doReturn(ParentControllerRegionState.ACTIVE).when(admin).getParentControllerRegionState();
doReturn(aaEnabled).when(admin).isActiveActiveReplicationEnabledInAllRegion(anyString(), anyString(), eq(true));
mockStore.setActiveActiveReplicationEnabled(aaEnabled);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,13 @@ void updateRoutersClusterConfig(
*/
boolean isParent();

/**
* Return the state of the region of the parent controller.
* @return {@link ParentControllerRegionState#ACTIVE} which means that the parent controller in the region is serving requests.
* Otherwise, return {@link ParentControllerRegionState#PASSIVE}
*/
ParentControllerRegionState getParentControllerRegionState();

/**
* Get child datacenter to child controller url mapping.
* @return A map of child datacenter -> child controller url
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.linkedin.venice.controller;

/**
* Enum representing the state of the region where the parent controller resides.
* i.e., Region dc-0 is ACTIVE while Region dc-1 is PASSIVE
* This means that ParentController in dc-0 is serving requests while ParentController in dc-1 is rejecting requests
*/
public enum ParentControllerRegionState {
/** The region is active, so the parent controller in the region is serving requests */
ACTIVE,
/**
* The region is passive, so the parent controller in the region is rejecting requests.
* This region is ready to take over if components in the currently active region fails
*/
PASSIVE
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import static com.linkedin.venice.ConfigKeys.CONTROLLER_NAME;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_EXTERNAL_SUPERSET_SCHEMA_GENERATION_ENABLED;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_MODE;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_REGION_STATE;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_SYSTEM_STORE_HEARTBEAT_CHECK_WAIT_TIME_SECONDS;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_SYSTEM_STORE_REPAIR_CHECK_INTERVAL_SECONDS;
import static com.linkedin.venice.ConfigKeys.CONTROLLER_PARENT_SYSTEM_STORE_REPAIR_RETRY_COUNT;
Expand Down Expand Up @@ -157,6 +158,7 @@
import static com.linkedin.venice.SSLConfig.DEFAULT_CONTROLLER_SSL_ENABLED;
import static com.linkedin.venice.VeniceConstants.DEFAULT_PER_ROUTER_READ_QUOTA;
import static com.linkedin.venice.VeniceConstants.DEFAULT_SSL_FACTORY_CLASS_NAME;
import static com.linkedin.venice.controller.ParentControllerRegionState.ACTIVE;
import static com.linkedin.venice.pubsub.PubSubConstants.DEFAULT_KAFKA_REPLICATION_FACTOR;
import static com.linkedin.venice.pubsub.PubSubConstants.PUBSUB_TOPIC_MANAGER_METADATA_FETCHER_CONSUMER_POOL_SIZE_DEFAULT_VALUE;
import static com.linkedin.venice.utils.ByteUtils.BYTES_PER_MB;
Expand Down Expand Up @@ -221,6 +223,7 @@ public class VeniceControllerClusterConfig {
private final String controllerClusterZkAddress;
private final boolean multiRegion;
private final boolean parent;
private final ParentControllerRegionState parentControllerRegionState;
private final Map<String, String> childDataCenterControllerUrlMap;
private final String d2ServiceName;
private final String clusterDiscoveryD2ServiceName;
Expand Down Expand Up @@ -631,6 +634,8 @@ public VeniceControllerClusterConfig(VeniceProperties props) {
this.controllerClusterReplica = props.getInt(CONTROLLER_CLUSTER_REPLICA, 3);
this.controllerClusterZkAddress = props.getString(CONTROLLER_CLUSTER_ZK_ADDRESSS, getZkAddress());
this.parent = props.getBoolean(CONTROLLER_PARENT_MODE, false);
this.parentControllerRegionState =
ParentControllerRegionState.valueOf(props.getString(CONTROLLER_PARENT_REGION_STATE, ACTIVE.name()));

if (childDatacenters.isEmpty()) {
this.childDataCenterControllerUrlMap = Collections.emptyMap();
Expand Down Expand Up @@ -1162,6 +1167,10 @@ public boolean isParent() {
return parent;
}

public ParentControllerRegionState getParentControllerRegionState() {
return parentControllerRegionState;
}

public long getDeprecatedJobTopicRetentionMs() {
return deprecatedJobTopicRetentionMs;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ public boolean isParent() {
return getCommonConfig().isParent();
}

public ParentControllerRegionState getParentControllerRegionState() {
return getCommonConfig().getParentControllerRegionState();
}

public String getControllerName() {
return getCommonConfig().getControllerName();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7553,6 +7553,14 @@ public boolean isParent() {
return multiClusterConfigs.isParent();
}

/**
* @see Admin#getParentControllerRegionState()
*/
@Override
public ParentControllerRegionState getParentControllerRegionState() {
return multiClusterConfigs.getParentControllerRegionState();
}

/**
* @see Admin#getChildDataCenterControllerUrlMap(String)
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4871,6 +4871,14 @@ public boolean isParent() {
return getVeniceHelixAdmin().isParent();
}

/**
* @see Admin#getParentControllerRegionState()
*/
@Override
public ParentControllerRegionState getParentControllerRegionState() {
return getVeniceHelixAdmin().getParentControllerRegionState();
}

/**
* @see Admin#getChildDataCenterControllerUrlMap(String)
*/
Expand Down
Loading

0 comments on commit cdbd176

Please sign in to comment.