From a429022518cbbdb9f7bb7986a9fb07aa725ef995 Mon Sep 17 00:00:00 2001 From: Yang Chiu Date: Mon, 11 Nov 2024 15:33:08 +0800 Subject: [PATCH] test(robot): migrate test_replica_auto_balance_node_least_effort Signed-off-by: Yang Chiu --- e2e/keywords/volume.resource | 38 ++++++++++++++++++++ e2e/libs/keywords/volume_keywords.py | 3 ++ e2e/libs/volume/crd.py | 3 ++ e2e/libs/volume/rest.py | 17 +++++++++ e2e/libs/volume/volume.py | 3 ++ e2e/tests/regression/test_scheduling.robot | 42 ++++++++++++++++++++++ 6 files changed, 106 insertions(+) diff --git a/e2e/keywords/volume.resource b/e2e/keywords/volume.resource index b322b369e..70e7a71dd 100644 --- a/e2e/keywords/volume.resource +++ b/e2e/keywords/volume.resource @@ -231,6 +231,44 @@ Check volume ${volume_id} replica on node ${node_id} exist ${replica_name} get_replica_name_on_node ${volume_name} ${node_name} Should Not Be Equal ${replica_name} ${None} +Volume ${volume_id} should have ${expected_replica_count} replicas running + ${volume_name} = generate_name_with_suffix volume ${volume_id} + ${replica_count} = wait_for_replica_count ${volume_name} node_name= replica_count=${expected_replica_count} + +Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id} + ${volume_name} = generate_name_with_suffix volume ${volume_id} + ${node_name} = get_node_by_index ${node_id} + ${replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} replica_count=${expected_replica_count} + Set Test Variable ${volume_name} + Set Test Variable ${node_name} + Set Test Variable ${replica_count} + +Volume ${volume_id} should have replicas running on node ${node_id} + ${volume_name} = generate_name_with_suffix volume ${volume_id} + ${node_name} = get_node_by_index ${node_id} + ${replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} + Set Test Variable ${volume_name} + Set Test Variable ${node_name} + Set Test Variable ${replica_count} + +Volume ${volume_id} should 
have ${expected_replica_count} replicas running on node ${node_id} and no additional scheduling occurs + Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id} + FOR ${i} IN RANGE 3 + Log to console Ensuring there's no additional scheduling for node ${node_name} ... (${i}) + ${new_replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} replica_count=${expected_replica_count} + Should Be Equal As Integers ${replica_count} ${new_replica_count} + Sleep 5 + END + +Volume ${volume_id} should have replicas running on node ${node_id} and no additional scheduling occurs + Volume ${volume_id} should have replicas running on node ${node_id} + FOR ${i} IN RANGE 3 + Log to console Ensuring there's no additional scheduling for node ${node_name} ... (${i}) + ${new_replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} + Should Be Equal As Integers ${replica_count} ${new_replica_count} + Sleep 5 + END + Check volume ${volume_id} data is intact ${volume_name} = generate_name_with_suffix volume ${volume_id} check_data_checksum ${volume_name} diff --git a/e2e/libs/keywords/volume_keywords.py b/e2e/libs/keywords/volume_keywords.py index 2ef9c77ac..832323561 100644 --- a/e2e/libs/keywords/volume_keywords.py +++ b/e2e/libs/keywords/volume_keywords.py @@ -236,6 +236,9 @@ def wait_for_replica_running(self, volume_name, node_name): def get_replica_name_on_node(self, volume_name, node_name): return self.volume.get_replica_name_on_node(volume_name, node_name) + def wait_for_replica_count(self, volume_name, node_name=None, replica_count=None): + return self.volume.wait_for_replica_count(volume_name, node_name, replica_count) + def wait_for_replica_rebuilding_to_stop_on_node(self, volume_name, replica_locality): node_id = self.get_node_id_by_replica_locality(volume_name, replica_locality) retry_count, retry_interval = get_retry_count_and_interval() diff --git a/e2e/libs/volume/crd.py b/e2e/libs/volume/crd.py 
index 9598a99b8..90d4dfcde 100644 --- a/e2e/libs/volume/crd.py +++ b/e2e/libs/volume/crd.py @@ -264,6 +264,9 @@ def is_replica_running(self, volume_name, node_name, is_running): def get_replica_name_on_node(self, volume_name, node_name): return Rest().get_replica_name_on_node(volume_name, node_name) + def wait_for_replica_count(self, volume_name, node_name, replica_count): + return Rest().wait_for_replica_count(volume_name, node_name, replica_count) + def wait_for_volume_keep_in_state(self, volume_name, desired_state): self.wait_for_volume_state(volume_name, desired_state) diff --git a/e2e/libs/volume/rest.py b/e2e/libs/volume/rest.py index 3d6a4225a..bec1a8b55 100644 --- a/e2e/libs/volume/rest.py +++ b/e2e/libs/volume/rest.py @@ -231,6 +231,23 @@ def get_replica_name_on_node(self, volume_name, node_name): if r.hostId == node_name: return r.name + def wait_for_replica_count(self, volume_name, node_name, replica_count): + for i in range(self.retry_count): + running_replica_count = 0 + volume = get_longhorn_client().by_id_volume(volume_name) + for r in volume.replicas: + if node_name and r.hostId == node_name and r.running: + running_replica_count += 1 + elif not node_name and r.running: + running_replica_count += 1 + logging(f"Waiting for {replica_count if replica_count else ''} replicas for volume {volume_name} running on {node_name if node_name else 'nodes'}, currently it's {running_replica_count} ... 
({i})") + if replica_count and running_replica_count == int(replica_count): + break + elif not replica_count and running_replica_count: + break + time.sleep(self.retry_interval) + return running_replica_count + def wait_for_replica_rebuilding_complete(self, volume_name, node_name=None): completed = False for i in range(self.retry_count): diff --git a/e2e/libs/volume/volume.py b/e2e/libs/volume/volume.py index b039545a4..145430c04 100644 --- a/e2e/libs/volume/volume.py +++ b/e2e/libs/volume/volume.py @@ -125,6 +125,9 @@ def wait_for_replica_running(self, volume_name, node_name): def get_replica_name_on_node(self, volume_name, node_name): return self.volume.get_replica_name_on_node(volume_name, node_name) + def wait_for_replica_count(self, volume_name, node_name, replica_count): + return self.volume.wait_for_replica_count(volume_name, node_name, replica_count) + def wait_for_replica_rebuilding_complete(self, volume_name, node_name=None): return self.volume.wait_for_replica_rebuilding_complete(volume_name, node_name) diff --git a/e2e/tests/regression/test_scheduling.robot b/e2e/tests/regression/test_scheduling.robot index d0f1fdeb4..38aab0caa 100644 --- a/e2e/tests/regression/test_scheduling.robot +++ b/e2e/tests/regression/test_scheduling.robot @@ -10,6 +10,7 @@ Resource ../keywords/deployment.resource Resource ../keywords/persistentvolumeclaim.resource Resource ../keywords/workload.resource Resource ../keywords/k8s.resource +Resource ../keywords/node.resource Test Setup Set test environment Test Teardown Cleanup test resources @@ -51,3 +52,44 @@ Test Soft Anti Affinity Scheduling Then Wait until volume 0 replicas rebuilding completed And Wait for volume 0 healthy And Check volume 0 data is intact + +Test Replica Auto Balance Node Least Effort + [Tags] coretest + [Documentation] Scenario: replica auto-balance nodes with `least_effort` + Given Set setting replica-soft-anti-affinity to true + And Set setting replica-auto-balance to least-effort + + When Disable node 1 
scheduling + And Disable node 2 scheduling + And Create volume 0 with numberOfReplicas=6 dataEngine=${DATA_ENGINE} + And Attach volume 0 + And Wait for volume 0 healthy + And Write data to volume 0 + Then Volume 0 should have 6 replicas running on node 0 + And Volume 0 should have 0 replicas running on node 1 + And Volume 0 should have 0 replicas running on node 2 + + When Enable node 1 scheduling + # wait for auto balance + Then Volume 0 should have replicas running on node 1 + And Volume 0 should have 6 replicas running + # loop 3 times with 5-second wait and compare the replica count to: + # ensure no additional scheduling occurs + # the replica count remains unchanged + And Volume 0 should have 5 replicas running on node 0 and no additional scheduling occurs + And Volume 0 should have 1 replicas running on node 1 and no additional scheduling occurs + And Volume 0 should have 0 replicas running on node 2 and no additional scheduling occurs + + When Enable node 2 scheduling + # wait for auto balance + Then Volume 0 should have replicas running on node 2 + And Volume 0 should have 6 replicas running + # loop 3 times with 5-second wait and compare the replica count to: + # ensure no additional scheduling occurs + # the replica count remains unchanged + And Volume 0 should have 4 replicas running on node 0 and no additional scheduling occurs + And Volume 0 should have 1 replicas running on node 1 and no additional scheduling occurs + And Volume 0 should have 1 replicas running on node 2 and no additional scheduling occurs + + And Wait for volume 0 healthy + And Check volume 0 data is intact