test(robot): migrate test_replica_auto_balance_node_least_effort
Signed-off-by: Yang Chiu <[email protected]>
yangchiu committed Nov 14, 2024
1 parent a360760 commit a429022
Showing 6 changed files with 106 additions and 0 deletions.
38 changes: 38 additions & 0 deletions e2e/keywords/volume.resource
@@ -231,6 +231,44 @@ Check volume ${volume_id} replica on node ${node_id} exist
    ${replica_name}    get_replica_name_on_node    ${volume_name}    ${node_name}
    Should Not Be Equal    ${replica_name}    ${None}

Volume ${volume_id} should have ${expected_replica_count} replicas running
    ${volume_name} =    generate_name_with_suffix    volume    ${volume_id}
    ${replica_count} =    wait_for_replica_count    ${volume_name}    node_name=    replica_count=${expected_replica_count}

Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id}
    ${volume_name} =    generate_name_with_suffix    volume    ${volume_id}
    ${node_name} =    get_node_by_index    ${node_id}
    ${replica_count} =    wait_for_replica_count    ${volume_name}    node_name=${node_name}    replica_count=${expected_replica_count}
    Set Test Variable    ${volume_name}
    Set Test Variable    ${node_name}
    Set Test Variable    ${replica_count}

Volume ${volume_id} should have replicas running on node ${node_id}
    ${volume_name} =    generate_name_with_suffix    volume    ${volume_id}
    ${node_name} =    get_node_by_index    ${node_id}
    ${replica_count} =    wait_for_replica_count    ${volume_name}    node_name=${node_name}
    Set Test Variable    ${volume_name}
    Set Test Variable    ${node_name}
    Set Test Variable    ${replica_count}

Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id} and no additional scheduling occurs
    Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id}
    FOR    ${i}    IN RANGE    3
        Log to console    Ensuring there's no additional scheduling for node ${node_name} ... (${i})
        ${new_replica_count} =    wait_for_replica_count    ${volume_name}    node_name=${node_name}    replica_count=${expected_replica_count}
        Should Be Equal As Integers    ${replica_count}    ${new_replica_count}
        Sleep    5
    END

Volume ${volume_id} should have replicas running on node ${node_id} and no additional scheduling occurs
    Volume ${volume_id} should have replicas running on node ${node_id}
    FOR    ${i}    IN RANGE    3
        Log to console    Ensuring there's no additional scheduling for node ${node_name} ... (${i})
        ${new_replica_count} =    wait_for_replica_count    ${volume_name}    node_name=${node_name}
        Should Be Equal As Integers    ${replica_count}    ${new_replica_count}
        Sleep    5
    END

Check volume ${volume_id} data is intact
    ${volume_name} =    generate_name_with_suffix    volume    ${volume_id}
    check_data_checksum    ${volume_name}
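The new `... and no additional scheduling occurs` keywords above boil down to a stability check: establish a baseline replica count, then re-check it a few times with a short sleep and require that it never changes. A minimal Python sketch of the same pattern, assuming the volume_keywords library object added below (the helper name, check count, and interval here are illustrative, not part of this commit):

import time

def assert_no_additional_scheduling(volume_kw, volume_name, node_name,
                                    expected_count, checks=3, interval=5):
    # Baseline: wait until the expected number of replicas is running on the node.
    baseline = volume_kw.wait_for_replica_count(
        volume_name, node_name=node_name, replica_count=expected_count)
    # Re-check a few times; the count must stay the same, i.e. no extra
    # replicas get scheduled onto the node after the initial balance.
    for i in range(checks):
        current = volume_kw.wait_for_replica_count(
            volume_name, node_name=node_name, replica_count=expected_count)
        assert current == baseline, \
            f"replica count on {node_name} changed from {baseline} to {current} (check {i})"
        time.sleep(interval)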
3 changes: 3 additions & 0 deletions e2e/libs/keywords/volume_keywords.py
@@ -236,6 +236,9 @@ def wait_for_replica_running(self, volume_name, node_name):
    def get_replica_name_on_node(self, volume_name, node_name):
        return self.volume.get_replica_name_on_node(volume_name, node_name)

    def wait_for_replica_count(self, volume_name, node_name=None, replica_count=None):
        return self.volume.wait_for_replica_count(volume_name, node_name, replica_count)

    def wait_for_replica_rebuilding_to_stop_on_node(self, volume_name, replica_locality):
        node_id = self.get_node_id_by_replica_locality(volume_name, replica_locality)
        retry_count, retry_interval = get_retry_count_and_interval()
3 changes: 3 additions & 0 deletions e2e/libs/volume/crd.py
@@ -264,6 +264,9 @@ def is_replica_running(self, volume_name, node_name, is_running):
    def get_replica_name_on_node(self, volume_name, node_name):
        return Rest().get_replica_name_on_node(volume_name, node_name)

    def wait_for_replica_count(self, volume_name, node_name, replica_count):
        return Rest().wait_for_replica_count(volume_name, node_name, replica_count)

    def wait_for_volume_keep_in_state(self, volume_name, desired_state):
        self.wait_for_volume_state(volume_name, desired_state)

17 changes: 17 additions & 0 deletions e2e/libs/volume/rest.py
@@ -231,6 +231,23 @@ def get_replica_name_on_node(self, volume_name, node_name):
            if r.hostId == node_name:
                return r.name

    def wait_for_replica_count(self, volume_name, node_name, replica_count):
        for i in range(self.retry_count):
            # Count the replicas that are currently running, optionally
            # restricted to the given node.
            running_replica_count = 0
            volume = get_longhorn_client().by_id_volume(volume_name)
            for r in volume.replicas:
                if node_name and r.hostId == node_name and r.running:
                    running_replica_count += 1
                elif not node_name and r.running:
                    running_replica_count += 1
            logging(f"Waiting for {replica_count if replica_count else ''} replicas for volume {volume_name} running on {node_name if node_name else 'nodes'}, currently it's {running_replica_count} ... ({i})")
            # Stop as soon as the expected count is reached, or, if no count
            # was given, as soon as any replica is running.
            if replica_count and running_replica_count == int(replica_count):
                break
            elif not replica_count and running_replica_count:
                break
            time.sleep(self.retry_interval)
        return running_replica_count

    def wait_for_replica_rebuilding_complete(self, volume_name, node_name=None):
        completed = False
        for i in range(self.retry_count):
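As a rough usage sketch of the polling helper above (the volume and node names are hypothetical, and the import path assumes the e2e/libs layout used by crd.py):

from volume.rest import Rest

rest = Rest()

# Poll until volume "vol-0" reports 6 running replicas on "node-0" (or the
# retry budget is exhausted), then return the count actually observed.
running = rest.wait_for_replica_count("vol-0", node_name="node-0", replica_count=6)
assert running == 6

# With no node filter and no expected count, the call returns as soon as any
# replica of the volume is running anywhere.
running_anywhere = rest.wait_for_replica_count("vol-0", node_name=None, replica_count=None)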
3 changes: 3 additions & 0 deletions e2e/libs/volume/volume.py
@@ -125,6 +125,9 @@ def wait_for_replica_running(self, volume_name, node_name):
    def get_replica_name_on_node(self, volume_name, node_name):
        return self.volume.get_replica_name_on_node(volume_name, node_name)

    def wait_for_replica_count(self, volume_name, node_name, replica_count):
        return self.volume.wait_for_replica_count(volume_name, node_name, replica_count)

    def wait_for_replica_rebuilding_complete(self, volume_name, node_name=None):
        return self.volume.wait_for_replica_rebuilding_complete(volume_name, node_name)
42 changes: 42 additions & 0 deletions e2e/tests/regression/test_scheduling.robot
@@ -10,6 +10,7 @@ Resource ../keywords/deployment.resource
Resource    ../keywords/persistentvolumeclaim.resource
Resource    ../keywords/workload.resource
Resource    ../keywords/k8s.resource
Resource    ../keywords/node.resource

Test Setup    Set test environment
Test Teardown    Cleanup test resources
@@ -51,3 +51,44 @@ Test Soft Anti Affinity Scheduling
    Then Wait until volume 0 replicas rebuilding completed
    And Wait for volume 0 healthy
    And Check volume 0 data is intact

Test Replica Auto Balance Node Least Effort
    [Tags]    coretest
    [Documentation]    Scenario: replica auto-balance nodes with `least_effort`
    Given Set setting replica-soft-anti-affinity to true
    And Set setting replica-auto-balance to least-effort

    When Disable node 1 scheduling
    And Disable node 2 scheduling
    And Create volume 0 with    numberOfReplicas=6    dataEngine=${DATA_ENGINE}
    And Attach volume 0
    And Wait for volume 0 healthy
    And Write data to volume 0
    Then Volume 0 should have 6 replicas running on node 0
    And Volume 0 should have 0 replicas running on node 1
    And Volume 0 should have 0 replicas running on node 2

    When Enable node 1 scheduling
    # wait for auto balance
    Then Volume 0 should have replicas running on node 1
    And Volume 0 should have 6 replicas running
    # Loop 3 times with a 5-second wait, comparing the replica count each time,
    # to ensure no additional scheduling occurs and the count remains unchanged.

    And Volume 0 should have 5 replicas running on node 0 and no additional scheduling occurs
    And Volume 0 should have 1 replicas running on node 1 and no additional scheduling occurs
    And Volume 0 should have 0 replicas running on node 2 and no additional scheduling occurs

    When Enable node 2 scheduling
    # wait for auto balance
    Then Volume 0 should have replicas running on node 2
    And Volume 0 should have 6 replicas running
    # Loop 3 times with a 5-second wait, comparing the replica count each time,
    # to ensure no additional scheduling occurs and the count remains unchanged.

    And Volume 0 should have 4 replicas running on node 0 and no additional scheduling occurs
    And Volume 0 should have 1 replicas running on node 1 and no additional scheduling occurs
    And Volume 0 should have 1 replicas running on node 2 and no additional scheduling occurs

    And Wait for volume 0 healthy
    And Check volume 0 data is intact
