Skip to content

Commit

Permalink
chore: set RAY_gcs_rpc_server_reconnect_timeout_s to reduce wait for …
Browse files Browse the repository at this point in the history
…gcs to be killed
  • Loading branch information
CheyuWu committed Dec 2, 2024
1 parent 4819287 commit c381c31
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
2 changes: 1 addition & 1 deletion ray-operator/test/e2e/rayservice_ha_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ func TestRayServiceGCSFaultTolerance(t *testing.T) {
// Kill gcs server
ExecPodCmd(test, noOpsHeadPod, common.RayHeadContainer, []string{"pkill", "gcs_server"})
// wait for head pod not to be ready
g.Eventually(HeadPod(test, rayServiceUnderlyingRayCluster), TestTimeoutMedium).Should(WithTransform(sampleyaml.IsPodRunningAndReady, BeFalse()))
g.Eventually(HeadPod(test, rayServiceUnderlyingRayCluster), TestTimeoutShort).Should(WithTransform(sampleyaml.IsPodRunningAndReady, BeFalse()))

go func() {
// wait for head pod to be ready
Expand Down
5 changes: 4 additions & 1 deletion ray-operator/test/e2e/testdata/rayservice.ft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,17 @@ spec:
ray_actor_options:
num_cpus: 1
rayClusterConfig:
rayVersion: '2.9.0'
rayVersion: "2.9.0"
headGroupSpec:
rayStartParams: {}
template:
spec:
containers:
- name: ray-head
image: rayproject/ray:2.9.0
env:
- name: RAY_gcs_rpc_server_reconnect_timeout_s
value: "20"
resources:
requests:
cpu: 300m
Expand Down

0 comments on commit c381c31

Please sign in to comment.