Skip to content

Commit

Permalink
[RayJob][Feature] test light weight job submitter in kuberay image
Browse files Browse the repository at this point in the history
Signed-off-by: Rueian <[email protected]>
  • Loading branch information
rueian committed Nov 30, 2024
1 parent 509556f commit 9483127
Show file tree
Hide file tree
Showing 8 changed files with 354 additions and 153 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/e2e-tests-ray-job-submitter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# End-to-end tests for the Ray job submitter.
# Installs Ray with pip and then runs the Go tests under
# ray-operator/test/e2erayjobsubmitter.
name: e2e-ray-job-submitter

on:
  pull_request:
    branches:
      - master
      - 'release-*'
  push:
    branches:
      - master
      - 'release-*'

concurrency:
  # github.head_ref is only populated for pull_request events. Without a
  # fallback, every push run would share the same group name and
  # cancel-in-progress would let pushes to different branches cancel each
  # other. Falling back to the unique run id gives each push run its own group.
  group: ${{ github.head_ref || github.run_id }}-${{ github.workflow }}
  cancel-in-progress: true

jobs:
  ray-job-submitter:
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        ray-version: [ '2.39.0' ]
        go-version: [ '1.22.0' ]
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Set up Go
        uses: actions/setup-go@v3
        with:
          go-version: ${{ matrix.go-version }}

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Ray
        # Quoted so the shell never treats "[default]" as a glob pattern.
        run: pip install "ray[default]==${{ matrix.ray-version }}"

      - name: Run e2e tests
        run: |
          cd ray-operator
          go test -timeout 30m -v ./test/e2erayjobsubmitter
4 changes: 2 additions & 2 deletions ray-operator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ COPY main.go main.go
COPY apis/ apis/
COPY controllers/ controllers/
COPY pkg/features pkg/features
COPY rayjob-submitter/ rayjob-submitter/
COPY rayjobsubmitter/ rayjobsubmitter/

# Build
USER root
RUN CGO_ENABLED=1 GOOS=linux go build -tags strictfipsruntime -a -o manager main.go
RUN CGO_ENABLED=1 GOOS=linux go build -tags strictfipsruntime -a -o submitter rayjob-submitter/main.go
RUN CGO_ENABLED=1 GOOS=linux go build -tags strictfipsruntime -a -o submitter ./rayjobsubmitter/cmd/main.go

FROM gcr.io/distroless/base-debian12:nonroot
WORKDIR /
Expand Down
4 changes: 4 additions & 0 deletions ray-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ test-sampleyaml: WHAT ?= ./test/sampleyaml
test-sampleyaml: manifests fmt vet
go test -timeout 30m -v $(WHAT)

# Run the Ray job submitter e2e tests after fmt and vet.
# WHAT selects the Go package to test and defaults to ./test/e2erayjobsubmitter.
test-e2erayjobsubmitter: WHAT ?= ./test/e2erayjobsubmitter
test-e2erayjobsubmitter: fmt vet
go test -timeout 30m -v $(WHAT)

sync: helm api-docs
./hack/update-codegen.sh

Expand Down
151 changes: 0 additions & 151 deletions ray-operator/rayjob-submitter/main.go

This file was deleted.

21 changes: 21 additions & 0 deletions ray-operator/rayjobsubmitter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Ray Job Submitter

This is a Ray job submitter written in Go. KubeRay uses it to submit a
Ray job and tail its logs without installing Ray, which is very large.

Note that this tool is designed specifically for KubeRay and
does not support some `ray job submit` features that people
don't use with KubeRay. For example, uploading local files to
a Ray cluster is not supported.

## Testing

Tests are located at [../test/e2erayjobsubmitter](../test/e2erayjobsubmitter).

As the `e2e` prefix suggests, these are end-to-end tests: you need to have
`ray` installed because they start a real Ray head. You can run the tests with:

```sh
make test-e2erayjobsubmitter
```
The same tests can also be run with the GitHub Actions workflow [../../.github/workflows/e2e-tests-ray-job-submitter.yaml](../../.github/workflows/e2e-tests-ray-job-submitter.yaml).
61 changes: 61 additions & 0 deletions ray-operator/rayjobsubmitter/cmd/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package main

import (
"encoding/json"
"os"
"strings"

flag "github.com/spf13/pflag"

"github.com/ray-project/kuberay/ray-operator/controllers/ray/utils"
"github.com/ray-project/kuberay/ray-operator/rayjobsubmitter"
)

// main is the entry point of the Ray job submitter command.
//
// Job options come from command-line flags, and the remaining positional
// arguments are joined with single spaces to form the entrypoint. The
// dashboard address and the submission ID are read from the
// RAY_DASHBOARD_ADDRESS and RAY_JOB_SUBMISSION_ID environment variables.
// The assembled request is handed to rayjobsubmitter.Submit together with
// os.Stdout.
//
// main panics when either environment variable is empty or when a non-empty
// JSON flag value cannot be unmarshalled.
func main() {
	var (
		runtimeEnvJSON string
		metadataJSON   string
		resourcesJSON  string
		numCPUs        float32
		numGPUs        float32
	)

	flag.StringVar(&runtimeEnvJSON, "runtime-env-json", "", "")
	flag.StringVar(&metadataJSON, "metadata-json", "", "")
	flag.StringVar(&resourcesJSON, "entrypoint-resources", "", "")
	flag.Float32Var(&numCPUs, "entrypoint-num-cpus", 0.0, "")
	flag.Float32Var(&numGPUs, "entrypoint-num-gpus", 0.0, "")
	flag.Parse()

	// Both variables are mandatory; the address is checked first.
	address := os.Getenv("RAY_DASHBOARD_ADDRESS")
	submissionID := os.Getenv("RAY_JOB_SUBMISSION_ID")
	switch {
	case address == "":
		panic("Missing RAY_DASHBOARD_ADDRESS")
	case submissionID == "":
		panic("Missing RAY_JOB_SUBMISSION_ID")
	}

	req := utils.RayJobRequest{
		Entrypoint:   strings.Join(flag.Args(), " "),
		SubmissionId: submissionID,
		NumCpus:      numCPUs,
		NumGpus:      numGPUs,
	}

	// Decode each optional JSON flag into its request field, in the order
	// runtime env, metadata, resources. Empty flag values are skipped.
	jsonFields := []struct {
		raw string
		dst any
	}{
		{runtimeEnvJSON, &req.RuntimeEnv},
		{metadataJSON, &req.Metadata},
		{resourcesJSON, &req.Resources},
	}
	for _, field := range jsonFields {
		if field.raw == "" {
			continue
		}
		if err := json.Unmarshal([]byte(field.raw), field.dst); err != nil {
			panic(err)
		}
	}

	rayjobsubmitter.Submit(address, req, os.Stdout)
}
Loading

0 comments on commit 9483127

Please sign in to comment.