Skip to content

Commit

Permalink
Support inf request rate and number prompts in LPG (GoogleCloudPlatfo…
Browse files Browse the repository at this point in the history
…rm#847)

* minor updates to lpg and checkpoint conversion

* Support inf request rate and number prompts
  • Loading branch information
vivianrwu authored Oct 15, 2024
1 parent b0588cc commit 4fdcca6
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,14 @@ for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do
# TODO: Check if profile already exists, if so then skip
timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
output_file="latency-profile-${timestamp}.txt"
PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --model=$TOKENIZER --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$((request_rate * $BENCHMARK_TIME_SECONDS)) --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH"
# Choose the request rate and prompt count for this run.
# A request rate of 0 is the sentinel for an unbounded rate: it is rewritten
# to "inf" (the value later passed via --request-rate) and the prompt count is
# taken from MAX_NUM_PROMPTS. Any other rate sends
# rate * BENCHMARK_TIME_SECONDS prompts.
# The expansion is quoted inside [[ ]] so an empty or whitespace-containing
# value cannot break the test.
if [[ "${request_rate}" == "0" ]]; then
  request_rate="inf"
  NUM_PROMPTS="${MAX_NUM_PROMPTS}"
else
  # $(( )) is integer-only: a fractional rate such as 0.5 is a syntax error here.
  NUM_PROMPTS=$((${request_rate} * ${BENCHMARK_TIME_SECONDS}))
fi

# Assemble the benchmark client's command-line options for this request rate.
# $TOKENIZER is passed as both --model and --tokenizer.
# NOTE(review): this appends to the existing PYTHON_OPTS; if this runs once per
# loop iteration without PYTHON_OPTS being reset, options from earlier
# iterations accumulate and are repeated on the command line — confirm intended.
PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --model=$TOKENIZER --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$NUM_PROMPTS --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH"
# Add the metrics-scraping flag only when SCRAPE_SERVER_METRICS is exactly
# the string "true"; any other value leaves PYTHON_OPTS untouched.
case "${SCRAPE_SERVER_METRICS}" in
  true)
    PYTHON_OPTS="${PYTHON_OPTS} --scrape-server-metrics"
    ;;
esac
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ spec:
value: ${output_bucket}
- name: SCRAPE_SERVER_METRICS
value: ${scrape_server_metrics}
- name: MAX_NUM_PROMPTS
value: ${max_num_prompts}
%{ for hugging_face_token_secret in hugging_face_token_secret_list ~}
- name: HF_TOKEN
valueFrom:
Expand Down

0 comments on commit 4fdcca6

Please sign in to comment.