diff --git a/modules/inference-service/main.tf b/modules/inference-service/main.tf
index 44655fc23..d15d608f8 100644
--- a/modules/inference-service/main.tf
+++ b/modules/inference-service/main.tf
@@ -89,7 +89,7 @@ resource "kubernetes_deployment" "inference_deployment" {
         }
       }
       container {
-        image = "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310"
+        image = "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311"
         name  = "mistral-7b-instruct"
 
         port {
diff --git a/tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/README.md b/tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/README.md
index 6362c35a6..f40e5d1f8 100644
--- a/tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/README.md
+++ b/tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/README.md
@@ -104,7 +104,7 @@ Pod Template:
   Labels:  app=mistral-7b
   Containers:
    mistral-7b:
-    Image:      us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
+    Image:      us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
     Port:       8080/TCP
     Host Port:  0/TCP
     Limits:
diff --git a/tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/mistral-7b.yaml b/tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/mistral-7b.yaml
index 1828472a8..387a155ce 100644
--- a/tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/mistral-7b.yaml
+++ b/tutorials-and-examples/genAI-LLM/deploying-mistral-7b-instruct-L4gpus/mistral-7b.yaml
@@ -28,7 +28,7 @@ spec:
     spec:
       containers:
       - name: mistral-7b
-        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
+        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
         resources:
           limits:
             nvidia.com/gpu: 1
diff --git a/tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/README.md b/tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/README.md
index 60739ffc6..089740edf 100644
--- a/tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/README.md
+++ b/tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/README.md
@@ -127,7 +127,7 @@ Pod Template:
   Labels:  app=mixtral8x7b
   Containers:
    mixtral8x7b:
-    Image:      us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
+    Image:      us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
     Port:       8080/TCP
     Host Port:  0/TCP
     Limits:
diff --git a/tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/mixtral-8x7b.yaml b/tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/mixtral-8x7b.yaml
index 72a7e61d6..46a1c9475 100644
--- a/tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/mixtral-8x7b.yaml
+++ b/tutorials-and-examples/genAI-LLM/deploying-mixtral-8x7b-instruct-L4-gpus/mixtral-8x7b.yaml
@@ -30,7 +30,7 @@ spec:
         cloud.google.com/gke-accelerator: "nvidia-l4"
       containers:
       - name: mixtral8x7b
-        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
+        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
         ports:
         - name: server-port
           containerPort: 8080
diff --git a/tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/README.md b/tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/README.md
index 617e4072c..4574a8a70 100644
--- a/tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/README.md
+++ b/tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/README.md
@@ -76,7 +76,7 @@ spec:
     spec:
       containers:
       - name: llama-2-70b
-        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
+        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
         resources:
           limits:
             nvidia.com/gpu: 2
diff --git a/tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/text-generation-interface.yaml b/tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/text-generation-interface.yaml
index a9963a719..462fce210 100644
--- a/tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/text-generation-interface.yaml
+++ b/tutorials-and-examples/genAI-LLM/serving-llama2-70b-on-l4-gpus/text-generation-interface.yaml
@@ -28,7 +28,7 @@ spec:
     spec:
       containers:
       - name: llama-2-70b
-        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310
+        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
         resources:
           limits:
             nvidia.com/gpu: 2