From 7003793b15ad0ee28bc09d0fceb91eb2d0104961 Mon Sep 17 00:00:00 2001
From: Margaret Qian
Date: Mon, 18 Sep 2023 19:39:17 -0700
Subject: [PATCH] Bump images for llama2 and update compute (#436)

image and compute
---
 examples/inference-deployments/llama2/llama2_13b.yaml     | 7 +++----
 examples/inference-deployments/llama2/llama2_7b_chat.yaml | 3 +--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/examples/inference-deployments/llama2/llama2_13b.yaml b/examples/inference-deployments/llama2/llama2_13b.yaml
index a7fc1745e..a0499711e 100644
--- a/examples/inference-deployments/llama2/llama2_13b.yaml
+++ b/examples/inference-deployments/llama2/llama2_13b.yaml
@@ -5,9 +5,8 @@ command: |- # Note this command is a workaround until we build vllm into the inf
   pip uninstall torch -y
   pip install torch==2.0.1
 compute:
-  gpus: 1
-  instance: oci.vm.gpu.a10.1
-image: mosaicml/inference:0.1.37
-cluster: r7z15
+  gpus: 2
+  instance: oci.vm.gpu.a10.2
+image: mosaicml/inference:0.1.40
 default_model:
   model_type: llama2-13b
diff --git a/examples/inference-deployments/llama2/llama2_7b_chat.yaml b/examples/inference-deployments/llama2/llama2_7b_chat.yaml
index c9f8047d9..ffd6eb24d 100644
--- a/examples/inference-deployments/llama2/llama2_7b_chat.yaml
+++ b/examples/inference-deployments/llama2/llama2_7b_chat.yaml
@@ -7,7 +7,6 @@ command: |- # Note this command is a workaround until we build vllm into the inf
 compute:
   gpus: 1
   instance: oci.vm.gpu.a10.1
-image: mosaicml/inference:0.1.37
-cluster: r7z15
+image: mosaicml/inference:0.1.40
 default_model:
   model_type: llama2-7b-chat