Skip to content

Commit

Permalink
Merge branch 'use_upstream_cc_tools' into 'main'
Browse files: browse the repository at this point in the history
Use upstream gpu_cc_tool from nvtrust

See merge request nvidia/cloud-native/k8s-cc-manager!2
  • Loading branch information
shivamerla committed Jul 28, 2023
2 parents a550169 + ec92c28 commit fdef8d0
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
8 changes: 5 additions & 3 deletions deployments/container/Dockerfile.ubi8
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ ARG GIT_COMMIT="unknown"

RUN go build -o /artifacts/k8s-cc-manager ./cmd
RUN cp ./scripts/cc-manager.sh /artifacts/cc-manager.sh
- #RUN cp ./scripts/nvidia-gpu-tools.py /artifacts/nvidia-gpu-tools.py

RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && curl -o /artifacts/kubectl -L "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${OS_ARCH}/kubectl";
RUN chmod a+x /artifacts/kubectl
Expand All @@ -50,10 +49,13 @@ LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

COPY --from=build /artifacts/k8s-cc-manager /usr/bin/k8s-cc-manager
- COPY --from=build /artifacts/cc-manager.sh /usr/bin/cc-manager.sh
- #COPY --from=build /artifacts/nvidia-gpu-tools.py /usr/bin/nvidia-gpu-tools.py
COPY --from=build /artifacts/kubectl /usr/bin/kubectl

+ # Setup CC tools and licenses
+ COPY --from=build /artifacts/cc-manager.sh /usr/bin/cc-manager.sh
+ RUN curl -o /usr/bin/gpu_cc_tool.py -L https://raw.githubusercontent.com/NVIDIA/nvtrust/main/host_tools/python/gpu_cc_tool.py && chmod a+x /usr/bin/gpu_cc_tool.py
+ RUN curl -o /licenses/LICENSE.txt -L https://raw.githubusercontent.com/NVIDIA/nvtrust/main/host_tools/python/LICENSE.txt

# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
Expand Down
8 changes: 4 additions & 4 deletions scripts/cc-manager.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ _populate_cc_capable_device_ids() {

_reset_gpu_after_cc_mode() {
local gpu=$1
- python3 /usr/bin/nvidia-gpu-tools.py --reset-after-cc-mode-switch --gpu-bdf=$gpu
+ python3 /usr/bin/gpu_cc_tool.py --reset-after-cc-mode-switch --gpu-bdf=$gpu
if [ $? -ne 0 ]; then
echo "unable to reset gpu $gpu for cc mode switch, output"
return 1
Expand Down Expand Up @@ -149,7 +149,7 @@ _assert_gpu_cc_mode() {
local gpu=$1
local mode=$2

- output=$(python3 /usr/bin/nvidia-gpu-tools.py --query-cc-mode --gpu-bdf=$gpu 2>&1)
+ output=$(python3 /usr/bin/gpu_cc_tool.py --query-cc-mode --gpu-bdf=$gpu 2>&1)
if [ $? -ne 0 ]; then
_exit_failed
fi
Expand Down Expand Up @@ -386,7 +386,7 @@ set_gpu_cc_mode() {
local mode=$CC_MODE

if ! _assert_gpu_cc_mode $gpu $mode; then
- output=$(python3 /usr/bin/nvidia-gpu-tools.py --set-cc-mode=$mode --reset-after-cc-mode-switch --gpu-bdf=$gpu 2>&1)
+ output=$(python3 /usr/bin/gpu_cc_tool.py --set-cc-mode=$mode --reset-after-cc-mode-switch --gpu-bdf=$gpu 2>&1)
if [ $? -ne 0 ]; then
echo "unable to set cc mode of gpu $gpu to $mode, output $output"
return 1
Expand Down Expand Up @@ -434,7 +434,7 @@ get_cc_mode() {

get_gpu_cc_mode() {
local gpu=$1
- output=$(python3 /usr/bin/nvidia-gpu-tools.py --query-cc-mode --gpu-bdf=$gpu 2>&1)
+ output=$(python3 /usr/bin/gpu_cc_tool.py --query-cc-mode --gpu-bdf=$gpu 2>&1)
if [ $? -ne 0 ]; then
echo "unable to get cc mode of gpu $gpu, output $output"
return 1
Expand Down

0 comments on commit fdef8d0

Please sign in to comment.