Skip to content

Commit

Permalink
victor: image choices and gpu machine
Browse files Browse the repository at this point in the history
  • Loading branch information
consideRatio committed Sep 24, 2024
1 parent fc611c5 commit 5e30140
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
33 changes: 33 additions & 0 deletions config/clusters/victor/common.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,27 +63,60 @@ basehub:
mem_guarantee: 6.5G
node_selector:
node.kubernetes.io/instance-type: m5.large
profile_options: &profile_options
image:
display_name: Image
choices:
victor-notebook:
display_name: Victor Notebook
default: true
kubespawner_override:
image: quay.io/volcanocyber/victor-notebook:a045ad3616d1
pytorch-notebook:
display_name: Pangeo ML Notebook (Pytorch)
kubespawner_override:
image: "quay.io/pangeo/pytorch-notebook:2024.09.11"
ml-notebook:
display_name: Pangeo ML Notebook (Tensorflow)
kubespawner_override:
image: "quay.io/pangeo/ml-notebook:2024.09.11"
- display_name: "Medium: m5.xlarge"
description: "~4 CPU, ~15G RAM"
kubespawner_override:
mem_limit: 15G
mem_guarantee: 12G
node_selector:
node.kubernetes.io/instance-type: m5.xlarge
profile_options: *profile_options
- display_name: "Large: m5.2xlarge"
description: "~8 CPU, ~30G RAM"
kubespawner_override:
mem_limit: 30G
mem_guarantee: 25G
node_selector:
node.kubernetes.io/instance-type: m5.2xlarge
profile_options: *profile_options
- display_name: "Huge: m5.8xlarge"
description: "~16 CPU, ~60G RAM"
kubespawner_override:
mem_limit: 60G
mem_guarantee: 50G
node_selector:
node.kubernetes.io/instance-type: m5.8xlarge
profile_options: *profile_options
- display_name: NVIDIA Tesla T4, ~16 GB, ~4 CPUs
description: "Start a container on a dedicated node with a GPU"
slug: "gpu"
kubespawner_override:
environment:
NVIDIA_DRIVER_CAPABILITIES: compute,utility
mem_limit: null
mem_guarantee: 14G
node_selector:
node.kubernetes.io/instance-type: g4dn.xlarge
extra_resource_limits:
nvidia.com/gpu: "1"
profile_options: *profile_options
defaultUrl: /lab
scheduling:
userScheduler:
Expand Down
12 changes: 12 additions & 0 deletions eksctl/victor.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ local notebookNodes = [
{ instanceType: "r5.xlarge" },
{ instanceType: "r5.4xlarge" },
{ instanceType: "r5.16xlarge" },
{
instanceType: "g4dn.xlarge",
tags+: {
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1"
},
taints+: {
"nvidia.com/gpu": "present:NoSchedule"
},
// Allow provisioning GPUs across all AZs, to prevent a situation where all
// GPUs in a single AZ are in use and no new nodes can be spawned
availabilityZones: masterAzs,
},
];

local daskNodes = [
Expand Down

0 comments on commit 5e30140

Please sign in to comment.