Skip to content

Commit 6a0e4da

Browse files
pauldotyuasw101
andauthored
doc: adding byo models with aks kaito and oss tools (#62)
* doc: adding byo models with aks kaito and oss tools * doc: updating byo models guide * doc: add note about gpu quota * fix: replace newgrp with exec sg docker for user group changes * doc: add Python to the list of included software in README * fix: correct shebang line in install script * docs: add section to reason about the tools * doc: update README to clarify tool stack and GPU requirements for AKS * fix: standardize callout formatting in README * fix: update caution and warning callouts in README for clarity * fix: remove unnecessary comment character from vm_image_offer description * fix: add Python requirement for model inference development in README * fix: reorganize installation steps for kubectl and kubelogin in install.sh * fix: update README for clarity and consistency in code examples and instructions * doc: add reference to ModelCar approach in README for clarity on initContainer usage * fix: update descriptions in variables.tf for clarity and consistency fix: correct path in install.sh for moving kubelogin binary * doc: add community connection section to README for feedback and support * doc: update community connection section in README for improved engagement and support * Tweak machine names, add URLs Signed-off-by: Aaron Wislang <aaron.wislang@microsoft.com> * Split commands, fix whitespace Signed-off-by: Aaron Wislang <aaron.wislang@microsoft.com> * Fix links Signed-off-by: Aaron Wislang <aaron.wislang@microsoft.com> * Add local files for embedded code Signed-off-by: Aaron Wislang <aaron.wislang@microsoft.com> * Update title Signed-off-by: Aaron Wislang <aaron.wislang@microsoft.com> --------- Signed-off-by: Aaron Wislang <aaron.wislang@microsoft.com> Co-authored-by: Aaron Wislang <aaron.wislang@microsoft.com>
1 parent aad17c3 commit 6a0e4da

File tree

13 files changed

+1187
-0
lines changed

13 files changed

+1187
-0
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Image that logs in to a registry with the KitOps CLI (`kit`) and unpacks a
# ModelKit to a local directory.
#
# Environment variables read at container start (see CMD):
#   REGISTRY_URL, USERNAME, PASSWORD  registry endpoint and credentials for `kit login`
#   MODELKIT_REF                      ModelKit reference passed to `kit unpack`
#   UNPACK_PATH, UNPACK_FILTER        values for --dir / --filter (defaults set below)

# Pin the base image to a release tag so rebuilds are reproducible; `latest`
# silently changes underneath the build.
FROM alpine:3.20

# Download the KitOps release archive, install the `kit` binary and remove the
# archive in the same layer so it does not stay in the image.
# NOTE(review): the KitOps download still tracks the `latest` release; pin a
# version in the URL if fully reproducible builds are required.
RUN wget https://github.com/jozu-ai/kitops/releases/latest/download/kitops-linux-x86_64.tar.gz && \
    tar -xzvf kitops-linux-x86_64.tar.gz && \
    mv kit /usr/local/bin/ && \
    rm kitops-linux-x86_64.tar.gz

# Set default values for environment variables
ENV UNPACK_PATH=/home/user/modelkit/
ENV UNPACK_FILTER=model

# Shell form is kept on purpose: the command needs a pipe and runtime
# environment-variable expansion. Every expansion is quoted so values that
# contain spaces or glob characters (e.g. passwords) are passed through intact;
# printf is used instead of echo so the password is never interpreted as options
# or escape sequences.
CMD printf '%s\n' "$PASSWORD" | kit login "$REGISTRY_URL" -u "$USERNAME" --password-stdin && \
    kit unpack "$MODELKIT_REF" --dir "$UNPACK_PATH" --filter="$UNPACK_FILTER"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Kitfile: manifest listing the files packaged into this ModelKit.
manifestVersion: 1.0.0

# Package metadata
package:
  name: mysmollm2app
  authors:
    - Paul Yu
  description: My project working with HuggingFaceTB/SmolLM2-1.7B-Instruct-16k model

# Model weights plus the additional files listed as parts of the model
model:
  name: model
  path: model.safetensors
  parts:
    - path: training_args.bin
    - path: all_results.json
    - path: config.json
    - path: generation_config.json
    - path: merges.txt
    - path: special_tokens_map.json
    - path: tokenizer.json
    - path: tokenizer_config.json
    - path: train_results.json
    - path: trainer_state.json
    - path: vocab.json

# Documentation shipped with the package
docs:
  - path: README.md
    description: Readme file

code: # Add code spec
  - path: src/cog/
    description: Source code to run AI model predictions

cloud-native/aks-byo-models-kaito/README.md

Lines changed: 626 additions & 0 deletions
Large diffs are not rendered by default.
47.2 KB
Loading
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from cog import BasePredictor, Input
2+
from transformers import AutoModelForCausalLM, AutoTokenizer
3+
import os
4+
5+
class Predictor(BasePredictor):
    """Cog predictor that answers prompts with a causal language model loaded from disk."""

    def setup(self) -> None:
        """Load the tokenizer and model into memory so repeated predictions are efficient.

        The model directory is taken from the ``MODEL_PATH`` environment
        variable and falls back to ``../../`` when it is not set.
        """
        model_path = os.getenv("MODEL_PATH", "../../")  # locally the model is in the root directory
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        # NOTE(review): trust_remote_code=True allows Python code shipped with the
        # model files to run at load time. Only point MODEL_PATH at trusted
        # artifacts, or drop the flag if the model does not need custom code.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map="auto",  # Automatically distributes across available GPUs or uses CPU
            trust_remote_code=True,
        )

    def predict(
        self,
        prompt: str = Input(description="Ask the LLM a question"),
    ) -> str:
        """Run a single prediction on the model.

        Args:
            prompt: The text to send to the model.

        Returns:
            The decoded text of the first generated sequence, with special
            tokens removed.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True).to(self.model.device)

        outputs = self.model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            # Bound the number of *generated* tokens. The previous max_length=100
            # also counted the prompt tokens, so a long prompt left little or no
            # room for an answer.
            max_new_tokens=100,
            do_sample=True,
            top_p=0.95,
            temperature=0.3,
        )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Terraform working directories created locally
.terraform/

# Terraform state files and their backups
*.tfstate
*.tfstate.*

# Logs written when Terraform crashes
crash.log
crash.*.log

# Variable files frequently hold sensitive values (passwords, private keys,
# other secrets) and differ per environment, so keep every .tfvars file out of
# version control.
*.tfvars
*.tfvars.json

# Override files are normally used for local-only resource overrides and are
# not checked in
override.tf
override.tf.json
*_override.tf
*_override.tf.json

# Transient lock info file created by terraform apply
.terraform.tfstate.lock.info

# To version a specific override file, add a negated pattern such as
# !example_override.tf

# To ignore plan output from `terraform plan -out=tfplan`, add a pattern such as
# example: *tfplan*

# Terraform CLI configuration files
.terraformrc
terraform.rc

# NOTE(review): presumably the generated SSH private key file kept next to the
# Terraform configuration; confirm against the Terraform code.
ssh_private_key

cloud-native/aks-byo-models-kaito/workstation/.terraform.lock.hcl

Lines changed: 117 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Ubuntu Workstation (for BYO model on AKS with KAITO and open-source tools)
2+
3+
This Terraform script provisions an Ubuntu Virtual Machine (VM) in Azure, configured as a workstation for development and testing purposes. The VM is set up with essential software tools that facilitate cloud-native AI development workflows, including Azure CLI, Terraform, Docker, kubectl, KitOps CLI, and Cog CLI. The VM is provisioned using cloud-init to automate the installation of these tools at startup.
4+
5+
This particular VM will include the following software:
6+
7+
- [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli)
8+
- [Terraform](https://developer.hashicorp.com/terraform/install)
9+
- [Docker](https://www.docker.com/get-started/)
10+
- [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl)
11+
- [KitOps CLI](https://kitops.org/docs/cli/installation/)
12+
- [Cog CLI](https://cog.run/getting-started/#install-cog)
13+
- [Python](https://www.python.org/downloads/)
14+
15+
## Prerequisites
16+
17+
To use this template, you will need to have the following software installed on your local machine:
18+
19+
- [Terraform](https://www.terraform.io/downloads.html)
20+
- [Azure CLI](https://docs.microsoft.com/cli/azure/install-azure-cli)
21+
22+
## Provisioning resources
23+
24+
Log in to Azure using the Azure CLI with the command below and follow the instructions output to the terminal.
25+
26+
```sh
27+
az login
28+
```
29+
30+
Set the subscription ID for Terraform to use.
31+
32+
```sh
33+
export ARM_SUBSCRIPTION_ID=$(az account show --query id -o tsv)
34+
```
35+
36+
Initialize the Terraform configuration.
37+
38+
```sh
39+
terraform init
40+
```
41+
42+
> [!note]
43+
> The Azure VM SKU is set in a variable named `vm_size` in the `variables.tf` file. It defaults to `Standard_D8s_v4`, which is a general-purpose VM with 8 vCPUs and 32 GiB of memory. You can change this to a different SKU based on your requirements. If you are deploying an N-series VM, which is NVIDIA GPU-enabled, you will need to install drivers to make use of the GPU. See the [N-series VM documentation](https://learn.microsoft.com/azure/virtual-machines/linux/n-series-driver-setup) for more information on how to install the NVIDIA drivers. You can also install the NVIDIA drivers using the [NVIDIA GPU Driver Extension for Linux](https://learn.microsoft.com/azure/virtual-machines/extensions/hpccompute-gpu-linux).
44+
45+
Run the following command to create the resources. This will prompt you to confirm the changes. Type `yes` to proceed.
46+
47+
```sh
48+
terraform apply
49+
```
50+
51+
## Connecting to VM
52+
53+
An [Azure Network Security Group (NSG)](https://learn.microsoft.com/azure/virtual-network/network-security-groups-overview) is created to allow [SSH](https://www.man7.org/linux/man-pages/man1/ssh.1.html) access to the VM only from the IP address of the machine running the script. To authenticate, a new public/private key pair is generated and stored in Azure. The private key PEM file is stored in the current directory. It is meant to be ephemeral and will be deleted when the resources are deleted, but it should still be kept secure.
54+
55+
To SSH into the VM, use the following command:
56+
57+
```bash
58+
ssh -i $(terraform output -raw ssh_private_key) $(terraform output -raw ssh_username)@$(terraform output -raw public_ip)
59+
```
60+
61+
You could also SSH into the VM from VSCode using the [Remote - SSH](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-ssh) extension. To do this, add the following to your SSH config file:
62+
63+
```text
64+
# ~/.ssh/config
65+
Host <replace_this_with_public_ip>
66+
HostName <replace_this_with_public_ip>
67+
User <replace_this_with_ssh_username>
68+
IdentityFile <replace_this_with_path_to_private_key>
69+
```
70+
71+
See this [documentation](https://code.visualstudio.com/docs/remote/ssh) for more information on Remote Development using SSH.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#cloud-config
# See documentation for more configuration examples
# https://cloudinit.readthedocs.io/en/latest/reference/examples.html

# Install arbitrary packages
# https://cloudinit.readthedocs.io/en/latest/reference/examples.html#install-arbitrary-packages
packages:
  # `python3` replaces `python`: current Ubuntu releases have no installable
  # package named `python`, and an unknown package name makes the package
  # installation step fail.
  - python3
  - ca-certificates
  - unzip

# Run commands on first boot
# https://cloudinit.readthedocs.io/en/latest/reference/examples.html#run-commands-on-first-boot
runcmd:
  - [ ls, -l, / ]
  - [ sh, -xc, "echo $(date) ': hello world!'" ]
  - [ sh, -c, echo "=========hello world=========" ]
  - ls -l /root
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/bin/bash
# Installs the workstation tools: Azure CLI, Terraform, KitOps CLI (kit), Cog,
# kubectl, kubelogin and Docker. Every step uses sudo for system-wide changes.
# NOTE(review): the current_user placeholder near the end is presumably filled
# in by a template step before the script runs; confirm against the caller.

# install azure cli
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash

# install terraform
wget -O - https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
# apt-get with -y: the install must not wait for a confirmation prompt when the
# script runs unattended.
sudo apt-get update && sudo apt-get install -y terraform

# install kit
wget https://github.com/jozu-ai/kitops/releases/latest/download/kitops-linux-x86_64.tar.gz
tar -xzvf kitops-linux-x86_64.tar.gz
sudo mv kit /usr/local/bin/

# install cog
sudo curl -o /usr/local/bin/cog -L "https://github.com/replicate/cog/releases/latest/download/cog_$(uname -s)_$(uname -m)"
sudo chmod +x /usr/local/bin/cog

# install kubectl
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl

# install kubelogin
wget https://github.com/Azure/kubelogin/releases/download/v0.2.7/kubelogin-linux-amd64.zip
unzip kubelogin-linux-amd64.zip
sudo mv ./linux_amd64/kubelogin /usr/local/bin/kubelogin

# install docker
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y
# -f: succeed even when the docker group already exists (the Docker packages
# normally create it).
sudo groupadd -f docker
sudo usermod -aG docker "${current_user}"
# newgrp starts a new shell with the docker group active, which only makes
# sense when a terminal is attached. In unattended runs the new membership
# takes effect at the user's next login.
if [ -t 0 ]; then
  newgrp docker
fi

0 commit comments

Comments
 (0)