# HAMi GPU工作负载示例配置
# 展示HAMi的GPU切分和共享能力
# 参考: https://github.com/Project-HAMi/HAMi
# 官方示例: https://github.com/Project-HAMi/HAMi/tree/master/examples/nvidia
# 文档: https://project-hami.io/docs/

---
# HAMi GPU slicing example: a Pod that takes one slice (vGPU) of a shared GPU.
apiVersion: v1
kind: Pod
metadata:
  name: hami-gpu-slice-example-1
  labels:
    app: hami-slice-demo
    instance: "1"
spec:
  containers:
    - name: gpu-workload
      # nvidia/cuda tags carry the full CUDA version (major.minor.patch);
      # the short "12.2-..." form is not published and fails to pull.
      image: nvidia/cuda:12.2.2-runtime-ubuntu20.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: "1" # number of vGPUs requested
          nvidia.com/gpumem: "2048" # 2048 MB of GPU memory for the vGPU
          nvidia.com/gpucores: "30" # 30% of the GPU's compute
          memory: "1Gi"
          cpu: "500m"
        requests:
          memory: "1Gi"
          cpu: "500m"
  # One-shot demo: nvidia-smi prints once and the Pod completes.
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - operator: Exists
      effect: NoSchedule

# HAMi 使用说明和最佳实践:
# 1. 节点标签要求: kubectl label nodes {node-name} gpu=on
# 2. 资源声明: nvidia.com/gpu(物理GPU数量), nvidia.com/gpumem(内存MB), nvidia.com/gpucores(计算资源%)
# 3. 重要注解: nvidia.com/use-gputype, nvidia.com/priority, hami.io/gpu-scheduler-policy等
# 4. 环境变量: GPU_CORE_UTILIZATION_POLICY=force, TF_FORCE_GPU_ALLOW_GROWTH=true
# 5. 支持设备: NVIDIA GPU, Cambricon MLU, HYGON DCU, Iluvatar CoreX等

---
# HAMi GPU slicing example: a second Pod taking a different-sized slice.
apiVersion: v1
kind: Pod
metadata:
  name: hami-gpu-slice-example-2
  labels:
    app: hami-slice-demo
    instance: "2"
spec:
  containers:
    - name: gpu-workload
      # nvidia/cuda tags carry the full CUDA version (major.minor.patch);
      # the short "12.2-..." form is not published and fails to pull.
      image: nvidia/cuda:12.2.2-runtime-ubuntu20.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: "1" # number of vGPUs requested
          nvidia.com/gpumem: "3072" # 3072 MB of GPU memory for the vGPU
          nvidia.com/gpucores: "50" # 50% of the GPU's compute
          memory: "1Gi"
          cpu: "500m"
        requests:
          memory: "1Gi"
          cpu: "500m"
  # One-shot demo: nvidia-smi prints once and the Pod completes.
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - operator: Exists
      effect: NoSchedule

---
# HAMi multi-GPU workload example: a single container asking for two vGPUs.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: hami-multi-gpu
  name: hami-multi-gpu-example
spec:
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - effect: NoSchedule
      operator: Exists
  containers:
    - name: multi-gpu-workload
      image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
      command:
        - "python"
        - "-c"
      # Inline script: reports the visible CUDA devices and, when more than
      # one is visible, allocates a tensor on device 0 and on device 1.
      args:
        - |
          import torch
          print(f"PyTorch version: {torch.__version__}")
          print(f"CUDA available: {torch.cuda.is_available()}")
          print(f"CUDA device count: {torch.cuda.device_count()}")

          for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
            print(f"GPU {i} Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.2f} GB")

          # 测试多GPU计算
          if torch.cuda.device_count() > 1:
            print("Testing multi-GPU computation...")
            x = torch.randn(1000, 1000).cuda(0)
            y = torch.randn(1000, 1000).cuda(1)
            print("Multi-GPU tensors created successfully")
      resources:
        requests:
          cpu: "2"
          memory: "4Gi"
        limits:
          cpu: "2"
          memory: "4Gi"
          # HAMi applies gpumem and gpucores to each requested vGPU;
          # they are not totals across the two devices.
          nvidia.com/gpu: "2" # number of vGPUs requested
          nvidia.com/gpumem: "8192" # 8192 MB of GPU memory per vGPU
          nvidia.com/gpucores: "80" # 80% of a GPU's compute per vGPU

---
# HAMi shared-GPU training: three replicas, each limited to a GPU slice.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hami-shared-training
  labels:
    app: hami-shared-training
spec:
  replicas: 3
  selector:
    matchLabels:
      app: hami-shared-training
  template:
    metadata:
      labels:
        app: hami-shared-training
    spec:
      containers:
        - name: training-worker
          image: tensorflow/tensorflow:2.13.0-gpu
          # NOTE(review): the script exits after 20 epochs. Deployment pods
          # always restart their containers, so each worker will be restarted
          # and train again; use a Job if one-shot training is intended.
          command: ["python", "-c"]
          # Inline script: enables GPU memory growth, then fits a small Keras
          # model on random data for 20 single-epoch rounds on /GPU:0.
          args:
            - |
              import tensorflow as tf
              import time
              import os

              worker_id = os.environ.get('HOSTNAME', 'unknown')
              print(f"Worker {worker_id} starting...")
              print(f"TensorFlow version: {tf.__version__}")
              print(f"GPU devices: {tf.config.list_physical_devices('GPU')}")

              # 配置GPU内存增长
              gpus = tf.config.list_physical_devices('GPU')
              if gpus:
                try:
                  for gpu in gpus:
                    tf.config.experimental.set_memory_growth(gpu, True)
                except RuntimeError as e:
                  print(e)

              # 简单的训练任务
              with tf.device('/GPU:0'):
                # 创建数据
                X = tf.random.normal([500, 20])
                y = tf.random.normal([500, 1])

                # 创建模型
                model = tf.keras.Sequential([
                  tf.keras.layers.Dense(32, activation='relu', input_shape=(20,)),
                  tf.keras.layers.Dense(16, activation='relu'),
                  tf.keras.layers.Dense(1)
                ])

                model.compile(optimizer='adam', loss='mse')

                # 训练
                print(f"Worker {worker_id} starting training...")
                for epoch in range(20):
                  history = model.fit(X, y, epochs=1, batch_size=16, verbose=0)
                  print(f"Worker {worker_id} - Epoch {epoch+1}/20, Loss: {history.history['loss'][0]:.4f}")
                  time.sleep(2)  # 模拟训练时间

                print(f"Worker {worker_id} training completed!")
          resources:
            limits:
              nvidia.com/gpu: "1" # number of vGPUs requested per replica
              nvidia.com/gpumem: "2048" # 2048 MB of GPU memory per replica
              nvidia.com/gpucores: "25" # 25% of the GPU's compute per replica
              memory: "1Gi"
              cpu: "500m"
            requests:
              memory: "1Gi"
              cpu: "500m"
          env:
            # Let TensorFlow grow its GPU allocation on demand instead of
            # reserving all visible memory at start-up.
            - name: TF_FORCE_GPU_ALLOW_GROWTH
              value: "true"
            - name: CUDA_VISIBLE_DEVICES
              value: "0"
            - name: GPU_CORE_UTILIZATION_POLICY
              value: "force" # always enforce the gpucores limit
      # Tolerates every NoSchedule taint so pods can land on tainted GPU nodes.
      tolerations:
        - operator: Exists
          effect: NoSchedule

---
# HAMi GPU inference service: two replicas, each served from a GPU slice.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: hami-inference-service
  labels:
    app: hami-inference
spec:
  replicas: 2
  selector:
    matchLabels:
      app: hami-inference
  template:
    metadata:
      labels:
        app: hami-inference
    spec:
      containers:
        - name: inference-server
          image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
          command: ["python", "-c"]
          # Inline script: loads a small feed-forward model and serves
          # POST /predict and GET /health on port 8080.
          args:
            - |
              import torch
              import torch.nn as nn
              import json
              from http.server import HTTPServer, BaseHTTPRequestHandler

              # 简单的推理模型
              class SimpleModel(nn.Module):
                def __init__(self):
                  super(SimpleModel, self).__init__()
                  self.fc1 = nn.Linear(10, 32)
                  self.fc2 = nn.Linear(32, 16)
                  self.fc3 = nn.Linear(16, 1)
                  self.relu = nn.ReLU()

                def forward(self, x):
                  x = self.relu(self.fc1(x))
                  x = self.relu(self.fc2(x))
                  x = self.fc3(x)
                  return x

              # 初始化模型
              device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
              model = SimpleModel().to(device)
              model.eval()

              print(f"Model loaded on device: {device}")

              # HTTP服务器
              class InferenceHandler(BaseHTTPRequestHandler):
                def do_POST(self):
                  if self.path == '/predict':
                    try:
                      # Read the body inside the try so a missing or malformed
                      # Content-Length produces a JSON error response instead
                      # of an unhandled exception with no reply.
                      content_length = int(self.headers.get('Content-Length', 0))
                      post_data = self.rfile.read(content_length)

                      data = json.loads(post_data.decode('utf-8'))
                      input_tensor = torch.tensor(data['input'], dtype=torch.float32).to(device)

                      with torch.no_grad():
                        output = model(input_tensor)

                      result = {'prediction': output.cpu().numpy().tolist()}

                      self.send_response(200)
                      self.send_header('Content-type', 'application/json')
                      self.end_headers()
                      self.wfile.write(json.dumps(result).encode('utf-8'))
                    except Exception as e:
                      self.send_response(500)
                      self.send_header('Content-type', 'application/json')
                      self.end_headers()
                      self.wfile.write(json.dumps({'error': str(e)}).encode('utf-8'))
                  else:
                    self.send_response(404)
                    self.end_headers()

                def do_GET(self):
                  if self.path == '/health':
                    self.send_response(200)
                    self.send_header('Content-type', 'application/json')
                    self.end_headers()
                    self.wfile.write(json.dumps({'status': 'healthy'}).encode('utf-8'))
                  else:
                    self.send_response(404)
                    self.end_headers()

              # 启动服务器
              server = HTTPServer(('0.0.0.0', 8080), InferenceHandler)
              print("Inference server starting on port 8080...")
              server.serve_forever()
          ports:
            - containerPort: 8080
              name: http
          resources:
            limits:
              nvidia.com/gpu: "1" # number of vGPUs requested per replica
              nvidia.com/gpumem: "1536" # 1536 MB of GPU memory per replica
              nvidia.com/gpucores: "20" # 20% of the GPU's compute per replica
              memory: "1Gi"
              cpu: "500m"
            requests:
              memory: "1Gi"
              cpu: "500m"
          # Both probes hit the /health endpoint served by the inline script.
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 60
            periodSeconds: 30
      # Tolerates every NoSchedule taint so pods can land on tainted GPU nodes.
      tolerations:
        - operator: Exists
          effect: NoSchedule

---
# ClusterIP Service in front of the HAMi inference pods.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: hami-inference
  name: hami-inference-service
spec:
  type: ClusterIP
  # Selects pods carrying the app=hami-inference label.
  selector:
    app: hami-inference
  ports:
    # Service port 80 forwards to the container's port 8080.
    - protocol: TCP
      name: http
      targetPort: 8080
      port: 80

---
# Namespace-wide quota for HAMi GPU resources.
apiVersion: v1
kind: ResourceQuota
metadata:
  name: hami-gpu-quota
  namespace: default
spec:
  hard:
    # Kubernetes enforces quota on extended resources only through the
    # "requests." prefix; un-prefixed extended resource names are not counted.
    # Extended-resource requests default to the limits, so Pods that set only
    # limits are still accounted for.
    requests.nvidia.com/gpu: "10" # at most 10 vGPUs
    requests.nvidia.com/gpumem: "40960" # at most 40960 MB of GPU memory
    requests.nvidia.com/gpucores: "800" # at most 800% GPU compute in total
    requests.memory: "50Gi" # at most 50Gi of system memory
    requests.cpu: "20" # at most 20 CPU cores
    pods: "50" # at most 50 Pods

---
# HAMi GPU node label example.
# Label GPU nodes with:
# kubectl label nodes {node-name} gpu=on
#
# HAMi device selection example.
# Annotations pin the Pod to a GPU model and/or a specific GPU:
apiVersion: v1
kind: Pod
metadata:
  name: hami-gpu-device-specific
  annotations:
    # NOTE(review): confirm this string matches the model name the nodes
    # report (names from nvidia-smi usually contain spaces, not hyphens).
    nvidia.com/use-gputype: "GeForce-RTX-3090" # restrict to this GPU type
    # Placeholder value: replace with a real UUID (see `nvidia-smi -L`),
    # otherwise no device will match.
    nvidia.com/use-gpuuuid: "GPU-12345678" # restrict to this GPU UUID
  labels:
    app: hami-device-specific
spec:
  containers:
    - name: gpu-workload
      # nvidia/cuda tags carry the full CUDA version (major.minor.patch);
      # the short "12.2-..." form is not published and fails to pull.
      image: nvidia/cuda:12.2.2-runtime-ubuntu20.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: "1" # number of vGPUs requested
          nvidia.com/gpumem: "4096" # 4096 MB of GPU memory for the vGPU
          nvidia.com/gpucores: "50" # 50% of the GPU's compute
          memory: "2Gi"
          cpu: "1"
        requests:
          memory: "2Gi"
          cpu: "1"
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - operator: Exists
      effect: NoSchedule

---
# HAMi example: GPU memory requested as a percentage instead of in MB.
apiVersion: v1
kind: Pod
metadata:
  name: hami-gpu-memory-percentage
  labels:
    app: hami-memory-percentage
spec:
  containers:
    - name: gpu-workload
      # nvidia/cuda tags carry the full CUDA version (major.minor.patch);
      # the short "12.2-..." form is not published and fails to pull.
      image: nvidia/cuda:12.2.2-runtime-ubuntu20.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: "1" # number of vGPUs requested
          nvidia.com/gpumem-percentage: "50" # 50% of the GPU's memory
          nvidia.com/gpucores: "30" # 30% of the GPU's compute
          memory: "1Gi"
          cpu: "500m"
        requests:
          memory: "1Gi"
          cpu: "500m"
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - operator: Exists
      effect: NoSchedule

---
# HAMi GPU task priority example.
# nvidia.com/priority: 0 means high priority, 1 means low priority (default).
apiVersion: v1
kind: Pod
metadata:
  name: hami-high-priority-task
  labels:
    app: hami-priority-demo
spec:
  containers:
    - name: high-priority-workload
      # nvidia/cuda tags carry the full CUDA version (major.minor.patch);
      # the short "12.2-..." form is not published and fails to pull.
      image: nvidia/cuda:12.2.2-runtime-ubuntu20.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: "1" # number of vGPUs requested
          nvidia.com/gpumem: "2048" # 2048 MB of GPU memory for the vGPU
          nvidia.com/gpucores: "50" # 50% of the GPU's compute
          # HAMi reads task priority from this container resource limit
          # (its configurable "resourcePriority" name), not from a Pod
          # annotation, so it is declared here.
          nvidia.com/priority: "0" # high-priority task
          memory: "1Gi"
          cpu: "500m"
        requests:
          memory: "1Gi"
          cpu: "500m"
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - operator: Exists
      effect: NoSchedule

---
# HAMi scheduling policy example.
# Both the binpack and spread policies are supported.
apiVersion: v1
kind: Pod
metadata:
  name: hami-binpack-scheduling
  annotations:
    hami.io/gpu-scheduler-policy: "binpack" # pack tasks onto as few GPUs as possible
  labels:
    app: hami-scheduling-demo
spec:
  containers:
    - name: binpack-workload
      # nvidia/cuda tags carry the full CUDA version (major.minor.patch);
      # the short "12.2-..." form is not published and fails to pull.
      image: nvidia/cuda:12.2.2-runtime-ubuntu20.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: "1" # number of vGPUs requested
          nvidia.com/gpumem: "1024" # 1024 MB of GPU memory for the vGPU
          nvidia.com/gpucores: "25" # 25% of the GPU's compute
          memory: "1Gi"
          cpu: "500m"
        requests:
          memory: "1Gi"
          cpu: "500m"
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - operator: Exists
      effect: NoSchedule

---
# HAMi dynamic MIG slicing example.
# For GPUs that support MIG (such as the A100).
apiVersion: v1
kind: Pod
metadata:
  name: hami-dynamic-mig-example
  annotations:
    nvidia.com/vgpu-mode: "mig" # ask for MIG-backed slices
    hami.io/gpu-scheduler-policy: "binpack"
  labels:
    app: hami-mig-demo
spec:
  containers:
    - name: mig-workload
      # nvidia/cuda tags carry the full CUDA version (major.minor.patch);
      # the short "12.2-..." form is not published and fails to pull.
      image: nvidia/cuda:12.2.2-runtime-ubuntu20.04
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: "2" # request 2 MIG instances
          nvidia.com/gpumem: "8000" # 8000 MB of GPU memory
          memory: "2Gi"
          cpu: "1"
        requests:
          memory: "2Gi"
          cpu: "1"
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - operator: Exists
      effect: NoSchedule

---
# HAMi exclusive GPU example.
# Use this when a workload needs a whole GPU to itself.
apiVersion: v1
kind: Pod
metadata:
  name: hami-exclusive-gpu
  labels:
    app: hami-exclusive-demo
spec:
  containers:
    - name: exclusive-workload
      image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
      command: ["python", "-c"]
      # Inline script: prints CUDA availability and, for each visible device,
      # its name and total memory.
      args:
        - |
          import torch
          print(f"CUDA available: {torch.cuda.is_available()}")
          print(f"GPU count: {torch.cuda.device_count()}")
          if torch.cuda.is_available():
            for i in range(torch.cuda.device_count()):
              print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
              print(f"GPU {i} Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.2f} GB")
      resources:
        limits:
          nvidia.com/gpu: "1" # one GPU, used exclusively
          # Request the whole device explicitly so exclusivity does not
          # depend on the cluster's default memory/core settings.
          nvidia.com/gpumem-percentage: "100" # all of the GPU's memory
          nvidia.com/gpucores: "100" # all of the GPU's compute
          memory: "8Gi"
          cpu: "4"
        requests:
          memory: "8Gi"
          cpu: "4"
  restartPolicy: Never
  # Tolerates every NoSchedule taint so the Pod can land on tainted GPU nodes.
  tolerations:
    - operator: Exists
      effect: NoSchedule
