Deployment e CI/CD
Guida completa per deployment e configurazione pipeline CI/CD per l'ecosistema Emblema, coprendo Docker, Kubernetes, e cloud deployment.
🐳 Containerizzazione Docker
1. Multi-stage Docker Buildβ
Frontend (Next.js)β
# apps/www-emblema/Dockerfile
# Multi-stage build for the Next.js frontend in a turborepo monorepo.
# Stages: base (tools) -> builder (workspace prune) -> installer (deps + build)
#         -> runner (minimal runtime image, non-root).
FROM node:20-alpine AS base
ENV APP_NAME=www-emblema
# Install base tools
RUN apk update && apk add --no-cache curl bind-tools

FROM base AS builder
RUN apk add --no-cache libc6-compat
WORKDIR /app
# Install turbo globally
# NOTE(review): turbo is installed via yarn here while the installer stage
# uses corepack/pnpm — consider aligning on one package manager.
RUN yarn global add turbo
COPY . .
RUN turbo telemetry disable
# Prune workspace for specific app (produces /app/out/json and /app/out/full)
RUN turbo prune ${APP_NAME} --docker
# NOTE(review): the .dockerignore suggested later in this guide excludes .env,
# which would make this COPY fail ("not found" in build context); it also bakes
# secrets into an image layer. Prefer build args or runtime env injection —
# TODO confirm intended behavior.
COPY .env /app/out/full/.env

FROM base AS installer
RUN apk add --no-cache libc6-compat
WORKDIR /app
# Install dependencies (lockfile/manifest-only layer for better caching)
COPY --from=builder /app/out/json/ .
RUN npm i -g corepack@latest && corepack enable pnpm
RUN pnpm i --frozen-lockfile
# Build the project
COPY --from=builder /app/out/full/ .
RUN pnpm turbo build --filter=${APP_NAME}...

FROM base AS runner
WORKDIR /app
# Security: Don't run as root
RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs
USER nextjs
# Copy built application (Next.js standalone output + static assets)
COPY --from=installer --chown=nextjs:nodejs /app/apps/${APP_NAME}/.next/standalone ./
COPY --from=installer --chown=nextjs:nodejs /app/apps/${APP_NAME}/.next/static ./apps/${APP_NAME}/.next/static
COPY --from=installer --chown=nextjs:nodejs /app/apps/${APP_NAME}/public ./apps/${APP_NAME}/public
EXPOSE 3000
ENV PORT=3000
ENV HOSTNAME="0.0.0.0"
# Exec-form CMD does not expand env vars, hence the hard-coded app path.
CMD ["node", "apps/www-emblema/server.js"]
Backend Python (FastAPI)β
# apps/background-task/Dockerfile
# Multi-stage build for the FastAPI background-task service (CUDA runtime base).
# Stages: base (system deps) -> deps (uv venv + Playwright browsers) -> final.
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 AS base
ENV APP_PATH=apps/background-task
# Install system dependencies:
# - document conversion: libreoffice, pandoc, poppler-utils, tesseract-ocr
# - media processing: ffmpeg, imagemagick
# - shared libraries and fonts for headless Chromium (Playwright)
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    libgomp1 \
    libmagic1 \
    poppler-utils \
    libreoffice \
    pandoc \
    tesseract-ocr \
    ffmpeg \
    imagemagick \
    # Playwright dependencies
    libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 \
    libcups2 libdrm2 libxkbcommon0 libatspi2.0-0 \
    libxcomposite1 libxdamage1 libxfixes3 libxrandr2 \
    libgbm1 libasound2 \
    fonts-liberation fonts-noto-cjk fonts-dejavu-core \
    && rm -rf /var/lib/apt/lists/*

FROM base AS deps
# Install uv (binary copied from the official astral-sh/uv image)
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
# Copy dependency files only, so this layer stays cached until they change
COPY ${APP_PATH}/pyproject.toml ${APP_PATH}/uv.lock /app/
WORKDIR /app
# Install dependencies into /app/.venv from the lockfile
RUN uv sync --frozen --no-cache
# Install Playwright browsers (stored under /root/.cache/ms-playwright)
RUN /app/.venv/bin/playwright install chromium

FROM base
# Copy virtual environment and browsers from the deps stage
COPY --from=deps /app/.venv /app/.venv
COPY --from=deps /root/.cache/ms-playwright /root/.cache/ms-playwright
# Copy application code
COPY ${APP_PATH}/app /app/app
WORKDIR /app
# Set PATH to include virtual environment
ENV PATH="/app/.venv/bin:$PATH"
# Health check against the service's /health endpoint on port 80
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -f http://localhost:80/health || exit 1
CMD ["fastapi", "run", "app/main.py", "--port", "80"]
2. Docker Compose per Sviluppoβ
Compose Base Structureβ
# docker-compose.yaml
# Root Compose file: pulls in the per-domain Compose files and defines the
# public web frontend plus its companion cron container.
include:
  # Each included file is parsed as an independent YAML document, so anchors
  # declared here (e.g. &default-logging) are NOT visible inside them.
  - path:
      - ./docker-compose-base.yaml
      - ./docker-compose-data-source.yaml
      - ./docker-compose-auth.yaml
      - ./docker-compose-llm.yaml
      - ./docker-compose-monitoring.yaml
      - ./docker-compose-background-task.yaml
    env_file:
      - ./.env

# Shared logging config, referenced as *default-logging by services below.
x-logging: &default-logging
  driver: "json-file"
  options:
    max-size: "${MAX_LOG_SIZE:-1g}"
    max-file: "3"

services:
  www-emblema:
    image: emblema/www:${EMBLEMA_VERSION:-dev}
    build:
      context: .
      dockerfile: ./apps/www-emblema/Dockerfile
      platforms:
        - linux/amd64
    restart: always
    logging: *default-logging
    depends_on:
      - document-render
      - milvus
      - minio
      - graphql-engine
      - keycloak
      - litellm
      - background-task
    environment:
      # Internal service URLs resolved over the `emblema` Docker network.
      - MILVUS_API_URL=http://milvus:19530/v2/vectordb
      - HASURA_API_URL=http://graphql-engine:8080/v1/graphql
      - LITELLM_API_URL=http://litellm:4000/v1
      - BACKGROUND_TASK_API_URL=http://background-task
      - DOCUMENT_RENDER_URL=http://document-render:8094
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.emblema-web.rule=Host(`${EMBLEMA_WEB_HOSTNAME}`)"
      - "traefik.http.routers.emblema-web.entrypoints=websecure"
      - "traefik.http.routers.emblema-web.tls=true"
      - "traefik.http.services.emblema-web.loadbalancer.server.port=3000"
    networks:
      - emblema

  # Sidecar that calls the cleanup endpoint hourly. `$$` escapes the dollar so
  # Compose does not interpolate CRON_SECRET at parse time — it is resolved by
  # the shell inside the container instead.
  www-emblema-cron:
    image: alpine:latest
    restart: always
    depends_on:
      - www-emblema
    environment:
      - CRON_SECRET=${CRON_SECRET}
    networks:
      - emblema
    logging: *default-logging
    command: >
      sh -c "
      apk add --no-cache curl &&
      echo '0 * * * * curl -s -H \"Authorization: Bearer \$\$CRON_SECRET\" http://www-emblema:3000/api/cron/cleanup-uploads > /proc/1/fd/1 2>&1' | crontab - &&
      crond -f -l 2
      "
Service-Specific Composeβ
# docker-compose-background-task.yaml
# FIX: this file referenced *default-logging, but that anchor is declared in
# docker-compose.yaml. Files pulled in via Compose `include:` are each parsed
# as a separate YAML document, and anchors never cross document boundaries, so
# the alias would fail to resolve. The anchor is re-declared locally.
x-logging: &default-logging
  driver: "json-file"
  options:
    max-size: "${MAX_LOG_SIZE:-1g}"
    max-file: "3"

services:
  # FastAPI entry point that enqueues Celery tasks.
  background-task:
    image: emblema/background-task:${EMBLEMA_VERSION:-dev}
    build:
      context: .
      dockerfile: ./apps/background-task/Dockerfile
    restart: always
    logging: *default-logging
    depends_on:
      - redis
      - postgres
    environment:
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
      - DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}
      - MINIO_ENDPOINT=http://minio:9000
      - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY_ID}
      - MINIO_SECRET_KEY=${MINIO_SECRET_ACCESS_KEY}
    volumes:
      - shared-volume:/shared-volume
    networks:
      - emblema
    labels:
      - "traefik.enable=false"

  # Celery worker: same image, overridden command.
  background-task-worker:
    image: emblema/background-task:${EMBLEMA_VERSION:-dev}
    restart: always
    logging: *default-logging
    depends_on:
      - redis
      - postgres
      - background-task
    environment:
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
      - DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}
    volumes:
      - shared-volume:/shared-volume
    networks:
      - emblema
    command: celery -A app.celery_app worker --loglevel=info --concurrency=2
    labels:
      - "traefik.enable=false"

  # Flower monitoring UI, exposed through Traefik.
  background-task-flower:
    image: emblema/background-task:${EMBLEMA_VERSION:-dev}
    restart: always
    logging: *default-logging
    depends_on:
      - redis
      - background-task-worker
    environment:
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    networks:
      - emblema
    command: celery -A app.celery_app flower --port=5555
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.flower.rule=Host(`flower.${EMBLEMA_DOMAIN}`)"
      - "traefik.http.routers.flower.entrypoints=websecure"
      - "traefik.http.routers.flower.tls=true"
      - "traefik.http.services.flower.loadbalancer.server.port=5555"
3. Production Optimizationsβ
Multi-platform Buildsβ
#!/bin/bash
# scripts/build-multiplatform.sh
# Build script per multiple architetture (multi-arch image builds via buildx).
set -e

# FIX: fail fast when VERSION is unset/empty — otherwise the images would be
# pushed with an empty tag ("emblema/www:").
: "${VERSION:?VERSION must be set (e.g. VERSION=v1.2.3)}"

# Setup buildx (reuse the builder if it already exists)
docker buildx create --use --name multiplatform || true
docker buildx inspect --bootstrap

# Build images per multiple piattaforme
# Frontend: amd64 + arm64
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --tag emblema/www:${VERSION} \
  --file apps/www-emblema/Dockerfile \
  --push \
  .

# Backend: amd64 only — presumably because the CUDA base image is not
# published for arm64 (TODO confirm)
docker buildx build \
  --platform linux/amd64 \
  --tag emblema/background-task:${VERSION} \
  --file apps/background-task/Dockerfile \
  --push \
  .
Image Optimizationβ
# Ottimizzazioni per ridurre size immagini
# Use specific versions
FROM node:20.11.0-alpine AS base
# Remove unnecessary packages
RUN apk del --purge \
&& rm -rf /var/cache/apk/* \
&& rm -rf /tmp/*
# Use .dockerignore
# .dockerignore
node_modules
.git
.github
*.md
*.log
.env
coverage/
.next/cache
☸️ Kubernetes Deployment
1. Kubernetes Manifestsβ
Namespace e ConfigMapβ
# k8s/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: emblema
labels:
name: emblema
---
# k8s/configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: emblema-config
namespace: emblema
data:
MILVUS_API_URL: "http://milvus-service:19530/v2/vectordb"
HASURA_API_URL: "http://hasura-service:8080/v1/graphql"
LITELLM_API_URL: "http://litellm-service:4000/v1"
BACKGROUND_TASK_API_URL: "http://background-task-service"
DOCUMENT_RENDER_URL: "http://document-render-service:8094"
REDIS_URL: "redis://redis-service:6379/0"
Secretsβ
# k8s/secrets.yaml
apiVersion: v1
kind: Secret
metadata:
name: emblema-secrets
namespace: emblema
type: Opaque
stringData:
POSTGRES_PASSWORD: "your-secure-password"
HASURA_ADMIN_SECRET: "your-hasura-secret"
MINIO_SECRET_ACCESS_KEY: "your-minio-secret"
AUTH_SECRET: "your-auth-secret"
OPENAI_API_KEY: "your-openai-key"
LITELLM_MASTER_KEY: "your-litellm-key"
Main Application Deploymentβ
# k8s/www-emblema-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: www-emblema
namespace: emblema
labels:
app: www-emblema
spec:
replicas: 3
selector:
matchLabels:
app: www-emblema
template:
metadata:
labels:
app: www-emblema
spec:
containers:
- name: www-emblema
image: emblema/www:latest
ports:
- containerPort: 3000
env:
- name: NODE_ENV
value: "production"
envFrom:
- configMapRef:
name: emblema-config
- secretRef:
name: emblema-secrets
resources:
requests:
memory: "512Mi"
cpu: "250m"
limits:
memory: "1Gi"
cpu: "500m"
livenessProbe:
httpGet:
path: /api/health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /api/health
port: 3000
initialDelaySeconds: 5
periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
name: www-emblema-service
namespace: emblema
spec:
selector:
app: www-emblema
ports:
- protocol: TCP
port: 80
targetPort: 3000
type: ClusterIP
Background Task Deploymentβ
# k8s/background-task-deployment.yaml
# FIX: the original set `nvidia.com/gpu: 0` in requests and `1` in limits.
# Kubernetes extended resources (GPUs) cannot be overcommitted: when a request
# is specified it must equal the limit, so the original spec is rejected by the
# API server. Only the limit is set here; Kubernetes defaults the request to
# the same value.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: background-task
  namespace: emblema
spec:
  replicas: 2
  selector:
    matchLabels:
      app: background-task
  template:
    metadata:
      labels:
        app: background-task
    spec:
      containers:
        - name: background-task
          image: emblema/background-task:latest
          ports:
            - containerPort: 80
          envFrom:
            - configMapRef:
                name: emblema-config
            - secretRef:
                name: emblema-secrets
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "4Gi"
              cpu: "2"
              # GPU limit only — request is implied equal (extended resource).
              "nvidia.com/gpu": 1
          volumeMounts:
            - name: shared-volume
              mountPath: /shared-volume
      volumes:
        - name: shared-volume
          persistentVolumeClaim:
            claimName: shared-pvc
---
# Background Task Worker
apiVersion: apps/v1
kind: Deployment
metadata:
  name: background-task-worker
  namespace: emblema
spec:
  replicas: 3
  selector:
    matchLabels:
      app: background-task-worker
  template:
    metadata:
      labels:
        app: background-task-worker
    spec:
      containers:
        - name: worker
          image: emblema/background-task:latest
          command: ["celery"]
          args:
            [
              "-A",
              "app.celery_app",
              "worker",
              "--loglevel=info",
              "--concurrency=2",
            ]
          envFrom:
            - configMapRef:
                name: emblema-config
            - secretRef:
                name: emblema-secrets
          resources:
            requests:
              memory: "2Gi"
              cpu: "1"
            limits:
              memory: "8Gi"
              cpu: "4"
              # GPU limit only — request is implied equal (extended resource).
              "nvidia.com/gpu": 1
          volumeMounts:
            - name: shared-volume
              mountPath: /shared-volume
      volumes:
        - name: shared-volume
          persistentVolumeClaim:
            claimName: shared-pvc
Ingress Configurationβ
# k8s/ingress.yaml
# FIX: the `kubernetes.io/ingress.class` annotation has been deprecated since
# Kubernetes 1.18 in favour of `spec.ingressClassName`, used below.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: emblema-ingress
  namespace: emblema
  annotations:
    cert-manager.io/cluster-issuer: "letsencrypt-prod"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
    nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - emblema.example.com
        - api.emblema.example.com
      secretName: emblema-tls
  rules:
    - host: emblema.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: www-emblema-service
                port:
                  number: 80
    - host: api.emblema.example.com
      http:
        paths:
          - path: /v1/background-task
            pathType: Prefix
            backend:
              service:
                name: background-task-service
                port:
                  number: 80
2. Storage e Persistenceβ
Persistent Volumesβ
# k8s/storage.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: shared-pvc
namespace: emblema
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 100Gi
storageClassName: nfs-client
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: postgres-pvc
namespace: emblema
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 50Gi
storageClassName: fast-ssd
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: milvus-pvc
namespace: emblema
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 200Gi
storageClassName: fast-ssd
🚀 CI/CD Pipeline
1. GitHub Actions Workflowβ
Main Pipelineβ
# .github/workflows/deploy.yml
name: Build and Deploy

on:
  push:
    branches: [main, develop]
    tags: ["v*"]
  pull_request:
    branches: [main]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    name: Run Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # FIX: pnpm must be on PATH before setup-node runs, otherwise
      # `cache: "pnpm"` fails with "Unable to locate executable file: pnpm".
      - name: Setup pnpm
        uses: pnpm/action-setup@v4
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "20"
          cache: "pnpm"
      - name: Install dependencies
        run: pnpm install
      - name: Run linting
        run: pnpm lint
      - name: Run type checking
        run: pnpm type-check
      - name: Run tests
        run: pnpm test --coverage
      - name: Upload coverage
        uses: codecov/codecov-action@v3

  security-scan:
    name: Security Scan
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run security scan
        # FIX: `id` is required — the PR-comment step below reads
        # steps.scan.outputs.has_vulnerabilities, which was undefined before.
        id: scan
        run: ./scripts/security-scan scan
      - name: Upload security reports
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: security-reports
          path: output/vuln-scan/
      - name: Upload SARIF to GitHub Security
        # FIX: codeql-action v2 is deprecated; v3 is the supported release.
        uses: github/codeql-action/upload-sarif@v3
        if: always()
        with:
          sarif_file: output/vuln-scan/sarif/
          category: container-scanning
      - name: Comment PR with results
        if: github.event_name == 'pull_request' && always()
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const summaryPath = 'output/vuln-scan/summary.txt';
            if (fs.existsSync(summaryPath)) {
              const summary = fs.readFileSync(summaryPath, 'utf8');
              const hasVulnerabilities = '${{ steps.scan.outputs.has_vulnerabilities }}' === 'true';
              const status = hasVulnerabilities ? '⚠️ Vulnerabilities Found' : '✅ Clean';
              const comment = `## Container Vulnerability Scan ${status}
            \`\`\`
            ${summary}
            \`\`\`
            [View detailed report](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})`;
              github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: comment
              });
            }

  sbom-generation:
    name: Generate SBOM
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Generate SBOM
        run: ./scripts/security/generate-sbom.sh
      - name: Upload SBOM artifacts
        # FIX: upload-artifact v3 is deprecated/disabled; aligned with the v4
        # usage in the security-scan job.
        uses: actions/upload-artifact@v4
        with:
          name: sbom-reports
          path: output/vuln-scan/sbom/
          retention-days: 30
Script di Sicurezzaβ
Il progetto include script dedicati per la sicurezza:
# scripts/security-scan
# Script principale per vulnerability scanning e reporting
./scripts/security-scan <command>
# Comandi disponibili:
# scan - Scan Docker images for vulnerabilities
# test - Test security fixes for a service
# test-all - Test all services with security fixes
# report - Generate unified HTML report
# deps - Check security dependencies
# sbom - Generate Software Bill of Materials
# Esempio uso:
./scripts/security-scan scan --severity CRITICAL,HIGH
./scripts/security-scan report
2. Docker Compose Deploymentβ
Deployment con Docker Composeβ
Per ambienti non-Kubernetes, Emblema supporta deployment completo con Docker Compose:
# Setup iniziale su nuovo server
# 1. Clone repository
git clone https://github.com/emblema-ai/emblema.git
cd emblema
# 2. Configura ambiente
cp .env.example .env
# Modifica .env con configurazioni production
# 3. Esegui installazione
./install.sh
# Scegli percorso volumi o usa Docker volumes (raccomandato)
# 4. Import immagini Docker (se pre-build)
pnpm import-docker-images
# OPPURE build locale
docker compose build
# 5. Avvia servizi
docker compose up -d
# 6. Verifica deployment
docker compose ps
docker compose logs -f
Export/Import Immagini per Ambienti Air-Gapβ
# Export tutte le immagini Emblema
pnpm export-docker-images
# Crea docker-images.tar.gz (~5-10GB)
# Su server di destinazione
# 1. Trasferisci docker-images.tar.gz
# 2. Import immagini
pnpm import-docker-images
# 3. Verifica import
docker images | grep emblema
Script di Deployment Automatizzatoβ
#!/bin/bash
# scripts/deploy-docker-compose.sh
# Automated deploy/update of Emblema on a Docker Compose host.
# Usage: deploy-docker-compose.sh [environment] [backup_first]
#   environment  - deployment environment label (default: production)
#   backup_first - "true" to dump Postgres before deploying (default: true)
set -e
ENVIRONMENT=${1:-production}
BACKUP_FIRST=${2:-true}
echo "Deploying Emblema - Environment: $ENVIRONMENT"
# Back up the database first when requested
if [ "$BACKUP_FIRST" = "true" ]; then
  echo "Creating database backup..."
  ./scripts/backup-postgres.sh
fi
# Pull latest changes
git pull origin main
# Update images: prefer a pre-built bundle when present, otherwise build locally
if [ -f "docker-images.tar.gz" ]; then
  echo "Importing pre-built images..."
  pnpm import-docker-images
else
  echo "Building images locally..."
  docker compose build --no-cache
fi
# Apply database migrations (-T: no TTY allocation, safe for CI/cron)
echo "Running database migrations..."
docker compose exec -T hasura hasura migrate apply
docker compose exec -T hasura hasura metadata apply
# Restart services with (approximate) zero downtime: temporarily scale the
# frontend to two replicas, wait for the new one to come up, then scale back.
echo "Restarting services..."
docker compose up -d --no-deps --scale www-emblema=2 www-emblema
sleep 30
docker compose up -d --no-deps www-emblema
# Health check
echo "Running health checks..."
./scripts/health-check.sh
echo "Deployment completed!"
3. Kubernetes Deployment (Future)β
Nota
Il deployment Kubernetes Γ¨ pianificato per future release. Attualmente il deployment production Γ¨ basato su Docker Compose.
Preparazione per Kubernetesβ
# k8s/namespace.yaml (esempio futuro)
apiVersion: v1
kind: Namespace
metadata:
name: emblema
labels:
name: emblema
Struttura futura Helm Chart
Chart Structureβ
helm/
└── emblema/
    ├── Chart.yaml
    ├── values.yaml
    ├── values-staging.yaml
    ├── values-production.yaml
    └── templates/
        ├── deployment.yaml
        ├── service.yaml
        ├── ingress.yaml
        ├── configmap.yaml
        ├── secrets.yaml
        ├── hpa.yaml
        └── pdb.yaml
Main Chartβ
# helm/emblema/Chart.yaml
apiVersion: v2
name: emblema
description: Emblema AI Platform Helm Chart
type: application
version: 0.1.0
appVersion: "1.0.0"
dependencies:
- name: postgresql
version: "12.x.x"
repository: "https://charts.bitnami.com/bitnami"
condition: postgresql.enabled
- name: redis
version: "18.x.x"
repository: "https://charts.bitnami.com/bitnami"
condition: redis.enabled
- name: milvus
version: "4.x.x"
repository: "https://zilliztech.github.io/milvus-helm/"
condition: milvus.enabled
Values Templateβ
# helm/emblema/values.yaml
global:
imageRegistry: ghcr.io
imageRepository: emblema-ai/emblema
imageTag: latest
imagePullPolicy: IfNotPresent
replicaCount: 3
www:
enabled: true
image:
repository: www-emblema
tag: ""
service:
type: ClusterIP
port: 80
targetPort: 3000
resources:
requests:
memory: "512Mi"
cpu: "250m"
limits:
memory: "1Gi"
cpu: "500m"
autoscaling:
enabled: true
minReplicas: 2
maxReplicas: 10
targetCPUUtilizationPercentage: 70
backgroundTask:
enabled: true
image:
repository: background-task
tag: ""
api:
replicaCount: 2
resources:
requests:
memory: "1Gi"
cpu: "500m"
limits:
memory: "4Gi"
cpu: "2"
worker:
replicaCount: 3
resources:
requests:
memory: "2Gi"
cpu: "1"
nvidia.com/gpu: 0
limits:
memory: "8Gi"
cpu: "4"
nvidia.com/gpu: 1
ingress:
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt-prod"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "100m"
hosts:
- host: emblema.example.com
paths:
- path: /
pathType: Prefix
service: www-emblema
tls:
- secretName: emblema-tls
hosts:
- emblema.example.com
# External services
postgresql:
enabled: true
auth:
postgresPassword: "change-me"
database: "emblema"
primary:
persistence:
enabled: true
size: 50Gi
redis:
enabled: true
auth:
enabled: false
master:
persistence:
enabled: true
size: 8Gi
milvus:
enabled: true
cluster:
enabled: false
standalone:
persistence:
enabled: true
size: 200Gi
Production Valuesβ
# helm/emblema/values-production.yaml
global:
imageTag: "v1.0.0"
replicaCount: 5
www:
autoscaling:
minReplicas: 3
maxReplicas: 20
targetCPUUtilizationPercentage: 60
resources:
requests:
memory: "1Gi"
cpu: "500m"
limits:
memory: "2Gi"
cpu: "1"
backgroundTask:
api:
replicaCount: 3
worker:
replicaCount: 5
ingress:
hosts:
- host: app.emblema.ai
paths:
- path: /
pathType: Prefix
service: www-emblema
tls:
- secretName: emblema-prod-tls
hosts:
- app.emblema.ai
postgresql:
auth:
postgresPassword: "super-secure-production-password"
primary:
persistence:
size: 200Gi
resources:
requests:
memory: "2Gi"
cpu: "1"
limits:
memory: "4Gi"
cpu: "2"
redis:
master:
persistence:
size: 20Gi
resources:
requests:
memory: "1Gi"
cpu: "500m"
limits:
memory: "2Gi"
cpu: "1"
milvus:
standalone:
persistence:
size: 1Ti
resources:
requests:
memory: "4Gi"
cpu: "2"
limits:
memory: "8Gi"
cpu: "4"
☁️ Cloud Deployment
1. AWS EKS Setupβ
Terraform Infrastructureβ
# terraform/eks.tf
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 19.0"
cluster_name = "emblema-${var.environment}"
cluster_version = "1.28"
vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets
cluster_endpoint_public_access = true
# EKS Managed Node Groups
eks_managed_node_groups = {
general = {
name = "general"
instance_types = ["t3.large"]
min_size = 2
max_size = 10
desired_size = 3
labels = {
role = "general"
}
}
gpu = {
name = "gpu"
instance_types = ["p3.2xlarge"]
min_size = 0
max_size = 5
desired_size = 1
labels = {
role = "gpu"
"nvidia.com/gpu" = "true"
}
taints = {
gpu = {
key = "nvidia.com/gpu"
value = "true"
effect = "NO_SCHEDULE"
}
}
}
}
# aws-auth configmap
manage_aws_auth_configmap = true
aws_auth_users = [
{
userarn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:user/admin"
username = "admin"
groups = ["system:masters"]
},
]
tags = {
Environment = var.environment
Project = "emblema"
}
}
# RDS for PostgreSQL
module "rds" {
source = "terraform-aws-modules/rds/aws"
identifier = "emblema-${var.environment}"
engine = "postgres"
engine_version = "15.4"
instance_class = "db.t3.large"
allocated_storage = 200
db_name = "emblema"
username = "postgres"
password = var.db_password
vpc_security_group_ids = [module.security_group_rds.security_group_id]
db_subnet_group_name = module.vpc.database_subnet_group
backup_retention_period = 7
backup_window = "03:00-04:00"
maintenance_window = "Mon:04:00-Mon:05:00"
tags = {
Environment = var.environment
Project = "emblema"
}
}
# ElastiCache for Redis
module "elasticache" {
source = "terraform-aws-modules/elasticache/aws"
cluster_id = "emblema-${var.environment}"
description = "Emblema Redis cluster"
node_type = "cache.t3.medium"
num_cache_nodes = 1
parameter_group_name = "default.redis7"
port = 6379
engine_version = "7.0"
subnet_group_name = module.vpc.elasticache_subnet_group_name
security_group_ids = [module.security_group_redis.security_group_id]
tags = {
Environment = var.environment
Project = "emblema"
}
}
Deployment Scriptβ
#!/bin/bash
# scripts/deploy-aws.sh
# Deploy Emblema to AWS EKS: install tooling, configure kubeconfig, install
# cluster add-ons and roll out the Helm release.
# Usage: deploy-aws.sh [environment] [region]   (IMAGE_TAG env var optional)
set -e

ENVIRONMENT=${1:-staging}
REGION=${2:-us-west-2}
# FIX: default the image tag so `--set global.imageTag=` never expands empty.
IMAGE_TAG=${IMAGE_TAG:-latest}

echo "🚀 Deploying Emblema to AWS EKS ($ENVIRONMENT)"

# Install/update kubectl
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x kubectl
sudo mv kubectl /usr/local/bin/

# Install/update Helm
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash

# Update kubeconfig
aws eks update-kubeconfig --region $REGION --name emblema-$ENVIRONMENT

# Install cert-manager
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.0/cert-manager.yaml

# Install NVIDIA GPU Operator (only when GPU nodes are present)
if kubectl get nodes -l node.kubernetes.io/instance-type | grep -q p3; then
  helm repo add nvidia https://helm.ngc.nvidia.com/nvidia
  helm repo update
  helm install gpu-operator nvidia/gpu-operator \
    --namespace gpu-operator \
    --create-namespace \
    --wait
fi

# Deploy Emblema
helm dependency build ./helm/emblema
# FIX: per-environment values files live inside the chart directory
# (helm/emblema/values-<env>.yaml), matching the documented chart layout;
# the old path ./helm/values-<env>.yaml did not exist.
helm upgrade --install emblema-$ENVIRONMENT ./helm/emblema \
  --namespace emblema-$ENVIRONMENT \
  --create-namespace \
  --values ./helm/emblema/values-$ENVIRONMENT.yaml \
  --set global.imageTag=$IMAGE_TAG \
  --wait --timeout=15m

echo "✅ Deployment completed successfully!"

# Run health checks
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=emblema \
  --namespace emblema-$ENVIRONMENT --timeout=300s

# Test deployment
./scripts/post-deploy-tests.sh https://emblema-$ENVIRONMENT.example.com
2. Azure AKS Setupβ
# azure-pipelines.yml
trigger:
branches:
include:
- main
- develop
tags:
include:
- v*
variables:
imageRepository: "emblema"
containerRegistry: "emblemaregistry"
dockerfilePath: "**/Dockerfile"
tag: "$(Build.BuildId)"
k8sNamespace: "emblema-$(Build.SourceBranchName)"
stages:
- stage: Build
displayName: Build stage
jobs:
- job: Build
pool:
vmImage: "ubuntu-latest"
steps:
- task: Docker@2
displayName: Build and push images
inputs:
command: buildAndPush
repository: $(imageRepository)
dockerfile: $(dockerfilePath)
containerRegistry: $(containerRegistry)
tags: |
$(tag)
latest
- stage: Deploy
displayName: Deploy stage
dependsOn: Build
condition: and(succeeded(), or(eq(variables['Build.SourceBranch'], 'refs/heads/main'), startsWith(variables['Build.SourceBranch'], 'refs/tags/v')))
jobs:
- deployment: Deploy
pool:
vmImage: "ubuntu-latest"
environment: "production"
strategy:
runOnce:
deploy:
steps:
- task: KubernetesManifest@0
displayName: Deploy to Kubernetes cluster
inputs:
action: deploy
kubernetesServiceConnection: "aks-connection"
namespace: $(k8sNamespace)
manifests: |
k8s/deployment.yaml
k8s/service.yaml
k8s/ingress.yaml
containers: |
$(containerRegistry).azurecr.io/$(imageRepository):$(tag)
📊 Monitoring e Observability
1. Prometheus & Grafana Setupβ
# k8s/monitoring.yaml
apiVersion: v1
kind: Namespace
metadata:
name: monitoring
---
# Prometheus
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
template:
metadata:
labels:
app: prometheus
spec:
containers:
- name: prometheus
image: prom/prometheus:latest
ports:
- containerPort: 9090
volumeMounts:
- name: config
mountPath: /etc/prometheus
- name: storage
mountPath: /prometheus
volumes:
- name: config
configMap:
name: prometheus-config
- name: storage
persistentVolumeClaim:
claimName: prometheus-pvc
---
# Grafana
apiVersion: apps/v1
kind: Deployment
metadata:
name: grafana
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: grafana
template:
metadata:
labels:
app: grafana
spec:
containers:
- name: grafana
image: grafana/grafana:latest
ports:
- containerPort: 3000
env:
- name: GF_SECURITY_ADMIN_PASSWORD
valueFrom:
secretKeyRef:
name: grafana-secrets
key: admin-password
volumeMounts:
- name: storage
mountPath: /var/lib/grafana
volumes:
- name: storage
persistentVolumeClaim:
claimName: grafana-pvc
2. Alerting Configurationβ
# alerts/emblema-alerts.yaml
groups:
- name: emblema
rules:
- alert: HighErrorRate
expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.1
for: 5m
labels:
severity: critical
annotations:
summary: "High error rate detected"
description: "Error rate is {{ $value }} for {{ $labels.instance }}"
- alert: HighMemoryUsage
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9
for: 10m
labels:
severity: warning
annotations:
summary: "High memory usage"
description: "Memory usage is {{ $value | humanizePercentage }}"
- alert: CeleryTasksFailing
expr: rate(celery_tasks_total{status="failure"}[5m]) > 0.05
for: 5m
labels:
severity: critical
annotations:
summary: "Celery tasks failing"
description: "Task failure rate: {{ $value }}"
Questo setup di deployment garantisce:
- ✅ Scalabilità: Auto-scaling orizzontale e verticale
- ✅ Resilienza: Multi-replica, health checks, circuit breakers
- ✅ Security: Secrets management, network policies, RBAC
- ✅ Observability: Monitoring completo, logging, alerting
- ✅ Automation: CI/CD completa, GitOps workflow
- ✅ Multi-cloud: Supporto AWS, Azure, GCP