> deepgram-deploy-integration

Deploy Deepgram integrations to production environments. Use when deploying to cloud platforms, configuring containers, or setting up Deepgram in Docker/Kubernetes/serverless. Trigger: "deploy deepgram", "deepgram docker", "deepgram kubernetes", "deepgram production deploy", "deepgram cloud run", "deepgram lambda".

fetch
$curl "https://skillshub.wtf/jeremylongshore/claude-code-plugins-plus-skills/deepgram-deploy-integration?format=md"
SKILL.md: deepgram-deploy-integration

Deepgram Deploy Integration

Overview

Deploy Deepgram transcription services to Docker, Kubernetes, AWS Lambda, and Google Cloud Run. Includes production Dockerfile, K8s manifests with secret management, serverless handlers for event-driven transcription, and health check patterns.

Prerequisites

  • Working Deepgram integration (tested locally)
  • Production API key in secret manager
  • Container registry access (Docker Hub, ECR, GCR)
  • Target platform CLI installed

Instructions

Step 1: Production Dockerfile

# Multi-stage build for minimal production image
FROM node:20-alpine AS builder

WORKDIR /app
COPY package*.json ./
# The build stage needs devDependencies (TypeScript compiler, etc.).
RUN npm ci --include=dev
COPY tsconfig.json ./
COPY src/ ./src/
RUN npm run build

FROM node:20-alpine AS runtime

# Security: non-root user. -G places the user in the group created alongside
# it; without it the "app" group would be created but left unused.
RUN addgroup -g 1001 -S app && adduser -S app -u 1001 -G app
WORKDIR /app

# Production dependencies only.
# --omit=dev is the current spelling of the deprecated --production flag.
COPY package*.json ./
RUN npm ci --omit=dev && npm cache clean --force

# Copy built application
COPY --from=builder /app/dist ./dist

# Health check: requests the service's /health endpoint on port 3000
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
  CMD wget -q --spider http://localhost:3000/health || exit 1

USER app
EXPOSE 3000

CMD ["node", "dist/server.js"]

Step 2: Docker Compose

# docker-compose.yml
version: '3.8'

services:
  deepgram-service:
    build: .
    ports:
      - "3000:3000"
    environment:
      - NODE_ENV=production
      # ":?" aborts `docker compose up` with this message when the variable is
      # unset or empty, instead of starting the service with a blank API key.
      - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY:?DEEPGRAM_API_KEY must be set}
      - DEEPGRAM_MODEL=nova-3
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 512M
          cpus: '1.0'

  redis:
    image: redis:7-alpine
    ports:
      # Bind to loopback only: this Redis has no password configured, so it
      # must not be reachable from other machines. Containers on the compose
      # network still reach it at redis:6379.
      - "127.0.0.1:6379:6379"
    volumes:
      - redis-data:/data

volumes:
  redis-data:

Step 3: Kubernetes Deployment

# k8s/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepgram-service
  labels:
    app: deepgram-service
spec:
  # spec.replicas is intentionally omitted: the HorizontalPodAutoscaler below
  # owns the replica count. A fixed value here would reset the count on every
  # `kubectl apply`, undoing whatever the autoscaler had chosen.
  selector:
    matchLabels:
      app: deepgram-service
  template:
    metadata:
      labels:
        app: deepgram-service
    spec:
      containers:
        - name: deepgram-service
          image: your-registry/deepgram-service:latest
          ports:
            - containerPort: 3000
          env:
            - name: NODE_ENV
              value: production
            # API key comes from the "deepgram-secrets" Secret (key "api-key"),
            # which must exist before the pods can start.
            - name: DEEPGRAM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: deepgram-secrets
                  key: api-key
            - name: DEEPGRAM_MODEL
              value: nova-3
          resources:
            requests:
              memory: "256Mi"
              cpu: "250m"
            limits:
              memory: "512Mi"
              cpu: "1000m"
          # Probe timeouts are raised from the 1s default because /health
          # makes an outbound call to the Deepgram API before responding.
          livenessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
---
# Cluster-internal Service: port 80 forwards to the container's port 3000.
apiVersion: v1
kind: Service
metadata:
  name: deepgram-service
spec:
  selector:
    app: deepgram-service
  ports:
    - port: 80
      targetPort: 3000
  type: ClusterIP
---
# Scales the Deployment between 2 and 10 replicas, targeting 70% average CPU
# utilization (measured against the container's CPU request).
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: deepgram-service-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: deepgram-service
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
# Create secret
# Quoted so keys containing shell metacharacters are passed intact;
# ":?" aborts instead of creating a secret with an empty api-key.
kubectl create secret generic deepgram-secrets \
  --from-literal=api-key="${DEEPGRAM_API_KEY:?DEEPGRAM_API_KEY is not set}"

# Deploy
kubectl apply -f k8s/

Step 4: AWS Lambda Handler

// lambda/handler.ts
import { createClient } from '@deepgram/sdk';
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
import type { S3Event } from 'aws-lambda';

// Clients are created once at module scope, outside the handler.
// The `!` only silences the compiler: nothing here verifies at runtime that
// DEEPGRAM_API_KEY is actually set.
const deepgram = createClient(process.env.DEEPGRAM_API_KEY!);
// Empty config object: no client options are set explicitly here.
const s3 = new S3Client({});

/**
 * Decodes an object key as it appears in an S3 event notification.
 *
 * S3 URL-encodes keys in event payloads and writes spaces as `+`.
 * `decodeURIComponent` leaves `+` untouched, so it is converted first;
 * a literal plus sign arrives as `%2B` and is decoded afterwards.
 */
function decodeS3Key(rawKey: string): string {
  return decodeURIComponent(rawKey.replace(/\+/g, ' '));
}

/**
 * Trigger: S3 upload of audio file -> Lambda -> Deepgram -> Store result
 *
 * Transcribes every record in the event (previously the function returned
 * from inside the loop, so records after the first were silently dropped).
 *
 * @param event S3 notification event; each record names one uploaded object.
 * @returns `statusCode` 200 and a JSON body. The top-level `file`, `duration`,
 *   `transcript` and `request_id` fields describe the first record, as before;
 *   `results` holds one such entry per record. For an event with no records
 *   the body is `{ "results": [] }`.
 * @throws The Deepgram error if a transcription fails, or an `Error` if S3
 *   returns an object with no body. Either aborts the remaining records.
 */
export async function handler(event: S3Event) {
  const results: Array<{
    file: string;
    duration: number;
    transcript: string;
    request_id: string;
  }> = [];

  for (const record of event.Records) {
    const bucket = record.s3.bucket.name;
    const key = decodeS3Key(record.s3.object.key);

    console.log(`Processing: s3://${bucket}/${key}`);

    // Get audio from S3
    const { Body } = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }));
    if (!Body) {
      throw new Error(`S3 returned no body for s3://${bucket}/${key}`);
    }
    const audio = Buffer.from(await Body.transformToByteArray());

    // Transcribe
    const { result, error } = await deepgram.listen.prerecorded.transcribeFile(
      audio,
      {
        model: 'nova-3',
        smart_format: true,
        diarize: true,
        utterances: true,
      }
    );

    if (error) {
      console.error(`Transcription failed for ${key}:`, error.message);
      throw error;
    }

    const alternative = result.results.channels[0].alternatives[0];

    console.log(`Transcribed ${key}: ${result.metadata.duration}s, ` +
      `${alternative.words?.length} words`);

    results.push({
      file: key,
      duration: result.metadata.duration,
      transcript: alternative.transcript,
      request_id: result.metadata.request_id,
    });
  }

  return {
    statusCode: 200,
    // Spread of the first entry keeps the original single-record body shape
    // for any caller that reads the top-level fields.
    body: JSON.stringify({ ...results[0], results }),
  };
}

Step 5: Google Cloud Run

// server.ts — Cloud Run entry point
import express from 'express';
import { createClient } from '@deepgram/sdk';

const app = express();
// Parse JSON request bodies, accepting payloads up to 50mb.
app.use(express.json({ limit: '50mb' }));

// The `!` only silences the compiler: nothing here verifies at runtime that
// DEEPGRAM_API_KEY is actually set.
const deepgram = createClient(process.env.DEEPGRAM_API_KEY!);

/**
 * POST /transcribe — transcribe audio hosted at a URL.
 *
 * Request body (JSON): `url` (required string), `model` (default 'nova-3'),
 * `diarize` (default false).
 *
 * Responses:
 *   200 — transcript, confidence, duration, request_id
 *   400 — `url` missing or not a non-empty string
 *   502 — Deepgram returned an error
 *   500 — unexpected failure
 */
app.post('/transcribe', async (req, res) => {
  try {
    // req.body may be undefined when no JSON body was parsed; default to {}
    // so the destructuring below cannot throw.
    const { url, model = 'nova-3', diarize = false } = req.body ?? {};

    // Reject bad input here rather than forwarding it to Deepgram and
    // reporting the resulting failure as an upstream (502) error.
    if (typeof url !== 'string' || url.length === 0) {
      return res.status(400).json({ error: 'Request body must include a non-empty string "url"' });
    }

    const { result, error } = await deepgram.listen.prerecorded.transcribeUrl(
      { url },
      { model, smart_format: true, diarize }
    );

    if (error) return res.status(502).json({ error: error.message });

    const alternative = result.results.channels[0].alternatives[0];

    res.json({
      transcript: alternative.transcript,
      confidence: alternative.confidence,
      duration: result.metadata.duration,
      request_id: result.metadata.request_id,
    });
  } catch (err: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    const message = err instanceof Error ? err.message : String(err);
    res.status(500).json({ error: message });
  }
});

// GET /health — calls the Deepgram projects endpoint and reports:
//   200 { status: 'healthy' }    call succeeded
//   200 { status: 'degraded' }   call completed but returned an error
//   503 { status: 'unhealthy' }  call threw
app.get('/health', async (_req, res) => {
  try {
    const projectLookup = await deepgram.manage.getProjects();
    if (projectLookup.error) {
      res.json({ status: 'degraded' });
    } else {
      res.json({ status: 'healthy' });
    }
  } catch {
    res.status(503).json({ status: 'unhealthy' });
  }
});

// Listen on the PORT environment variable when it is set, otherwise 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
  console.log(`Listening on port ${port}`);
});
# Deploy to Cloud Run
# --set-secrets has Cloud Run read the "deepgram-key" secret from Secret Manager
# and expose it as DEEPGRAM_API_KEY. Unlike --set-env-vars with $(gcloud secrets ...),
# the key is never expanded into the shell command line or stored as a plain-text
# env var on the service. The service's runtime service account needs the
# Secret Manager Secret Accessor role on that secret.
gcloud run deploy deepgram-service \
  --source . \
  --set-secrets DEEPGRAM_API_KEY=deepgram-key:latest \
  --memory 512Mi \
  --timeout 300 \
  --concurrency 50 \
  --min-instances 1 \
  --max-instances 10

Step 6: Deploy Script

#!/bin/bash
# Build, test, containerize and deploy deepgram-service to the Kubernetes
# context named after the target environment, then smoke-test /health.
#
# Usage: deploy.sh <staging|production>
# Optional: SMOKE_TEST_PORT (local port for the smoke test, default 18080)
set -euo pipefail

ENV="${1:?Usage: deploy.sh <staging|production>}"

# Validate the target before doing any work; an unrecognized value would
# otherwise skip the deploy yet still run the smoke test.
case "$ENV" in
  staging|production) ;;
  *)
    echo "Unknown environment: $ENV (expected staging or production)" >&2
    exit 1
    ;;
esac

echo "Deploying to $ENV..."

# Build
# One command per line: inside an `a && b && c` list, `set -e` only reacts to
# the last command, so a failed install or build would not stop the script.
npm ci
npm run build
npm test

# Build container
# NOTE(review): this image is tagged locally and never pushed, while
# k8s/deployment.yaml references your-registry/deepgram-service:latest.
# Confirm how the built image is meant to reach the cluster.
docker build -t "deepgram-service:$ENV" .

# Deploy to the kubectl context that matches the environment name
kubectl --context "$ENV" apply -f k8s/
kubectl --context "$ENV" rollout status deployment/deepgram-service

# Post-deploy smoke test
# The Service is type ClusterIP, so it has no load-balancer IP to query.
# Reach it through a temporary port-forward in the same context instead.
echo "Running smoke test..."
LOCAL_PORT="${SMOKE_TEST_PORT:-18080}"
kubectl --context "$ENV" port-forward svc/deepgram-service "$LOCAL_PORT:80" >/dev/null 2>&1 &
PF_PID=$!
# Always stop the port-forward, whether the smoke test passes or fails.
trap 'kill "$PF_PID" 2>/dev/null || true' EXIT

# Retries cover the short delay before the port-forward starts listening.
curl -sf --retry 5 --retry-delay 1 --retry-connrefused "http://localhost:$LOCAL_PORT/health" \
  || { echo "SMOKE TEST FAILED"; exit 1; }
echo "Deploy successful."

Output

  • Production Dockerfile (multi-stage, non-root, health check)
  • Docker Compose with Redis for caching
  • Kubernetes manifests (Deployment, Service, HPA) plus the kubectl command that creates the API-key Secret
  • AWS Lambda handler (S3 trigger -> Deepgram -> result)
  • Cloud Run service with health check
  • Environment-aware deploy script

Error Handling

| Issue | Cause | Solution |
|---|---|---|
| Container OOM | Memory limit too low | Increase to 512Mi+ |
| Health check failing | Service not ready yet | Increase initialDelaySeconds |
| Lambda timeout | Audio too long | Increase timeout to 300s, or use callback |
| Cloud Run 429 | Too many concurrent requests | Decrease --concurrency flag |
| Secret not found | K8s secret missing | Create secret before deploying |

Resources

┌ stats

installs/wk: 0
░░░░░░░░░░
github stars: 1.7K
██████████
first seen: Mar 23, 2026
└────────────

┌ repo

jeremylongshore/claude-code-plugins-plus-skills
by jeremylongshore
└────────────