From 58445340c88b09dcabdf09fdfb2bf36c753106e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= <valknar@pivoine.art>
Date: Thu, 19 Feb 2026 12:50:10 +0100
Subject: [PATCH] feat: remove gpu support and simplify to cpu-only

---
 .gitea/workflows/docker-build-push.yml | 73 +++-----------------------
 CLAUDE.md                              | 14 ++---
 Dockerfile                             | 31 +----------
 README.md                              | 18 ++-----
 app/routers/system.py                  | 27 +---------
 app/schemas/process.py                 |  1 -
 app/schemas/system.py                  |  8 ---
 app/services/facefusion_bridge.py      |  2 -
 docker-compose.gpu.yml                 | 31 -----------
 docker-compose.prod.yml                |  2 +-
 docker-compose.yml                     |  2 -
 requirements-gpu.txt                   |  2 -
 12 files changed, 21 insertions(+), 190 deletions(-)
 delete mode 100644 docker-compose.gpu.yml
 delete mode 100644 requirements-gpu.txt

diff --git a/.gitea/workflows/docker-build-push.yml b/.gitea/workflows/docker-build-push.yml
index cb2b0f7..35fd909 100644
--- a/.gitea/workflows/docker-build-push.yml
+++ b/.gitea/workflows/docker-build-push.yml
@@ -22,7 +22,7 @@ env:
   IMAGE_NAME: valknar/facefusion-api
 
 jobs:
-  build-gpu:
+  build:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
@@ -42,7 +42,7 @@ jobs:
           username: ${{ gitea.actor }}
           password: ${{ secrets.REGISTRY_TOKEN }}
 
-      - name: Extract metadata (GPU)
+      - name: Extract metadata
         id: meta
         uses: docker/metadata-action@v5
         with:
@@ -58,10 +58,10 @@ jobs:
             type=raw,value=${{ gitea.event.inputs.tag }},enable=${{ gitea.event_name == 'workflow_dispatch' }}
           labels: |
             org.opencontainers.image.title=facefusion-api
-            org.opencontainers.image.description=REST API for FaceFusion face processing (GPU)
+            org.opencontainers.image.description=REST API for FaceFusion face processing (CPU)
             org.opencontainers.image.vendor=valknar
 
-      - name: Build and push GPU image
+      - name: Build and push image
         uses: docker/build-push-action@v5
         with:
           context: .
@@ -69,73 +69,14 @@ jobs:
           push: ${{ gitea.event_name != 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          build-args: VARIANT=gpu
-          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-gpu
-          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-gpu,mode=max
+          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
+          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache,mode=max
 
       - name: Summary
         if: gitea.event_name != 'pull_request'
         run: |
-          echo "### GPU Image Published" >> $GITEA_STEP_SUMMARY
+          echo "### Image Published" >> $GITEA_STEP_SUMMARY
           echo "**Tags:**" >> $GITEA_STEP_SUMMARY
           echo "\`\`\`" >> $GITEA_STEP_SUMMARY
           echo "${{ steps.meta.outputs.tags }}" >> $GITEA_STEP_SUMMARY
           echo "\`\`\`" >> $GITEA_STEP_SUMMARY
-
-  build-cpu:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          submodules: recursive
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-        with:
-          platforms: linux/amd64
-
-      - name: Log in to Gitea Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ gitea.actor }}
-          password: ${{ secrets.REGISTRY_TOKEN }}
-
-      - name: Extract metadata (CPU)
-        id: meta-cpu
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
-          flavor: suffix=-cpu
-          tags: |
-            type=raw,value=latest-cpu,enable={{is_default_branch}}
-            type=ref,event=branch,suffix=-cpu
-            type=ref,event=pr,suffix=-cpu
-            type=semver,pattern={{version}},suffix=-cpu
-            type=sha,prefix={{branch}}-,suffix=-cpu
-          labels: |
-            org.opencontainers.image.title=facefusion-api
-            org.opencontainers.image.description=REST API for FaceFusion face processing (CPU)
-            org.opencontainers.image.vendor=valknar
-
-      - name: Build and push CPU image
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          platforms: linux/amd64
-          push: ${{ gitea.event_name != 'pull_request' }}
-          tags: ${{ steps.meta-cpu.outputs.tags }}
-          labels: ${{ steps.meta-cpu.outputs.labels }}
-          build-args: VARIANT=cpu
-          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-cpu
-          cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-cpu,mode=max
-
-      - name: Summary
-        if: gitea.event_name != 'pull_request'
-        run: |
-          echo "### CPU Image Published" >> $GITEA_STEP_SUMMARY
-          echo "**Tags:**" >> $GITEA_STEP_SUMMARY
-          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
-          echo "${{ steps.meta-cpu.outputs.tags }}" >> $GITEA_STEP_SUMMARY
-          echo "\`\`\`" >> $GITEA_STEP_SUMMARY
diff --git a/CLAUDE.md b/CLAUDE.md
index 1edf5d8..26fc476 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -2,7 +2,7 @@
 
 ## Overview
 
-FaceFusion API - A Python REST API wrapping FaceFusion v3.5.3 for face swapping, enhancement, lip sync, and other face/frame processing. Containerized with CUDA GPU support, deployed via Gitea CI/CD at `dev.pivoine.art`.
+FaceFusion API - A Python REST API wrapping FaceFusion v3.5.3 for face swapping, enhancement, lip sync, and other face/frame processing. Containerized for CPU-only execution, deployed via Gitea CI/CD at `dev.pivoine.art`.
 
 ## Architecture
 
@@ -43,9 +43,6 @@ docker compose up
 # Production (CPU VPS)
 docker compose -f docker-compose.prod.yml up -d
 
-# Production (GPU server)
-docker compose -f docker-compose.gpu.yml up -d
-
 # Test endpoints
 curl http://localhost:8000/api/v1/health
 curl http://localhost:8000/api/v1/system
@@ -69,19 +66,18 @@ curl -X POST http://localhost:8000/api/v1/process \
 - `GET /api/v1/models` - List downloaded models
 - `POST /api/v1/models/download` - Download models
 - `GET /api/v1/health` - Health check
-- `GET /api/v1/system` - System info (GPU, memory)
+- `GET /api/v1/system` - System info (CPU, memory)
 
 ## Docker
 
-- `VARIANT=cpu` (default): `python:3.12-slim` + `onnxruntime`
-- `VARIANT=gpu`: `nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04` + `onnxruntime-gpu`
+- Base image: `python:3.12-slim` + `onnxruntime`
 - Models persisted in `/data/models` Docker volume (not baked into image)
 - Single worker mandatory (`--workers 1`)
 
 ## Environment Variables
 
 All prefixed with `FF_`:
-- `FF_EXECUTION_PROVIDERS` - JSON array, e.g. `["cuda","cpu"]`
+- `FF_EXECUTION_PROVIDERS` - JSON array, e.g. `["cpu"]`
 - `FF_EXECUTION_THREAD_COUNT` - Default 4
 - `FF_VIDEO_MEMORY_STRATEGY` - strict/moderate/tolerant
 - `FF_MODELS_DIR` - Model storage path
@@ -91,5 +87,5 @@ All prefixed with `FF_`:
 
 - Never run with multiple uvicorn workers - FaceFusion global state will corrupt
 - Models are 100MB-1GB each; pre-download via `/api/v1/models/download` before processing
-- The `facefusion/` submodule must not be modified - use symlinks for model paths
+- The `facefusion/` submodule should not be modified directly if possible - use symlinks for model paths
 - Git operations: always push with the valknarthing ssh key
diff --git a/Dockerfile b/Dockerfile
index 5130d06..34112b0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,5 @@
-ARG VARIANT=cpu
-
-# ---- CPU base ----
-FROM python:3.12-slim AS base-cpu
+# ---- Final stage ----
+FROM python:3.12-slim
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
     ffmpeg curl libgl1 libglib2.0-0 \
@@ -11,31 +9,6 @@ COPY requirements-cpu.txt /tmp/requirements-cpu.txt
 COPY requirements.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir -r /tmp/requirements-cpu.txt && rm /tmp/requirements*.txt
 
-# ---- GPU base ----
-FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base-gpu
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    software-properties-common \
-    && add-apt-repository ppa:deadsnakes/ppa \
-    && apt-get update \
-    && apt-get install -y --no-install-recommends \
-    python3.12 python3.12-venv python3.12-dev \
-    ffmpeg curl libgl1 libglib2.0-0 \
-    && ln -sf /usr/bin/python3.12 /usr/bin/python3 \
-    && ln -sf /usr/bin/python3 /usr/bin/python \
-    && python3 -m ensurepip --upgrade \
-    && python3 -m pip install --no-cache-dir --upgrade pip \
-    && rm -rf /var/lib/apt/lists/*
-
-COPY requirements-gpu.txt /tmp/requirements-gpu.txt
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir -r /tmp/requirements-gpu.txt && rm /tmp/requirements*.txt
-
-# ---- Final stage ----
-FROM base-${VARIANT} AS final
-
 # CWD must be the FaceFusion submodule root so resolve_file_paths('facefusion/...')
 # resolves correctly (FaceFusion uses relative paths from CWD internally)
 WORKDIR /app/facefusion-src
diff --git a/README.md b/README.md
index 2695ff6..8241ee2 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,19 @@
 # FaceFusion API
 
-REST API wrapping [FaceFusion](https://github.com/facefusion/facefusion) v3.5.3 for face swapping, enhancement, lip sync, and other face/frame processing. Containerized with CPU and CUDA GPU support.
+REST API wrapping [FaceFusion](https://github.com/facefusion/facefusion) v3.5.3 for face swapping, enhancement, lip sync, and other face/frame processing. Containerized for CPU-only execution.
 
 ## Features
 
 - Synchronous and asynchronous (job-based) processing
 - Face swapping, enhancement, editing, lip sync, age modification, expression restoration, frame enhancement/colorization, background removal
-- Multi-target Docker builds (CPU + CUDA GPU)
+- Dockerized for easy deployment
 - Model persistence via Docker volumes
-- Gitea CI/CD with dual image publishing
+- Gitea CI/CD automated builds
 
 ## Quick Start
 
 ```bash
-# Build and run (CPU)
+# Build and run
 docker compose build
 docker compose up -d
 
@@ -53,7 +53,7 @@ curl http://localhost:8000/api/v1/jobs/{job_id}/result -o result.jpg
 | `GET` | `/api/v1/models` | List downloaded models |
 | `POST` | `/api/v1/models/download` | Trigger model download |
 | `GET` | `/api/v1/health` | Health check |
-| `GET` | `/api/v1/system` | System info (GPU, memory) |
+| `GET` | `/api/v1/system` | System info (CPU, memory) |
 
 ## Processing Options
 
@@ -80,18 +80,10 @@ Available processors: `face_swapper`, `face_enhancer`, `face_editor`, `lip_synce
 
 ## Deployment
 
-### CPU (VPS)
-
 ```bash
 docker compose -f docker-compose.prod.yml up -d
 ```
 
-### GPU (NVIDIA)
-
-```bash
-docker compose -f docker-compose.gpu.yml up -d
-```
-
 ## Configuration
 
 Environment variables (prefix `FF_`):
diff --git a/app/routers/system.py b/app/routers/system.py
index 071fed9..a97d954 100644
--- a/app/routers/system.py
+++ b/app/routers/system.py
@@ -5,7 +5,7 @@ import psutil
 
 from fastapi import APIRouter
 
-from app.schemas.system import GpuDevice, HealthResponse, SystemInfoResponse
+from app.schemas.system import HealthResponse, SystemInfoResponse
 from app.services import facefusion_bridge
 
 logger = logging.getLogger(__name__)
@@ -21,36 +21,11 @@ async def health_check():
 @router.get('/system', response_model=SystemInfoResponse)
 async def system_info():
     providers = facefusion_bridge.get_execution_providers()
-    gpu_devices = _detect_gpu_devices()
     mem = psutil.virtual_memory()
 
     return SystemInfoResponse(
         execution_providers=providers,
-        gpu_devices=gpu_devices,
         cpu_count=os.cpu_count(),
         memory_total=mem.total,
         memory_available=mem.available,
     )
-
-
-def _detect_gpu_devices():
-    devices = []
-    try:
-        import subprocess
-        result = subprocess.run(
-            ['nvidia-smi', '--query-gpu=index,name,memory.total,memory.used', '--format=csv,noheader,nounits'],
-            capture_output=True, text=True, timeout=5,
-        )
-        if result.returncode == 0:
-            for line in result.stdout.strip().split('\n'):
-                parts = [p.strip() for p in line.split(',')]
-                if len(parts) >= 4:
-                    devices.append(GpuDevice(
-                        id=int(parts[0]),
-                        name=parts[1],
-                        memory_total=int(float(parts[2])) * 1024 * 1024,
-                        memory_used=int(float(parts[3])) * 1024 * 1024,
-                    ))
-    except (FileNotFoundError, subprocess.TimeoutExpired):
-        pass
-    return devices
diff --git a/app/schemas/process.py b/app/schemas/process.py
index 7968f7a..934a2f4 100644
--- a/app/schemas/process.py
+++ b/app/schemas/process.py
@@ -104,7 +104,6 @@ class ProcessingOptions(BaseModel):
     face_detector: Optional[FaceDetectorOptions] = None
     face_selector: Optional[FaceSelectorOptions] = None
     output: Optional[OutputOptions] = None
-    execution_providers: Optional[List[str]] = None
     execution_thread_count: Optional[int] = None
     video_memory_strategy: Optional[str] = None
 
diff --git a/app/schemas/system.py b/app/schemas/system.py
index cd6e6de..de3d5c1 100644
--- a/app/schemas/system.py
+++ b/app/schemas/system.py
@@ -7,16 +7,8 @@ class HealthResponse(BaseModel):
     status: str = 'ok'
 
 
-class GpuDevice(BaseModel):
-    id: int
-    name: str
-    memory_total: Optional[int] = None
-    memory_used: Optional[int] = None
-
-
 class SystemInfoResponse(BaseModel):
     execution_providers: List[str]
-    gpu_devices: List[GpuDevice]
     cpu_count: Optional[int] = None
     memory_total: Optional[int] = None
     memory_available: Optional[int] = None
diff --git a/app/services/facefusion_bridge.py b/app/services/facefusion_bridge.py
index 40a6359..bbf8016 100644
--- a/app/services/facefusion_bridge.py
+++ b/app/services/facefusion_bridge.py
@@ -339,8 +339,6 @@ def build_args_from_options(
                 args[f'output_{key}'] = out[key]
 
     # Execution overrides
-    if 'execution_providers' in options:
-        args['execution_providers'] = options['execution_providers']
     if 'execution_thread_count' in options:
         args['execution_thread_count'] = options['execution_thread_count']
     if 'video_memory_strategy' in options:
diff --git a/docker-compose.gpu.yml b/docker-compose.gpu.yml
deleted file mode 100644
index 14d259a..0000000
--- a/docker-compose.gpu.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-services:
-  api:
-    image: dev.pivoine.art/valknar/facefusion-api:latest
-    ports:
-      - "8000:8000"
-    volumes:
-      - uploads:/data/uploads
-      - outputs:/data/outputs
-      - models:/data/models
-      - temp:/data/temp
-      - jobs:/data/jobs
-    environment:
-      - FF_EXECUTION_PROVIDERS=["cuda","cpu"]
-      - FF_EXECUTION_THREAD_COUNT=4
-      - FF_VIDEO_MEMORY_STRATEGY=moderate
-      - FF_LOG_LEVEL=info
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    restart: unless-stopped
-
-volumes:
-  uploads:
-  outputs:
-  models:
-  temp:
-  jobs:
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 61d3aec..8dbdf50 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -1,6 +1,6 @@
 services:
   api:
-    image: dev.pivoine.art/valknar/facefusion-api:latest-cpu
+    image: dev.pivoine.art/valknar/facefusion-api:latest
     ports:
       - "8000:8000"
     volumes:
diff --git a/docker-compose.yml b/docker-compose.yml
index 3dbcfc3..8ca78d1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,8 +2,6 @@ services:
   api:
     build:
       context: .
-      args:
-        VARIANT: cpu
     ports:
       - "8000:8000"
     volumes:
diff --git a/requirements-gpu.txt b/requirements-gpu.txt
deleted file mode 100644
index 79b263f..0000000
--- a/requirements-gpu.txt
+++ /dev/null
@@ -1,2 +0,0 @@
--r requirements.txt
-onnxruntime-gpu==1.24.1