feat: remove gpu support and simplify to cpu-only
All checks were successful
Build and Push Docker Image / build (push) Successful in 1m13s

This commit is contained in:
2026-02-19 12:50:10 +01:00
parent 0fe96a9615
commit 58445340c8
12 changed files with 21 additions and 190 deletions

View File

@@ -22,7 +22,7 @@ env:
IMAGE_NAME: valknar/facefusion-api
jobs:
build-gpu:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
@@ -42,7 +42,7 @@ jobs:
username: ${{ gitea.actor }}
password: ${{ secrets.REGISTRY_TOKEN }}
- name: Extract metadata (GPU)
- name: Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
@@ -58,10 +58,10 @@ jobs:
type=raw,value=${{ gitea.event.inputs.tag }},enable=${{ gitea.event_name == 'workflow_dispatch' }}
labels: |
org.opencontainers.image.title=facefusion-api
org.opencontainers.image.description=REST API for FaceFusion face processing (GPU)
org.opencontainers.image.description=REST API for FaceFusion face processing (CPU)
org.opencontainers.image.vendor=valknar
- name: Build and push GPU image
- name: Build and push image
uses: docker/build-push-action@v5
with:
context: .
@@ -69,73 +69,14 @@ jobs:
push: ${{ gitea.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: VARIANT=gpu
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-gpu
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-gpu,mode=max
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache,mode=max
- name: Summary
if: gitea.event_name != 'pull_request'
run: |
echo "### GPU Image Published" >> $GITEA_STEP_SUMMARY
echo "### Image Published" >> $GITEA_STEP_SUMMARY
echo "**Tags:**" >> $GITEA_STEP_SUMMARY
echo "\`\`\`" >> $GITEA_STEP_SUMMARY
echo "${{ steps.meta.outputs.tags }}" >> $GITEA_STEP_SUMMARY
echo "\`\`\`" >> $GITEA_STEP_SUMMARY
build-cpu:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
platforms: linux/amd64
- name: Log in to Gitea Container Registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ gitea.actor }}
password: ${{ secrets.REGISTRY_TOKEN }}
- name: Extract metadata (CPU)
id: meta-cpu
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
flavor: suffix=-cpu
tags: |
type=raw,value=latest-cpu,enable={{is_default_branch}}
type=ref,event=branch,suffix=-cpu
type=ref,event=pr,suffix=-cpu
type=semver,pattern={{version}},suffix=-cpu
type=sha,prefix={{branch}}-,suffix=-cpu
labels: |
org.opencontainers.image.title=facefusion-api
org.opencontainers.image.description=REST API for FaceFusion face processing (CPU)
org.opencontainers.image.vendor=valknar
- name: Build and push CPU image
uses: docker/build-push-action@v5
with:
context: .
platforms: linux/amd64
push: ${{ gitea.event_name != 'pull_request' }}
tags: ${{ steps.meta-cpu.outputs.tags }}
labels: ${{ steps.meta-cpu.outputs.labels }}
build-args: VARIANT=cpu
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-cpu
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache-cpu,mode=max
- name: Summary
if: gitea.event_name != 'pull_request'
run: |
echo "### CPU Image Published" >> $GITEA_STEP_SUMMARY
echo "**Tags:**" >> $GITEA_STEP_SUMMARY
echo "\`\`\`" >> $GITEA_STEP_SUMMARY
echo "${{ steps.meta-cpu.outputs.tags }}" >> $GITEA_STEP_SUMMARY
echo "\`\`\`" >> $GITEA_STEP_SUMMARY

View File

@@ -2,7 +2,7 @@
## Overview
FaceFusion API - A Python REST API wrapping FaceFusion v3.5.3 for face swapping, enhancement, lip sync, and other face/frame processing. Containerized with CUDA GPU support, deployed via Gitea CI/CD at `dev.pivoine.art`.
FaceFusion API - A Python REST API wrapping FaceFusion v3.5.3 for face swapping, enhancement, lip sync, and other face/frame processing. Containerized for CPU-only execution, deployed via Gitea CI/CD at `dev.pivoine.art`.
## Architecture
@@ -43,9 +43,6 @@ docker compose up
# Production (CPU VPS)
docker compose -f docker-compose.prod.yml up -d
# Production (GPU server)
docker compose -f docker-compose.gpu.yml up -d
# Test endpoints
curl http://localhost:8000/api/v1/health
curl http://localhost:8000/api/v1/system
@@ -69,19 +66,18 @@ curl -X POST http://localhost:8000/api/v1/process \
- `GET /api/v1/models` - List downloaded models
- `POST /api/v1/models/download` - Download models
- `GET /api/v1/health` - Health check
- `GET /api/v1/system` - System info (GPU, memory)
- `GET /api/v1/system` - System info (CPU, memory)
## Docker
- `VARIANT=cpu` (default): `python:3.12-slim` + `onnxruntime`
- `VARIANT=gpu`: `nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04` + `onnxruntime-gpu`
- Base image: `python:3.12-slim` + `onnxruntime`
- Models persisted in `/data/models` Docker volume (not baked into image)
- Single worker mandatory (`--workers 1`)
## Environment Variables
All prefixed with `FF_`:
- `FF_EXECUTION_PROVIDERS` - JSON array, e.g. `["cuda","cpu"]`
- `FF_EXECUTION_PROVIDERS` - JSON array, e.g. `["cpu"]`
- `FF_EXECUTION_THREAD_COUNT` - Default 4
- `FF_VIDEO_MEMORY_STRATEGY` - strict/moderate/tolerant
- `FF_MODELS_DIR` - Model storage path
@@ -91,5 +87,5 @@ All prefixed with `FF_`:
- Never run with multiple uvicorn workers - FaceFusion global state will corrupt
- Models are 100MB-1GB each; pre-download via `/api/v1/models/download` before processing
- The `facefusion/` submodule must not be modified - use symlinks for model paths
- The `facefusion/` submodule should not be modified directly when avoidable - prefer symlinks for model paths
- Git operations: always push with the valknarthing ssh key

View File

@@ -1,7 +1,5 @@
ARG VARIANT=cpu
# ---- CPU base ----
FROM python:3.12-slim AS base-cpu
# ---- Final stage ----
FROM python:3.12-slim
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg curl libgl1 libglib2.0-0 \
@@ -11,31 +9,6 @@ COPY requirements-cpu.txt /tmp/requirements-cpu.txt
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements-cpu.txt && rm /tmp/requirements*.txt
# ---- GPU base ----
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base-gpu
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common \
&& add-apt-repository ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python3.12 python3.12-venv python3.12-dev \
ffmpeg curl libgl1 libglib2.0-0 \
&& ln -sf /usr/bin/python3.12 /usr/bin/python3 \
&& ln -sf /usr/bin/python3 /usr/bin/python \
&& python3 -m ensurepip --upgrade \
&& python3 -m pip install --no-cache-dir --upgrade pip \
&& rm -rf /var/lib/apt/lists/*
COPY requirements-gpu.txt /tmp/requirements-gpu.txt
COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements-gpu.txt && rm /tmp/requirements*.txt
# ---- Final stage ----
FROM base-${VARIANT} AS final
# CWD must be the FaceFusion submodule root so resolve_file_paths('facefusion/...')
# resolves correctly (FaceFusion uses relative paths from CWD internally)
WORKDIR /app/facefusion-src

View File

@@ -1,19 +1,19 @@
# FaceFusion API
REST API wrapping [FaceFusion](https://github.com/facefusion/facefusion) v3.5.3 for face swapping, enhancement, lip sync, and other face/frame processing. Containerized with CPU and CUDA GPU support.
REST API wrapping [FaceFusion](https://github.com/facefusion/facefusion) v3.5.3 for face swapping, enhancement, lip sync, and other face/frame processing. Containerized for CPU-only execution.
## Features
- Synchronous and asynchronous (job-based) processing
- Face swapping, enhancement, editing, lip sync, age modification, expression restoration, frame enhancement/colorization, background removal
- Multi-target Docker builds (CPU + CUDA GPU)
- Dockerized for easy deployment
- Model persistence via Docker volumes
- Gitea CI/CD with dual image publishing
- Gitea CI/CD automated builds
## Quick Start
```bash
# Build and run (CPU)
# Build and run
docker compose build
docker compose up -d
@@ -53,7 +53,7 @@ curl http://localhost:8000/api/v1/jobs/{job_id}/result -o result.jpg
| `GET` | `/api/v1/models` | List downloaded models |
| `POST` | `/api/v1/models/download` | Trigger model download |
| `GET` | `/api/v1/health` | Health check |
| `GET` | `/api/v1/system` | System info (GPU, memory) |
| `GET` | `/api/v1/system` | System info (CPU, memory) |
## Processing Options
@@ -80,18 +80,10 @@ Available processors: `face_swapper`, `face_enhancer`, `face_editor`, `lip_synce
## Deployment
### CPU (VPS)
```bash
docker compose -f docker-compose.prod.yml up -d
```
### GPU (NVIDIA)
```bash
docker compose -f docker-compose.gpu.yml up -d
```
## Configuration
Environment variables (prefix `FF_`):

View File

@@ -5,7 +5,7 @@ import psutil
from fastapi import APIRouter
from app.schemas.system import GpuDevice, HealthResponse, SystemInfoResponse
from app.schemas.system import HealthResponse, SystemInfoResponse
from app.services import facefusion_bridge
logger = logging.getLogger(__name__)
@@ -21,36 +21,11 @@ async def health_check():
@router.get('/system', response_model=SystemInfoResponse)
async def system_info():
providers = facefusion_bridge.get_execution_providers()
gpu_devices = _detect_gpu_devices()
mem = psutil.virtual_memory()
return SystemInfoResponse(
execution_providers=providers,
gpu_devices=gpu_devices,
cpu_count=os.cpu_count(),
memory_total=mem.total,
memory_available=mem.available,
)
def _detect_gpu_devices():
devices = []
try:
import subprocess
result = subprocess.run(
['nvidia-smi', '--query-gpu=index,name,memory.total,memory.used', '--format=csv,noheader,nounits'],
capture_output=True, text=True, timeout=5,
)
if result.returncode == 0:
for line in result.stdout.strip().split('\n'):
parts = [p.strip() for p in line.split(',')]
if len(parts) >= 4:
devices.append(GpuDevice(
id=int(parts[0]),
name=parts[1],
memory_total=int(float(parts[2])) * 1024 * 1024,
memory_used=int(float(parts[3])) * 1024 * 1024,
))
except (FileNotFoundError, subprocess.TimeoutExpired):
pass
return devices

View File

@@ -104,7 +104,6 @@ class ProcessingOptions(BaseModel):
face_detector: Optional[FaceDetectorOptions] = None
face_selector: Optional[FaceSelectorOptions] = None
output: Optional[OutputOptions] = None
execution_providers: Optional[List[str]] = None
execution_thread_count: Optional[int] = None
video_memory_strategy: Optional[str] = None

View File

@@ -7,16 +7,8 @@ class HealthResponse(BaseModel):
status: str = 'ok'
class GpuDevice(BaseModel):
id: int
name: str
memory_total: Optional[int] = None
memory_used: Optional[int] = None
class SystemInfoResponse(BaseModel):
execution_providers: List[str]
gpu_devices: List[GpuDevice]
cpu_count: Optional[int] = None
memory_total: Optional[int] = None
memory_available: Optional[int] = None

View File

@@ -339,8 +339,6 @@ def build_args_from_options(
args[f'output_{key}'] = out[key]
# Execution overrides
if 'execution_providers' in options:
args['execution_providers'] = options['execution_providers']
if 'execution_thread_count' in options:
args['execution_thread_count'] = options['execution_thread_count']
if 'video_memory_strategy' in options:

View File

@@ -1,31 +0,0 @@
services:
api:
image: dev.pivoine.art/valknar/facefusion-api:latest
ports:
- "8000:8000"
volumes:
- uploads:/data/uploads
- outputs:/data/outputs
- models:/data/models
- temp:/data/temp
- jobs:/data/jobs
environment:
- FF_EXECUTION_PROVIDERS=["cuda","cpu"]
- FF_EXECUTION_THREAD_COUNT=4
- FF_VIDEO_MEMORY_STRATEGY=moderate
- FF_LOG_LEVEL=info
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped
volumes:
uploads:
outputs:
models:
temp:
jobs:

View File

@@ -1,6 +1,6 @@
services:
api:
image: dev.pivoine.art/valknar/facefusion-api:latest-cpu
image: dev.pivoine.art/valknar/facefusion-api:latest
ports:
- "8000:8000"
volumes:

View File

@@ -2,8 +2,6 @@ services:
api:
build:
context: .
args:
VARIANT: cpu
ports:
- "8000:8000"
volumes:

View File

@@ -1,2 +0,0 @@
-r requirements.txt
onnxruntime-gpu==1.24.1