refactor(ai): remove crawl4ai service, add backrest config to repo

- Remove crawl4ai service from ai/compose.yaml (will use local MCP instead)
- Remove crawl4ai backup volume from core/compose.yaml
- Add core/backrest/config.json (infrastructure as code)
- Change Backrest configuration from a named volume to a bind-mounted `config.json`
- Update CLAUDE.md and README.md documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-25 06:20:22 +01:00
parent 071a74a996
commit ef0309838c
6 changed files with 374 additions and 42 deletions

View File

@@ -451,12 +451,6 @@ AI infrastructure with Open WebUI, Crawl4AI, and dedicated PostgreSQL with pgvec
- User signup enabled
- Data persisted in `ai_webui_data` volume
- **crawl4ai**: Crawl4AI web scraping service (internal API, no public access)
- Optimized web scraper for LLM content preparation
- Internal API on port 11235 (not exposed via Traefik)
- Designed for integration with Open WebUI and n8n workflows
- Data persisted in `ai_crawl4ai_data` volume
- **comfyui**: ComfyUI reverse proxy exposed at `comfy.ai.pivoine.art:80`
- Nginx-based proxy to ComfyUI running on RunPod GPU server
- Node-based UI for Flux.1 Schnell image generation workflows
@@ -727,7 +721,7 @@ Backrest backup system with restic backend:
- Retention: 7 daily, 4 weekly, 3 monthly
16. **ai-backup** (3 AM daily)
- Paths: `/volumes/ai_postgres_data`, `/volumes/ai_webui_data`, `/volumes/ai_crawl4ai_data`
- Paths: `/volumes/ai_postgres_data`, `/volumes/ai_webui_data`
- Retention: 7 daily, 4 weekly, 6 monthly, 2 yearly
17. **asciinema-backup** (11 AM daily)
@@ -738,8 +732,7 @@ Backrest backup system with restic backend:
All Docker volumes are mounted read-only to `/volumes/` with prefixed names (e.g., `backup_core_postgres_data`) to avoid naming conflicts with other compose stacks.
**Configuration Management**:
- `config.json` template in repository defines all backup plans
- On first run, copy config into volume: `docker cp restic/config.json restic_app:/config/config.json`
- `core/backrest/config.json` in repository defines all backup plans (bind-mounted to container)
- Config version must be `4` for Backrest 1.10.1 compatibility
- Backrest manages auth automatically (username: `valknar`, password set via web UI on first access)
@@ -777,7 +770,7 @@ Each service uses named volumes prefixed with project name:
- `vault_data`: Vaultwarden password vault (SQLite database)
- `joplin_data`: Joplin note-taking data
- `jelly_config`: Jellyfin media server configuration
- `ai_postgres_data`, `ai_webui_data`, `ai_crawl4ai_data`: AI stack databases and application data
- `ai_postgres_data`, `ai_webui_data`: AI stack databases and application data
- `netdata_config`: Netdata monitoring configuration
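- `restic_data`, `restic_cache`, `restic_tmp`: Backrest backup system (configuration is no longer stored in a `restic_config` volume; it is bind-mounted from `core/backrest/config.json`)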
- `proxy_letsencrypt_data`: SSL certificates

View File

@@ -406,11 +406,10 @@ THE FALCON (falcon_network)
│ ├─ vaultwarden [vault.pivoine.art] → Password Manager
│ └─ tandoor [tandoor.pivoine.art] → Recipe Manager
├─ 🤖 AI STACK (5 services)
├─ 🤖 AI STACK (4 services)
│ ├─ ai_postgres [Internal] → pgvector Database
│ ├─ webui [ai.pivoine.art] → Open WebUI (Claude)
│ ├─ litellm [llm.ai.pivoine.art] → API Proxy
│ ├─ crawl4ai [Internal:11235] → Web Scraper
│ └─ facefusion [facefusion.ai.pivoine.art] → Face AI
├─ 🛡️ NET STACK (4 services)
@@ -435,7 +434,7 @@ THE FALCON (falcon_network)
├─ Core: postgres_data, redis_data, backrest_*
├─ Sexy: directus_uploads, directus_bundle
├─ Util: pairdrop_*, joplin_data, linkwarden_*, mattermost_*, vaultwarden_data, tandoor_*
├─ AI: ai_postgres_data, ai_webui_data, ai_crawl4ai_data, facefusion_*
├─ AI: ai_postgres_data, ai_webui_data, facefusion_*
├─ Net: letsencrypt_data, netdata_*
├─ Media: jelly_config, jelly_cache, filestash_data
└─ Dev: gitea_*, coolify_data, n8n_data, asciinema_data

View File

@@ -146,25 +146,6 @@ services:
# Watchtower
- 'com.centurylinklabs.watchtower.enable=${WATCHTOWER_LABEL_ENABLE}'
# Crawl4AI - Web scraping for LLMs (internal API, no public access)
crawl4ai:
image: ${AI_CRAWL4AI_IMAGE:-unclecode/crawl4ai:latest}
container_name: ${AI_COMPOSE_PROJECT_NAME}_crawl4ai
restart: unless-stopped
environment:
TZ: ${TIMEZONE:-Europe/Berlin}
# API configuration
PORT: ${AI_CRAWL4AI_PORT:-11235}
volumes:
- ai_crawl4ai_data:/app/.crawl4ai
networks:
- compose_network
labels:
# No Traefik exposure - internal only
- 'traefik.enable=false'
# Watchtower
- 'com.centurylinklabs.watchtower.enable=${WATCHTOWER_LABEL_ENABLE}'
# Facefusion - AI face swapping and enhancement
facefusion:
build:
@@ -273,7 +254,5 @@ volumes:
name: ${AI_COMPOSE_PROJECT_NAME}_postgres_data
ai_webui_data:
name: ${AI_COMPOSE_PROJECT_NAME}_webui_data
ai_crawl4ai_data:
name: ${AI_COMPOSE_PROJECT_NAME}_crawl4ai_data
ai_facefusion_data:
name: ${AI_COMPOSE_PROJECT_NAME}_facefusion_data

View File

@@ -135,7 +135,6 @@ envs:
AI_COMPOSE_PROJECT_NAME: ai
AI_POSTGRES_IMAGE: pgvector/pgvector:pg16
AI_WEBUI_IMAGE: ghcr.io/open-webui/open-webui:main
AI_CRAWL4AI_IMAGE: unclecode/crawl4ai:latest
AI_FACEFUSION_IMAGE: facefusion/facefusion:3.5.0-cpu
AI_FACEFUSION_TRAEFIK_ENABLED: true
AI_FACEFUSION_TRAEFIK_HOST: facefusion.ai.pivoine.art

368
core/backrest/config.json Normal file
View File

@@ -0,0 +1,368 @@
{
"modno": 1,
"version": 4,
"instance": "falcon",
"repos": [
{
"id": "hidrive-backup",
"uri": "/repos",
"guid": "df03886ea215b0a3ff9730190d906d7034032bf0f1906ed4ad00f2c4f1748215",
"password": "falcon-backup-2025",
"prunePolicy": {
"schedule": {
"cron": "0 2 * * 0"
}
},
"checkPolicy": {
"schedule": {
"cron": "0 3 * * 0"
}
},
"autoUnlock": true
}
],
"plans": [
{
"id": "ai-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/ai_postgres_data",
"/volumes/ai_webui_data"
],
"schedule": {
"cron": "0 3 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "asciinema-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/asciinema_data"
],
"schedule": {
"cron": "0 11 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "coolify-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/dev_coolify_data"
],
"schedule": {
"cron": "0 0 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "directus-bundle-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/directus_bundle"
],
"schedule": {
"cron": "0 4 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 3
}
}
},
{
"id": "directus-uploads-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/directus_uploads"
],
"schedule": {
"cron": "0 4 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "filestash-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/filestash_data"
],
"schedule": {
"cron": "0 7 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 3
}
}
},
{
"id": "gitea-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/dev_gitea_config",
"/volumes/dev_gitea_data",
"/volumes/dev_gitea_runner_data"
],
"schedule": {
"cron": "0 11 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "jellyfin-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/jelly_config"
],
"schedule": {
"cron": "0 9 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "joplin-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/joplin_data"
],
"schedule": {
"cron": "0 2 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "letsencrypt-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/letsencrypt_data"
],
"schedule": {
"cron": "0 8 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 12,
"yearly": 3
}
}
},
{
"id": "linkwarden-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/linkwarden_data",
"/volumes/linkwarden_meili_data"
],
"schedule": {
"cron": "0 7 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6
}
}
},
{
"id": "mattermost-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/mattermost_config",
"/volumes/mattermost_data",
"/volumes/mattermost_plugins"
],
"schedule": {
"cron": "0 5 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "n8n-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/n8n_data"
],
"schedule": {
"cron": "0 6 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6
}
}
},
{
"id": "netdata-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/netdata_config"
],
"schedule": {
"cron": "0 10 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 3
}
}
},
{
"id": "postgres-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/core_postgres_data"
],
"schedule": {
"cron": "0 2 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6,
"yearly": 2
}
}
},
{
"id": "redis-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/core_redis_data"
],
"schedule": {
"cron": "0 3 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 3
}
}
},
{
"id": "scrapy-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/scrapy_code",
"/volumes/scrapyd_data"
],
"schedule": {
"cron": "0 6 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 3
}
}
},
{
"id": "tandoor-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/tandoor_mediafiles",
"/volumes/tandoor_staticfiles"
],
"schedule": {
"cron": "0 5 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 6
}
}
},
{
"id": "vaultwarden-backup",
"repo": "hidrive-backup",
"paths": [
"/volumes/vaultwarden_data"
],
"schedule": {
"cron": "0 8 * * *"
},
"retention": {
"policyTimeBucketed": {
"daily": 7,
"weekly": 4,
"monthly": 12,
"yearly": 3
}
}
}
]
}

View File

@@ -56,7 +56,7 @@ services:
volumes:
# Backrest application data
- backrest_data:/data
- backrest_config:/config
- ./backrest/config.json:/config/config.json
- backrest_cache:/cache
- backrest_tmp:/tmp
@@ -84,7 +84,6 @@ services:
- backup_netdata_config:/volumes/netdata_config:ro
- backup_ai_postgres_data:/volumes/ai_postgres_data:ro
- backup_ai_webui_data:/volumes/ai_webui_data:ro
- backup_ai_crawl4ai_data:/volumes/ai_crawl4ai_data:ro
- backup_asciinema_data:/volumes/asciinema_data:ro
- backup_dev_gitea_data:/volumes/dev_gitea_data:ro
- backup_dev_gitea_config:/volumes/dev_gitea_config:ro
@@ -124,8 +123,6 @@ volumes:
name: ${CORE_COMPOSE_PROJECT_NAME}_redis_data
backrest_data:
name: ${CORE_COMPOSE_PROJECT_NAME}_backrest_data
backrest_config:
name: ${CORE_COMPOSE_PROJECT_NAME}_backrest_config
backrest_cache:
name: ${CORE_COMPOSE_PROJECT_NAME}_backrest_cache
backrest_tmp:
@@ -192,9 +189,6 @@ volumes:
backup_ai_webui_data:
name: ai_webui_data
external: true
backup_ai_crawl4ai_data:
name: ai_crawl4ai_data
external: true
backup_asciinema_data:
name: dev_asciinema_data
external: true