"""Health check and system information endpoints."""

import logging
import os
import time
from typing import Optional

import psutil
from fastapi import APIRouter, HTTPException
from fastapi.concurrency import run_in_threadpool

from app.config import settings
from app.schemas.health import HealthResponse, RequestStats, SystemInfo
from app.services import file_manager, worker

logger = logging.getLogger(__name__)

router = APIRouter(prefix='/api/v1', tags=['system'])

# Version string reported by /health and /system (kept in one place so the
# two endpoints cannot drift apart).
_API_VERSION = '1.0.0'

# Track uptime: wall-clock timestamp taken when this module is imported.
_start_time = time.time()

# Request statistics.
# NOTE(review): nothing in this module increments these counters; presumably
# they are updated from elsewhere in the application -- confirm.
_stats = {
    'total_requests': 0,
    'successful_requests': 0,
    'failed_requests': 0,
    'total_processing_time': 0.0,
    'total_images_processed': 0,
}


@router.get('/health')
async def health_check() -> HealthResponse:
    """API health check.

    Returns:
        A ``HealthResponse`` with status ``'ok'``, the API version, and the
        number of seconds elapsed since this module was imported.
    """
    uptime = time.time() - _start_time
    return HealthResponse(
        status='ok',
        version=_API_VERSION,
        uptime_seconds=uptime,
        message='Real-ESRGAN API is running',
    )


@router.get('/health/ready')
async def readiness_check():
    """Kubernetes readiness probe.

    Returns:
        ``{'ready': True}`` when the Real-ESRGAN bridge reports it is
        initialized.

    Raises:
        HTTPException: 503 if the bridge's ``initialized`` flag is falsy.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably to avoid a circular/heavy import -- confirm.
    from app.services import realesrgan_bridge

    bridge = realesrgan_bridge.get_bridge()
    if not bridge.initialized:
        raise HTTPException(status_code=503, detail='Not ready')
    return {'ready': True}


@router.get('/health/live')
async def liveness_check():
    """Kubernetes liveness probe.

    Returns:
        Always ``{'alive': True}``; no dependencies are checked.
    """
    return {'alive': True}


@router.get('/system')
async def get_system_info() -> SystemInfo:
    """Get comprehensive system information.

    Collects uptime, CPU / memory / disk utilisation (via ``psutil``), the
    size of the models directory, and the current worker queue length.

    Returns:
        A populated ``SystemInfo`` instance.

    Raises:
        HTTPException: 500 with the underlying error message if any of the
            metrics cannot be collected.
    """
    try:
        # Uptime
        uptime = time.time() - _start_time

        # CPU and memory.
        # cpu_percent(interval=1) sleeps for a full second while sampling.
        # Calling it directly inside this coroutine would freeze the event
        # loop (and with it every other request, including the liveness and
        # readiness probes), so the sample is taken in a worker thread.
        cpu_percent = await run_in_threadpool(psutil.cpu_percent, interval=1)
        memory = psutil.virtual_memory()
        memory_percent = memory.percent

        # Disk usage of the root filesystem
        disk = psutil.disk_usage('/')
        disk_percent = disk.percent

        # Models directory size
        models_size = file_manager.get_directory_size_mb(settings.models_dir)

        # Jobs queue
        wq = worker.get_worker_queue()
        queue_length = wq.queue.qsize()

        return SystemInfo(
            status='ok',
            version=_API_VERSION,
            uptime_seconds=uptime,
            cpu_usage_percent=cpu_percent,
            memory_usage_percent=memory_percent,
            disk_usage_percent=disk_percent,
            execution_providers=settings.get_execution_providers(),
            models_dir_size_mb=models_size,
            jobs_queue_length=queue_length,
        )
    except Exception as e:
        logger.error(f'Failed to get system info: {e}', exc_info=True)
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get('/stats')
async def get_stats() -> RequestStats:
    """Get request statistics.

    Returns:
        A ``RequestStats`` snapshot of the module-level ``_stats`` counters.
        The average processing time is total processing time divided by the
        number of successful requests, or ``0.0`` when there are none.
    """
    avg_time = 0.0
    # Guard against division by zero before any request has succeeded.
    if _stats['successful_requests'] > 0:
        avg_time = _stats['total_processing_time'] / _stats['successful_requests']

    return RequestStats(
        total_requests=_stats['total_requests'],
        successful_requests=_stats['successful_requests'],
        failed_requests=_stats['failed_requests'],
        average_processing_time_seconds=avg_time,
        total_images_processed=_stats['total_images_processed'],
    )


@router.post('/cleanup')
async def cleanup_old_jobs(hours: int = 24):
    """Clean up old job directories.

    Args:
        hours: Age threshold passed through to
            ``file_manager.cleanup_old_jobs``. Defaults to 24.

    Returns:
        A dict with ``success``, the ``cleaned_jobs`` value returned by the
        file manager, and a human-readable ``message``.

    Raises:
        HTTPException: 500 with the underlying error message if cleanup
            fails.
    """
    try:
        cleaned = file_manager.cleanup_old_jobs(hours)
        return {
            'success': True,
            'cleaned_jobs': cleaned,
            'message': f'Cleaned up {cleaned} job directories older than {hours} hours',
        }
    except Exception as e:
        logger.error(f'Cleanup failed: {e}', exc_info=True)
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=str(e)) from e