refactor: remove all document conversion support, keep only media conversions

This commit completely removes document conversion functionality to focus exclusively on media file conversions (video, audio, images).

Changes:
- Remove all document converter services (pandocService.ts, pdfService.ts, docxService.ts)
- Uninstall document-related packages: marked, turndown, dompurify, jspdf, pdfjs-dist, docx, mammoth, @types/turndown
- Remove document formats (PDF, DOCX, Markdown, HTML, TXT) from formatMappings.ts
- Remove pandoc converter from FileConverter.tsx
- Remove pandoc loader and references from wasmLoader.ts
- Update TypeScript types to remove 'pandoc' from ConverterEngine and 'document' from FileCategory
- Remove pandoc from WASMModuleState interface
- Update README.md to remove all document conversion documentation
- Update UI descriptions to reflect media-only conversions

Supported conversions now:
- Video: MP4, WebM, AVI, MOV, MKV, GIF
- Audio: MP3, WAV, OGG, AAC, FLAC
- Images: PNG, JPG, WebP, GIF, BMP, TIFF, SVG

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-17 11:35:20 +01:00
parent de3997f4df
commit 594a0ca314
10 changed files with 6 additions and 1867 deletions
--- a/lib/converters/docxService.ts
+++ b/lib/converters/docxService.ts
@@ -1,443 +0,0 @@
-import { Document, Packer, Paragraph, TextRun, HeadingLevel } from 'docx';
-import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
-
-/**
- * Extract text from DOCX file using mammoth
- */
-export async function extractTextFromDOCX(file: File, onProgress?: ProgressCallback): Promise<string> {
-  if (onProgress) onProgress(10);
-
-  // Dynamically import mammoth (client-side only)
-  const mammoth = await import('mammoth');
-
-  if (onProgress) onProgress(30);
-
-  // Read file as ArrayBuffer
-  const arrayBuffer = await file.arrayBuffer();
-
-  if (onProgress) onProgress(50);
-
-  // Extract text from DOCX
-  const result = await mammoth.extractRawText({ arrayBuffer });
-
-  if (onProgress) onProgress(100);
-
-  return result.value;
-}
-
-/**
- * Extract HTML from DOCX file using mammoth
- */
-export async function extractHTMLFromDOCX(file: File, onProgress?: ProgressCallback): Promise<string> {
-  if (onProgress) onProgress(10);
-
-  // Dynamically import mammoth (client-side only)
-  const mammoth = await import('mammoth');
-
-  if (onProgress) onProgress(30);
-
-  // Read file as ArrayBuffer
-  const arrayBuffer = await file.arrayBuffer();
-
-  if (onProgress) onProgress(50);
-
-  // Convert DOCX to HTML
-  const result = await mammoth.convertToHtml({ arrayBuffer });
-
-  if (onProgress) onProgress(100);
-
-  return result.value;
-}
-
-/**
- * Convert DOCX to plain text
- */
-export async function docxToText(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    const text = await extractTextFromDOCX(file, onProgress);
-    const blob = new Blob([text], { type: 'text/plain' });
-
-    return {
-      success: true,
-      blob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[DOCX Converter] DOCX to text error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to extract text from DOCX',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert DOCX to HTML
- */
-export async function docxToHTML(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    const html = await extractHTMLFromDOCX(file, (progress) => {
-      if (onProgress) onProgress(progress * 0.9);
-    });
-
-    // Wrap in full HTML document
-    const fullHTML = `<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <title>Converted Document</title>
-  <style>
-    body {
-      font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-      line-height: 1.6;
-      max-width: 800px;
-      margin: 2rem auto;
-      padding: 0 1rem;
-      color: #333;
-    }
-  </style>
-</head>
-<body>
-${html}
-</body>
-</html>`;
-
-    if (onProgress) onProgress(100);
-
-    const blob = new Blob([fullHTML], { type: 'text/html' });
-
-    return {
-      success: true,
-      blob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[DOCX Converter] DOCX to HTML error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert DOCX to HTML',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert DOCX to Markdown
- */
-export async function docxToMarkdown(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    // First convert to HTML
-    const html = await extractHTMLFromDOCX(file, (progress) => {
-      if (onProgress) onProgress(progress * 0.7);
-    });
-
-    if (onProgress) onProgress(80);
-
-    // Import turndown for HTML to Markdown
-    const TurndownService = (await import('turndown')).default;
-    const turndownService = new TurndownService({
-      headingStyle: 'atx',
-      codeBlockStyle: 'fenced',
-      bulletListMarker: '-',
-    });
-
-    const markdown = turndownService.turndown(html);
-
-    if (onProgress) onProgress(100);
-
-    const blob = new Blob([markdown], { type: 'text/markdown' });
-
-    return {
-      success: true,
-      blob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[DOCX Converter] DOCX to Markdown error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert DOCX to Markdown',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Create DOCX from text content
- */
-async function createDOCXFromText(text: string, onProgress?: ProgressCallback): Promise<Blob> {
-  if (onProgress) onProgress(20);
-
-  // Split text into paragraphs
-  const paragraphs = text.split('\n\n').filter(p => p.trim());
-
-  if (onProgress) onProgress(40);
-
-  // Create document with paragraphs
-  const doc = new Document({
-    sections: [
-      {
-        properties: {},
-        children: paragraphs.map((para) => {
-          return new Paragraph({
-            children: [new TextRun(para.trim())],
-            spacing: {
-              after: 200,
-            },
-          });
-        }),
-      },
-    ],
-  });
-
-  if (onProgress) onProgress(70);
-
-  // Generate DOCX blob
-  const blob = await Packer.toBlob(doc);
-
-  if (onProgress) onProgress(100);
-
-  return blob;
-}
-
-/**
- * Create DOCX from Markdown
- */
-async function createDOCXFromMarkdown(markdown: string, onProgress?: ProgressCallback): Promise<Blob> {
-  if (onProgress) onProgress(10);
-
-  // Parse markdown and create structured document
-  const lines = markdown.split('\n');
-  const children: Paragraph[] = [];
-
-  let currentParagraph: string[] = [];
-
-  for (const line of lines) {
-    if (line.startsWith('# ')) {
-      // Heading 1
-      if (currentParagraph.length > 0) {
-        children.push(new Paragraph({
-          children: [new TextRun(currentParagraph.join(' '))],
-          spacing: { after: 200 },
-        }));
-        currentParagraph = [];
-      }
-      children.push(new Paragraph({
-        text: line.substring(2),
-        heading: HeadingLevel.HEADING_1,
-        spacing: { before: 240, after: 120 },
-      }));
-    } else if (line.startsWith('## ')) {
-      // Heading 2
-      if (currentParagraph.length > 0) {
-        children.push(new Paragraph({
-          children: [new TextRun(currentParagraph.join(' '))],
-          spacing: { after: 200 },
-        }));
-        currentParagraph = [];
-      }
-      children.push(new Paragraph({
-        text: line.substring(3),
-        heading: HeadingLevel.HEADING_2,
-        spacing: { before: 200, after: 100 },
-      }));
-    } else if (line.startsWith('### ')) {
-      // Heading 3
-      if (currentParagraph.length > 0) {
-        children.push(new Paragraph({
-          children: [new TextRun(currentParagraph.join(' '))],
-          spacing: { after: 200 },
-        }));
-        currentParagraph = [];
-      }
-      children.push(new Paragraph({
-        text: line.substring(4),
-        heading: HeadingLevel.HEADING_3,
-        spacing: { before: 160, after: 80 },
-      }));
-    } else if (line.trim() === '') {
-      // Empty line - paragraph break
-      if (currentParagraph.length > 0) {
-        children.push(new Paragraph({
-          children: [new TextRun(currentParagraph.join(' '))],
-          spacing: { after: 200 },
-        }));
-        currentParagraph = [];
-      }
-    } else {
-      // Regular text
-      currentParagraph.push(line);
-    }
-  }
-
-  // Add remaining paragraph
-  if (currentParagraph.length > 0) {
-    children.push(new Paragraph({
-      children: [new TextRun(currentParagraph.join(' '))],
-      spacing: { after: 200 },
-    }));
-  }
-
-  if (onProgress) onProgress(60);
-
-  const doc = new Document({
-    sections: [
-      {
-        properties: {},
-        children,
-      },
-    ],
-  });
-
-  if (onProgress) onProgress(80);
-
-  const blob = await Packer.toBlob(doc);
-
-  if (onProgress) onProgress(100);
-
-  return blob;
-}
-
-/**
- * Convert plain text to DOCX
- */
-export async function textToDOCX(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    if (onProgress) onProgress(10);
-
-    const text = await file.text();
-
-    if (onProgress) onProgress(20);
-
-    const blob = await createDOCXFromText(text, (progress) => {
-      if (onProgress) onProgress(20 + progress * 0.8);
-    });
-
-    return {
-      success: true,
-      blob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[DOCX Converter] Text to DOCX error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert text to DOCX',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert Markdown to DOCX
- */
-export async function markdownToDOCX(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    if (onProgress) onProgress(10);
-
-    const markdown = await file.text();
-
-    if (onProgress) onProgress(20);
-
-    const blob = await createDOCXFromMarkdown(markdown, (progress) => {
-      if (onProgress) onProgress(20 + progress * 0.8);
-    });
-
-    return {
-      success: true,
-      blob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[DOCX Converter] Markdown to DOCX error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert Markdown to DOCX',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert HTML to DOCX
- */
-export async function htmlToDOCX(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    if (onProgress) onProgress(10);
-
-    const html = await file.text();
-
-    if (onProgress) onProgress(20);
-
-    // Strip HTML tags to get plain text
-    const text = html
-      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
-      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
-      .replace(/<[^>]*>/g, ' ')
-      .replace(/&nbsp;/g, ' ')
-      .replace(/&quot;/g, '"')
-      .replace(/&apos;/g, "'")
-      .replace(/&lt;/g, '<')
-      .replace(/&gt;/g, '>')
-      .replace(/&amp;/g, '&')
-      .replace(/\s+/g, ' ')
-      .trim();
-
-    if (onProgress) onProgress(50);
-
-    const blob = await createDOCXFromText(text, (progress) => {
-      if (onProgress) onProgress(50 + progress * 0.5);
-    });
-
-    return {
-      success: true,
-      blob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[DOCX Converter] HTML to DOCX error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert HTML to DOCX',
-      duration: Date.now() - startTime,
-    };
-  }
-}
--- a/lib/converters/pandocService.ts
+++ b/lib/converters/pandocService.ts
@@ -1,379 +0,0 @@
-import { marked } from 'marked';
-import TurndownService from 'turndown';
-import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
-import {
-  pdfToText,
-  pdfToMarkdown,
-  markdownToPDF,
-  htmlToPDF,
-  plainTextToPDF,
-} from './pdfService';
-import {
-  docxToText,
-  docxToHTML,
-  docxToMarkdown,
-  textToDOCX,
-  markdownToDOCX,
-  htmlToDOCX,
-} from './docxService';
-
-// Import DOMPurify only on client side
-let DOMPurify: any;
-if (typeof window !== 'undefined') {
-  DOMPurify = require('dompurify');
-}
-
-/**
- * Convert document using Markdown/HTML converters
- * Note: This uses lightweight JS libraries instead of Pandoc WASM (which isn't widely available)
- */
-export async function convertWithPandoc(
-  file: File,
-  outputFormat: string,
-  options: ConversionOptions = {},
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    if (onProgress) onProgress(10);
-
-    // Read file content as text
-    const text = await file.text();
-
-    if (onProgress) onProgress(30);
-
-    // Detect input format from file extension or content
-    const inputExt = file.name.split('.').pop()?.toLowerCase();
-    let result: string;
-
-    if (onProgress) onProgress(50);
-
-    // Handle PDF conversions
-    if (inputExt === 'pdf') {
-      // PDF input
-      if (outputFormat === 'txt') {
-        return await pdfToText(file, onProgress);
-      } else if (outputFormat === 'md' || outputFormat === 'markdown') {
-        return await pdfToMarkdown(file, onProgress);
-      } else {
-        throw new Error(`Conversion from PDF to ${outputFormat} not supported`);
-      }
-    }
-
-    // Handle DOCX conversions
-    if (inputExt === 'docx') {
-      // DOCX input
-      if (outputFormat === 'txt') {
-        return await docxToText(file, onProgress);
-      } else if (outputFormat === 'html') {
-        return await docxToHTML(file, onProgress);
-      } else if (outputFormat === 'md' || outputFormat === 'markdown') {
-        return await docxToMarkdown(file, onProgress);
-      } else {
-        throw new Error(`Conversion from DOCX to ${outputFormat} not supported`);
-      }
-    }
-
-    // Handle conversions TO PDF
-    if (outputFormat === 'pdf') {
-      if (inputExt === 'md' || inputExt === 'markdown') {
-        return await markdownToPDF(file, onProgress);
-      } else if (inputExt === 'html' || inputExt === 'htm') {
-        return await htmlToPDF(file, onProgress);
-      } else if (inputExt === 'txt') {
-        return await plainTextToPDF(file, onProgress);
-      } else {
-        throw new Error(`Conversion from ${inputExt} to PDF not supported`);
-      }
-    }
-
-    // Handle conversions TO DOCX
-    if (outputFormat === 'docx') {
-      if (inputExt === 'md' || inputExt === 'markdown') {
-        return await markdownToDOCX(file, onProgress);
-      } else if (inputExt === 'html' || inputExt === 'htm') {
-        return await htmlToDOCX(file, onProgress);
-      } else if (inputExt === 'txt') {
-        return await textToDOCX(file, onProgress);
-      } else {
-        throw new Error(`Conversion from ${inputExt} to DOCX not supported`);
-      }
-    }
-
-    // Perform conversion based on input and output formats
-    if (inputExt === 'md' || inputExt === 'markdown') {
-      // Markdown input
-      if (outputFormat === 'html') {
-        result = await markdownToHtml(text);
-      } else if (outputFormat === 'txt') {
-        result = markdownToText(text);
-      } else {
-        throw new Error(`Conversion from Markdown to ${outputFormat} not supported`);
-      }
-    } else if (inputExt === 'html' || inputExt === 'htm') {
-      // HTML input
-      if (outputFormat === 'md' || outputFormat === 'markdown') {
-        result = await htmlToMarkdown(text);
-      } else if (outputFormat === 'txt') {
-        result = htmlToText(text);
-      } else {
-        throw new Error(`Conversion from HTML to ${outputFormat} not supported`);
-      }
-    } else if (inputExt === 'txt') {
-      // Plain text input
-      if (outputFormat === 'md' || outputFormat === 'markdown') {
-        result = textToMarkdown(text);
-      } else if (outputFormat === 'html') {
-        result = textToHtml(text);
-      } else {
-        throw new Error(`Conversion from TXT to ${outputFormat} not supported`);
-      }
-    } else {
-      throw new Error(`Input format ${inputExt} not supported`);
-    }
-
-    if (onProgress) onProgress(90);
-
-    // Create blob from result
-    const blob = new Blob([result], { type: getMimeType(outputFormat) });
-
-    if (onProgress) onProgress(100);
-
-    const duration = Date.now() - startTime;
-
-    return {
-      success: true,
-      blob,
-      duration,
-    };
-  } catch (error) {
-    console.error('[Document Converter] Conversion error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Unknown conversion error',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert Markdown to HTML
- */
-async function markdownToHtml(markdown: string): Promise<string> {
-  // Configure marked options
-  marked.setOptions({
-    gfm: true, // GitHub Flavored Markdown
-    breaks: true, // Convert \n to <br>
-  });
-
-  const html = await marked.parse(markdown);
-
-  // Sanitize HTML for security
-  const sanitized = DOMPurify.sanitize(html);
-
-  // Wrap in basic HTML document
-  return `<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <title>Converted Document</title>
-  <style>
-    body {
-      font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-      line-height: 1.6;
-      max-width: 800px;
-      margin: 2rem auto;
-      padding: 0 1rem;
-      color: #333;
-    }
-    pre {
-      background: #f4f4f4;
-      border: 1px solid #ddd;
-      border-radius: 4px;
-      padding: 1rem;
-      overflow-x: auto;
-    }
-    code {
-      background: #f4f4f4;
-      padding: 0.2rem 0.4rem;
-      border-radius: 3px;
-      font-family: 'Courier New', monospace;
-    }
-    blockquote {
-      border-left: 4px solid #ddd;
-      margin: 1rem 0;
-      padding-left: 1rem;
-      color: #666;
-    }
-    table {
-      border-collapse: collapse;
-      width: 100%;
-      margin: 1rem 0;
-    }
-    th, td {
-      border: 1px solid #ddd;
-      padding: 0.5rem;
-      text-align: left;
-    }
-    th {
-      background: #f4f4f4;
-    }
-  </style>
-</head>
-<body>
-${sanitized}
-</body>
-</html>`;
-}
-
-/**
- * Convert HTML to Markdown
- */
-async function htmlToMarkdown(html: string): Promise<string> {
-  // Sanitize HTML first
-  const sanitized = DOMPurify.sanitize(html);
-
-  // Configure TurndownService
-  const turndownService = new TurndownService({
-    headingStyle: 'atx', // Use # for headings
-    codeBlockStyle: 'fenced', // Use ``` for code blocks
-    bulletListMarker: '-', // Use - for bullet lists
-  });
-
-  const markdown = turndownService.turndown(sanitized);
-  return markdown;
-}
-
-/**
- * Convert Markdown to plain text (strip formatting)
- */
-function markdownToText(markdown: string): string {
-  // Remove markdown syntax
-  let text = markdown
-    // Remove headers
-    .replace(/^#{1,6}\s+/gm, '')
-    // Remove bold/italic
-    .replace(/(\*\*|__)(.*?)\1/g, '$2')
-    .replace(/(\*|_)(.*?)\1/g, '$2')
-    // Remove links
-    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
-    // Remove images
-    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
-    // Remove code blocks
-    .replace(/```[\s\S]*?```/g, '')
-    // Remove inline code
-    .replace(/`([^`]+)`/g, '$1')
-    // Remove blockquotes
-    .replace(/^>\s+/gm, '')
-    // Remove horizontal rules
-    .replace(/^-{3,}$/gm, '')
-    // Clean up multiple newlines
-    .replace(/\n{3,}/g, '\n\n');
-
-  return text.trim();
-}
-
-/**
- * Convert HTML to plain text
- */
-function htmlToText(html: string): string {
-  // Sanitize HTML first
-  const sanitized = DOMPurify.sanitize(html, { ALLOWED_TAGS: [] });
-
-  // Clean up whitespace
-  return sanitized
-    .replace(/\s+/g, ' ')
-    .trim();
-}
-
-/**
- * Convert plain text to Markdown
- */
-function textToMarkdown(text: string): string {
-  // Add basic markdown formatting
-  // Treat lines as paragraphs
-  return text
-    .split('\n\n')
-    .filter(p => p.trim())
-    .join('\n\n');
-}
-
-/**
- * Convert plain text to HTML
- */
-function textToHtml(text: string): string {
-  // Escape HTML entities
-  const escaped = text
-    .replace(/&/g, '&amp;')
-    .replace(/</g, '&lt;')
-    .replace(/>/g, '&gt;')
-    .replace(/"/g, '&quot;')
-    .replace(/'/g, '&#039;');
-
-  // Convert newlines to paragraphs
-  const paragraphs = escaped
-    .split('\n\n')
-    .filter(p => p.trim())
-    .map(p => `  <p>${p.replace(/\n/g, '<br>')}</p>`)
-    .join('\n');
-
-  return `<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <title>Converted Document</title>
-  <style>
-    body {
-      font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-      line-height: 1.6;
-      max-width: 800px;
-      margin: 2rem auto;
-      padding: 0 1rem;
-      color: #333;
-    }
-  </style>
-</head>
-<body>
-${paragraphs}
-</body>
-</html>`;
-}
-
-/**
- * Get MIME type for output format
- */
-function getMimeType(format: string): string {
-  const mimeTypes: Record<string, string> = {
-    html: 'text/html',
-    htm: 'text/html',
-    md: 'text/markdown',
-    markdown: 'text/markdown',
-    txt: 'text/plain',
-  };
-
-  return mimeTypes[format.toLowerCase()] || 'text/plain';
-}
-
-/**
- * Convert Markdown to HTML (convenience function)
- */
-export async function markdownToHtmlFile(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  return convertWithPandoc(file, 'html', {}, onProgress);
-}
-
-/**
- * Convert HTML to Markdown (convenience function)
- */
-export async function htmlToMarkdownFile(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  return convertWithPandoc(file, 'md', {}, onProgress);
-}
--- a/lib/converters/pdfService.ts
+++ b/lib/converters/pdfService.ts
@@ -1,334 +0,0 @@
-import { jsPDF } from 'jspdf';
-import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
-
-/**
- * Extract text from PDF file
- */
-export async function extractTextFromPDF(file: File, onProgress?: ProgressCallback): Promise<string> {
-  if (onProgress) onProgress(10);
-
-  // Dynamically import pdfjs-dist (client-side only)
-  const pdfjsLib = await import('pdfjs-dist');
-
-  // Set worker source
-  pdfjsLib.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.min.mjs`;
-
-  if (onProgress) onProgress(20);
-
-  // Read file as ArrayBuffer
-  const arrayBuffer = await file.arrayBuffer();
-
-  if (onProgress) onProgress(30);
-
-  // Load PDF document
-  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
-  const pdf = await loadingTask.promise;
-
-  if (onProgress) onProgress(50);
-
-  const numPages = pdf.numPages;
-  let fullText = '';
-
-  // Extract text from each page
-  for (let pageNum = 1; pageNum <= numPages; pageNum++) {
-    const page = await pdf.getPage(pageNum);
-    const textContent = await page.getTextContent();
-
-    // Combine text items
-    const pageText = textContent.items
-      .map((item: any) => item.str)
-      .join(' ');
-
-    fullText += pageText + '\n\n';
-
-    // Update progress
-    if (onProgress) {
-      const progress = 50 + (pageNum / numPages) * 40;
-      onProgress(Math.round(progress));
-    }
-  }
-
-  if (onProgress) onProgress(100);
-
-  return fullText.trim();
-}
-
-/**
- * Convert PDF to text
- */
-export async function pdfToText(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    const text = await extractTextFromPDF(file, onProgress);
-    const blob = new Blob([text], { type: 'text/plain' });
-
-    return {
-      success: true,
-      blob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[PDF Converter] PDF to text error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to extract text from PDF',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert PDF to Markdown
- */
-export async function pdfToMarkdown(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    const text = await extractTextFromPDF(file, (progress) => {
-      if (onProgress) onProgress(progress * 0.9); // Use 90% for extraction
-    });
-
-    // Basic text to markdown conversion (paragraphs)
-    const markdown = text
-      .split('\n\n')
-      .filter(p => p.trim())
-      .join('\n\n');
-
-    if (onProgress) onProgress(100);
-
-    const blob = new Blob([markdown], { type: 'text/markdown' });
-
-    return {
-      success: true,
-      blob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[PDF Converter] PDF to markdown error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert PDF to Markdown',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert text to PDF
- */
-export async function textToPDF(
-  text: string,
-  filename: string = 'document.pdf',
-  onProgress?: ProgressCallback
-): Promise<Blob> {
-  if (onProgress) onProgress(20);
-
-  const doc = new jsPDF({
-    orientation: 'portrait',
-    unit: 'mm',
-    format: 'a4',
-  });
-
-  if (onProgress) onProgress(40);
-
-  // Set font and size
-  doc.setFont('helvetica');
-  doc.setFontSize(12);
-
-  // Page dimensions
-  const pageWidth = doc.internal.pageSize.getWidth();
-  const pageHeight = doc.internal.pageSize.getHeight();
-  const margin = 20;
-  const maxWidth = pageWidth - 2 * margin;
-  const lineHeight = 7;
-  let y = margin;
-
-  if (onProgress) onProgress(60);
-
-  // Split text into lines
-  const lines = doc.splitTextToSize(text, maxWidth);
-
-  // Add lines to PDF
-  for (let i = 0; i < lines.length; i++) {
-    // Check if we need a new page
-    if (y + lineHeight > pageHeight - margin) {
-      doc.addPage();
-      y = margin;
-    }
-
-    doc.text(lines[i], margin, y);
-    y += lineHeight;
-
-    // Update progress
-    if (onProgress && i % 10 === 0) {
-      const progress = 60 + (i / lines.length) * 30;
-      onProgress(Math.round(progress));
-    }
-  }
-
-  if (onProgress) onProgress(90);
-
-  // Generate PDF blob
-  const pdfBlob = doc.output('blob');
-
-  if (onProgress) onProgress(100);
-
-  return pdfBlob;
-}
-
-/**
- * Convert Markdown to PDF
- */
-export async function markdownToPDF(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    if (onProgress) onProgress(10);
-
-    // Read markdown content
-    const markdown = await file.text();
-
-    if (onProgress) onProgress(20);
-
-    // Import marked for markdown parsing
-    const { marked } = await import('marked');
-
-    // Parse markdown to HTML
-    const html = await marked.parse(markdown);
-
-    if (onProgress) onProgress(40);
-
-    // Strip HTML tags for plain text
-    const text = html
-      .replace(/<[^>]*>/g, '')
-      .replace(/&nbsp;/g, ' ')
-      .replace(/&quot;/g, '"')
-      .replace(/&apos;/g, "'")
-      .replace(/&lt;/g, '<')
-      .replace(/&gt;/g, '>')
-      .replace(/&amp;/g, '&');
-
-    if (onProgress) onProgress(60);
-
-    // Generate PDF
-    const pdfBlob = await textToPDF(text, file.name.replace(/\.md$/, '.pdf'), (progress) => {
-      if (onProgress) onProgress(60 + progress * 0.4);
-    });
-
-    return {
-      success: true,
-      blob: pdfBlob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[PDF Converter] Markdown to PDF error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert Markdown to PDF',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert HTML to PDF
- */
-export async function htmlToPDF(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    if (onProgress) onProgress(10);
-
-    // Read HTML content
-    const html = await file.text();
-
-    if (onProgress) onProgress(30);
-
-    // Strip HTML tags for plain text
-    const text = html
-      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
-      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
-      .replace(/<[^>]*>/g, ' ')
-      .replace(/&nbsp;/g, ' ')
-      .replace(/&quot;/g, '"')
-      .replace(/&apos;/g, "'")
-      .replace(/&lt;/g, '<')
-      .replace(/&gt;/g, '>')
-      .replace(/&amp;/g, '&')
-      .replace(/\s+/g, ' ')
-      .trim();
-
-    if (onProgress) onProgress(50);
-
-    // Generate PDF
-    const pdfBlob = await textToPDF(text, file.name.replace(/\.html?$/, '.pdf'), (progress) => {
-      if (onProgress) onProgress(50 + progress * 0.5);
-    });
-
-    return {
-      success: true,
-      blob: pdfBlob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[PDF Converter] HTML to PDF error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert HTML to PDF',
-      duration: Date.now() - startTime,
-    };
-  }
-}
-
-/**
- * Convert plain text to PDF
- */
-export async function plainTextToPDF(
-  file: File,
-  onProgress?: ProgressCallback
-): Promise<ConversionResult> {
-  const startTime = Date.now();
-
-  try {
-    if (onProgress) onProgress(10);
-
-    const text = await file.text();
-
-    if (onProgress) onProgress(30);
-
-    const pdfBlob = await textToPDF(text, file.name.replace(/\.txt$/, '.pdf'), (progress) => {
-      if (onProgress) onProgress(30 + progress * 0.7);
-    });
-
-    return {
-      success: true,
-      blob: pdfBlob,
-      duration: Date.now() - startTime,
-    };
-  } catch (error) {
-    console.error('[PDF Converter] Text to PDF error:', error);
-
-    return {
-      success: false,
-      error: error instanceof Error ? error.message : 'Failed to convert text to PDF',
-      duration: Date.now() - startTime,
-    };
-  }
-}
--- a/lib/utils/formatMappings.ts
+++ b/lib/utils/formatMappings.ts
@@ -162,53 +162,6 @@ export const SUPPORTED_FORMATS: ConversionFormat[] = [
    converter: 'imagemagick',
    description: 'Scalable Vector Graphics',
  },
-
-  // Document formats (Pandoc - future implementation)
-  {
-    id: 'pdf',
-    name: 'PDF',
-    extension: 'pdf',
-    mimeType: 'application/pdf',
-    category: 'document',
-    converter: 'pandoc',
-    description: 'Portable Document Format',
-  },
-  {
-    id: 'docx',
-    name: 'DOCX',
-    extension: 'docx',
-    mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
-    category: 'document',
-    converter: 'pandoc',
-    description: 'Microsoft Word document',
-  },
-  {
-    id: 'markdown',
-    name: 'Markdown',
-    extension: 'md',
-    mimeType: 'text/markdown',
-    category: 'document',
-    converter: 'pandoc',
-    description: 'Markdown text',
-  },
-  {
-    id: 'html',
-    name: 'HTML',
-    extension: 'html',
-    mimeType: 'text/html',
-    category: 'document',
-    converter: 'pandoc',
-    description: 'HyperText Markup Language',
-  },
-  {
-    id: 'txt',
-    name: 'Plain Text',
-    extension: 'txt',
-    mimeType: 'text/plain',
-    category: 'document',
-    converter: 'pandoc',
-    description: 'Plain text file',
-  },
 ];

 /**
--- a/lib/wasm/wasmLoader.ts
+++ b/lib/wasm/wasmLoader.ts
@@ -7,7 +7,6 @@ import type { ConverterEngine, WASMModuleState } from '@/types/conversion';
 const moduleState: WASMModuleState = {
  ffmpeg: false,
  imagemagick: false,
-  pandoc: false,
 };

 /**
@@ -15,7 +14,6 @@ const moduleState: WASMModuleState = {
 */
 let ffmpegInstance: FFmpeg | null = null;
 let imagemagickInstance: any = null;
-let pandocInstance: any = null;

 /**
 * Load FFmpeg WASM module
@@ -75,33 +73,6 @@ export async function loadImageMagick(): Promise<any> {
  }
 }

-/**
- * Load Pandoc converter (uses pure JavaScript libraries, not WASM)
- * Note: We use marked + turndown instead of actual Pandoc WASM
- */
-export async function loadPandoc(): Promise<any> {
-  if (pandocInstance && moduleState.pandoc) {
-    return pandocInstance;
-  }
-
-  try {
-    // Import the converter libraries
-    const [marked, turndown] = await Promise.all([
-      import('marked'),
-      import('turndown'),
-    ]);
-
-    pandocInstance = { marked, turndown };
-    moduleState.pandoc = true;
-    console.log('Document converter loaded successfully');
-
-    return pandocInstance;
-  } catch (error) {
-    console.error('Failed to load document converter:', error);
-    throw new Error('Failed to load document converter');
-  }
-}
-
 /**
 * Get loaded module state
 */
@@ -125,8 +96,6 @@ export async function loadModule(engine: ConverterEngine): Promise<any> {
      return loadFFmpeg();
    case 'imagemagick':
      return loadImageMagick();
-    case 'pandoc':
-      return loadPandoc();
    default:
      throw new Error(`Unknown converter engine: ${engine}`);
  }
@@ -148,10 +117,5 @@ export function unloadAll(): void {
    moduleState.imagemagick = false;
  }

-  if (pandocInstance) {
-    pandocInstance = null;
-    moduleState.pandoc = false;
-  }
-
  console.log('All WASM modules unloaded');
 }