// convert-ui/lib/converters/pdfService.ts

import { jsPDF } from 'jspdf';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';

/**
 * Extract the text content of every page of a PDF file.
 *
 * pdf.js is imported dynamically inside the function (client-side only) and
 * its worker script is pointed at the cdnjs build matching the loaded
 * library version.
 *
 * @param file - The PDF file to read.
 * @param onProgress - Optional callback. Receives 10, 20, 30 and 50 during
 *   setup/loading, a rounded value between 50 and 90 after each page, and
 *   100 once all pages have been processed.
 * @returns The text of all pages, with pages separated by a blank line and
 *   surrounding whitespace trimmed.
 * @throws Rejects with whatever error the file read or pdf.js raises; the
 *   loaded document is released in either case.
 */
export async function extractTextFromPDF(file: File, onProgress?: ProgressCallback): Promise<string> {
  if (onProgress) onProgress(10);

  // Dynamically import pdfjs-dist (client-side only)
  const pdfjsLib = await import('pdfjs-dist');

  // Set worker source
  pdfjsLib.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.min.mjs`;

  if (onProgress) onProgress(20);

  // Read file as ArrayBuffer
  const arrayBuffer = await file.arrayBuffer();

  if (onProgress) onProgress(30);

  // Load PDF document
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });

  try {
    const pdf = await loadingTask.promise;

    if (onProgress) onProgress(50);

    const numPages = pdf.numPages;
    const pageTexts: string[] = [];

    // Extract text from each page (pdf.js page numbers are 1-based)
    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();

      // Combine text items; entries without a string `str` contribute an
      // empty string, which matches how Array.prototype.join renders
      // `undefined`.
      const pageText = textContent.items
        .map((item: unknown) => {
          const str = (item as { str?: unknown } | null | undefined)?.str;
          return typeof str === 'string' ? str : '';
        })
        .join(' ');

      pageTexts.push(pageText);

      // Update progress: pages account for the 50–90 range
      if (onProgress) {
        const progress = 50 + (pageNum / numPages) * 40;
        onProgress(Math.round(progress));
      }
    }

    if (onProgress) onProgress(100);

    return pageTexts.join('\n\n').trim();
  } finally {
    // Release the document and its worker-side resources whether extraction
    // succeeded or failed; otherwise every call leaks a loaded document.
    await loadingTask.destroy();
  }
}

/**
 * Convert a PDF file into a plain-text blob.
 *
 * Delegates extraction to `extractTextFromPDF` and never rejects: failures
 * are logged and reported through the returned result object.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional progress callback, forwarded unchanged to the
 *   text extraction step.
 * @returns On success, `success: true` with a `text/plain` blob; on failure,
 *   `success: false` with an error message. Both include the elapsed time in
 *   milliseconds as `duration`.
 */
export async function pdfToText(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const extracted = await extractTextFromPDF(file, onProgress);

    return {
      success: true,
      blob: new Blob([extracted], { type: 'text/plain' }),
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] PDF to text error:', error);

    // Non-Error throwables get a generic message.
    const message = error instanceof Error ? error.message : 'Failed to extract text from PDF';

    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}

/**
 * Convert a PDF file into a Markdown blob.
 *
 * The extracted text is split on blank lines, whitespace-only paragraphs
 * are dropped, and the remaining paragraphs are re-joined with blank lines.
 * No other Markdown structure is produced.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional progress callback. Extraction progress is
 *   scaled by 0.9; 100 is reported once the Markdown has been assembled.
 * @returns On success, `success: true` with a `text/markdown` blob; on
 *   failure, `success: false` with an error message. Both include the
 *   elapsed time in milliseconds as `duration`.
 */
export async function pdfToMarkdown(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    // Extraction takes up the first 90% of the reported progress.
    const extracted = await extractTextFromPDF(file, (extractionProgress) => {
      if (onProgress) onProgress(extractionProgress * 0.9);
    });

    // Keep only paragraphs that contain something besides whitespace.
    const paragraphs: string[] = [];
    for (const paragraph of extracted.split('\n\n')) {
      if (paragraph.trim().length > 0) {
        paragraphs.push(paragraph);
      }
    }
    const markdown = paragraphs.join('\n\n');

    if (onProgress) onProgress(100);

    return {
      success: true,
      blob: new Blob([markdown], { type: 'text/markdown' }),
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] PDF to markdown error:', error);

    const message = error instanceof Error ? error.message : 'Failed to convert PDF to Markdown';

    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}

/**
 * Render plain text into a portrait A4 PDF and return it as a blob.
 *
 * Text is set in 12pt Helvetica with a 20mm margin on every side and a 7mm
 * line advance. It is wrapped to the printable width, and a new page is
 * started whenever the next line would cross the bottom margin.
 *
 * @param text - The text to render.
 * @param filename - Accepted for callers' convenience; this function does
 *   not read it.
 * @param onProgress - Optional callback. Receives 20, 40 and 60 during
 *   setup, a rounded value between 60 and 90 on every tenth line, then 90
 *   and finally 100 once the blob has been generated.
 * @returns The generated PDF as a blob.
 */
export async function textToPDF(
  text: string,
  filename: string = 'document.pdf',
  onProgress?: ProgressCallback
): Promise<Blob> {
  if (onProgress) onProgress(20);

  const doc = new jsPDF({
    orientation: 'portrait',
    unit: 'mm',
    format: 'a4',
  });

  if (onProgress) onProgress(40);

  // Typeface used for the whole document
  doc.setFont('helvetica');
  doc.setFontSize(12);

  // Layout metrics, in millimetres (the unit the document was created with)
  const margin = 20;
  const lineHeight = 7;
  const pageWidth = doc.internal.pageSize.getWidth();
  const pageHeight = doc.internal.pageSize.getHeight();
  const maxWidth = pageWidth - 2 * margin;
  const bottomLimit = pageHeight - margin;

  if (onProgress) onProgress(60);

  // Wrap the text to the printable width
  const lines = doc.splitTextToSize(text, maxWidth);
  const lineCount = lines.length;

  let y = margin;
  let index = 0;
  while (index < lineCount) {
    // Start a fresh page when this line would cross the bottom margin
    if (y + lineHeight > bottomLimit) {
      doc.addPage();
      y = margin;
    }

    doc.text(lines[index], margin, y);
    y += lineHeight;

    // Report progress on every tenth line; lines cover the 60–90 range
    if (onProgress && index % 10 === 0) {
      onProgress(Math.round(60 + (index / lineCount) * 30));
    }

    index += 1;
  }

  if (onProgress) onProgress(90);

  // Serialize the document
  const pdfBlob = doc.output('blob');

  if (onProgress) onProgress(100);

  return pdfBlob;
}

/**
 * Convert a Markdown file into a plain-text PDF.
 *
 * The Markdown is rendered to HTML with `marked`, the HTML tags are removed,
 * character references are decoded, and the resulting text is handed to
 * `textToPDF`. Formatting such as headings or emphasis is therefore not
 * preserved in the PDF; only the text is.
 *
 * Never rejects: failures are logged and reported through the returned
 * result object.
 *
 * @param file - The Markdown file to convert.
 * @param onProgress - Optional callback. Receives 10, 20, 40 and 60 during
 *   preparation, then `60 + p * 0.4` for each progress value `p` reported by
 *   the PDF generation step.
 * @returns On success, `success: true` with the PDF blob; on failure,
 *   `success: false` with an error message. Both include the elapsed time in
 *   milliseconds as `duration`.
 */
export async function markdownToPDF(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  // Named character references that are decoded back to plain text.
  const namedEntities: Record<string, string> = {
    nbsp: ' ',
    quot: '"',
    apos: "'",
    lt: '<',
    gt: '>',
    amp: '&',
  };

  // Decode one matched character reference. Numeric references must be
  // handled because marked escapes apostrophes as `&#39;`; without this,
  // "don't" would be printed as "don&#39;t". Anything that cannot be decoded
  // is returned unchanged.
  const decodeEntity = (entity: string, hex?: string, dec?: string, name?: string): string => {
    if (name !== undefined) {
      return namedEntities[name] ?? entity;
    }

    const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? '', 10);
    const isScalarValue =
      codePoint > 0 &&
      codePoint <= 0x10ffff &&
      (codePoint < 0xd800 || codePoint > 0xdfff);

    return isScalarValue ? String.fromCodePoint(codePoint) : entity;
  };

  try {
    if (onProgress) onProgress(10);

    // Read markdown content
    const markdown = await file.text();

    if (onProgress) onProgress(20);

    // Import marked for markdown parsing
    const { marked } = await import('marked');

    // Parse markdown to HTML
    const html = await marked.parse(markdown);

    if (onProgress) onProgress(40);

    // Strip HTML tags, then decode all character references in a single
    // pass so that decoded output (e.g. the "&" from "&amp;") is never
    // decoded a second time.
    const text = html
      .replace(/<[^>]*>/g, '')
      .replace(/&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|(nbsp|quot|apos|lt|gt|amp));/g, decodeEntity);

    if (onProgress) onProgress(60);

    // Generate PDF; its progress fills the remaining 40%
    const pdfBlob = await textToPDF(text, file.name.replace(/\.md$/, '.pdf'), (progress) => {
      if (onProgress) onProgress(60 + progress * 0.4);
    });

    return {
      success: true,
      blob: pdfBlob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    console.error('[PDF Converter] Markdown to PDF error:', error);

    return {
      success: false,
      error: error instanceof Error ? error.message : 'Failed to convert Markdown to PDF',
      duration: Date.now() - startTime,
    };
  }
}

/**
 * Convert an HTML file into a plain-text PDF.
 *
 * `<script>` and `<style>` elements are removed together with their
 * contents, remaining tags are replaced by spaces, a fixed set of named
 * entities is decoded, and all whitespace runs are collapsed to a single
 * space before the text is handed to `textToPDF`.
 *
 * Never rejects: failures are logged and reported through the returned
 * result object.
 *
 * @param file - The HTML file to convert.
 * @param onProgress - Optional callback. Receives 10, 30 and 50 during
 *   preparation, then `50 + p * 0.5` for each progress value `p` reported by
 *   the PDF generation step.
 * @returns On success, `success: true` with the PDF blob; on failure,
 *   `success: false` with an error message. Both include the elapsed time in
 *   milliseconds as `duration`.
 */
export async function htmlToPDF(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  // Ordered clean-up steps; each is applied to the output of the previous
  // one. `&amp;` is decoded last so its output is not decoded again.
  const cleanupSteps: ReadonlyArray<readonly [RegExp, string]> = [
    [/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''],
    [/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ''],
    [/<[^>]*>/g, ' '],
    [/&nbsp;/g, ' '],
    [/&quot;/g, '"'],
    [/&apos;/g, "'"],
    [/&lt;/g, '<'],
    [/&gt;/g, '>'],
    [/&amp;/g, '&'],
    [/\s+/g, ' '],
  ];

  try {
    if (onProgress) onProgress(10);

    // Read HTML content
    const html = await file.text();

    if (onProgress) onProgress(30);

    // Reduce the markup to plain text
    let text = html;
    for (const [pattern, replacement] of cleanupSteps) {
      text = text.replace(pattern, replacement);
    }
    text = text.trim();

    if (onProgress) onProgress(50);

    // Generate PDF; its progress fills the remaining half
    const pdfName = file.name.replace(/\.html?$/, '.pdf');
    const pdfBlob = await textToPDF(text, pdfName, (pdfProgress) => {
      if (onProgress) onProgress(50 + pdfProgress * 0.5);
    });

    return {
      success: true,
      blob: pdfBlob,
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] HTML to PDF error:', error);

    const message = error instanceof Error ? error.message : 'Failed to convert HTML to PDF';

    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}

/**
 * Convert a plain-text file into a PDF.
 *
 * Reads the file's text and passes it to `textToPDF` unchanged. Never
 * rejects: failures are logged and reported through the returned result
 * object.
 *
 * @param file - The text file to convert.
 * @param onProgress - Optional callback. Receives 10 and 30 during
 *   preparation, then `30 + p * 0.7` for each progress value `p` reported by
 *   the PDF generation step.
 * @returns On success, `success: true` with the PDF blob; on failure,
 *   `success: false` with an error message. Both include the elapsed time in
 *   milliseconds as `duration`.
 */
export async function plainTextToPDF(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    if (onProgress) onProgress(10);

    const contents = await file.text();

    if (onProgress) onProgress(30);

    // PDF generation fills the remaining 70% of the progress range
    const pdfName = file.name.replace(/\.txt$/, '.pdf');
    const pdfBlob = await textToPDF(contents, pdfName, (pdfProgress) => {
      if (onProgress) onProgress(30 + pdfProgress * 0.7);
    });

    return {
      success: true,
      blob: pdfBlob,
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] Text to PDF error:', error);

    const message = error instanceof Error ? error.message : 'Failed to convert text to PDF';

    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}