import { jsPDF } from 'jspdf';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';

/** Prefix used for every console message emitted by this module. */
const LOG_PREFIX = '[PDF Converter]';

/**
 * Run a conversion, time it, and wrap the outcome in a ConversionResult.
 *
 * Errors thrown by `convert` are logged and reported as a failed result
 * instead of being rethrown.
 *
 * @param label - Human-readable name of the conversion, used in the log line.
 * @param fallbackMessage - Error text used when the thrown value is not an Error.
 * @param convert - Produces the output blob.
 */
async function runConversion(
  label: string,
  fallbackMessage: string,
  convert: () => Promise<Blob>,
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    const blob = await convert();
    return {
      success: true,
      blob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    console.error(`${LOG_PREFIX} ${label} error:`, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : fallbackMessage,
      duration: Date.now() - startTime,
    };
  }
}

/**
 * Decode the small set of HTML entities this module cares about.
 *
 * `&amp;` is decoded last so that an escaped entity such as `&amp;lt;`
 * becomes the literal text `&lt;` rather than `<`.
 */
function decodeHtmlEntities(text: string): string {
  return text
    .replace(/&nbsp;/g, ' ')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&');
}

/**
 * Extract text from PDF file.
 *
 * Loads `pdfjs-dist` on demand, reads every page in order and joins the text
 * items of a page with single spaces; pages are separated by a blank line.
 *
 * @param file - The PDF file to read.
 * @param onProgress - Optional callback receiving progress values from 10 to 100.
 * @returns The extracted text with leading/trailing whitespace trimmed.
 * @throws Whatever pdf.js rejects with when the document cannot be loaded or read.
 */
export async function extractTextFromPDF(
  file: File,
  onProgress?: ProgressCallback,
): Promise<string> {
  onProgress?.(10);

  // Dynamically import pdfjs-dist (client-side only)
  const pdfjsLib = await import('pdfjs-dist');

  // Set worker source. An explicit https:// scheme is used because a
  // protocol-relative URL cannot be resolved on non-http(s) origins.
  pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.min.mjs`;

  onProgress?.(20);

  // Read file as ArrayBuffer
  const arrayBuffer = await file.arrayBuffer();

  onProgress?.(30);

  // Load PDF document
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });

  try {
    const pdf = await loadingTask.promise;

    onProgress?.(50);

    const numPages = pdf.numPages;
    const pageTexts: string[] = [];

    // Extract text from each page
    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();

      // Combine text items; entries without a `str` field carry no text.
      const pageText = textContent.items
        .map((item) => ('str' in item ? item.str : ''))
        .join(' ');

      pageTexts.push(pageText);

      // Page extraction covers the 50..90 range of the progress scale.
      onProgress?.(Math.round(50 + (pageNum / numPages) * 40));
    }

    onProgress?.(100);

    return pageTexts.join('\n\n').trim();
  } finally {
    // Release the document and its worker resources. Cleanup is best-effort:
    // a failure here must not mask the extraction result or its error.
    await loadingTask.destroy().catch(() => undefined);
  }
}

/**
 * Convert PDF to text.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional progress callback, forwarded to the extraction step.
 * @returns A result holding a `text/plain` blob on success, or an error message.
 */
export async function pdfToText(
  file: File,
  onProgress?: ProgressCallback,
): Promise<ConversionResult> {
  return runConversion('PDF to text', 'Failed to extract text from PDF', async () => {
    const text = await extractTextFromPDF(file, onProgress);
    return new Blob([text], { type: 'text/plain' });
  });
}

/**
 * Convert PDF to Markdown.
 *
 * The conversion is paragraph-level only: extracted text is split on blank
 * lines, empty paragraphs are dropped, and the rest is re-joined.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional progress callback; extraction uses the first 90%.
 * @returns A result holding a `text/markdown` blob on success, or an error message.
 */
export async function pdfToMarkdown(
  file: File,
  onProgress?: ProgressCallback,
): Promise<ConversionResult> {
  return runConversion('PDF to markdown', 'Failed to convert PDF to Markdown', async () => {
    const text = await extractTextFromPDF(file, (progress) => {
      // Use 90% for extraction; rounded so callers always get whole numbers.
      onProgress?.(Math.round(progress * 0.9));
    });

    // Basic text to markdown conversion (paragraphs)
    const markdown = text
      .split('\n\n')
      .filter((paragraph) => paragraph.trim())
      .join('\n\n');

    onProgress?.(100);

    return new Blob([markdown], { type: 'text/markdown' });
  });
}

/**
 * Convert text to PDF.
 *
 * Renders the text on portrait A4 pages in 12pt Helvetica with 20mm margins,
 * wrapping lines to the printable width and adding pages as needed.
 *
 * @param text - The text to render.
 * @param filename - Accepted for API compatibility; not used when building the blob.
 * @param onProgress - Optional callback receiving progress values from 20 to 100.
 * @returns The generated PDF as a Blob.
 */
export async function textToPDF(
  text: string,
  filename: string = 'document.pdf',
  onProgress?: ProgressCallback,
): Promise<Blob> {
  onProgress?.(20);

  const doc = new jsPDF({
    orientation: 'portrait',
    unit: 'mm',
    format: 'a4',
  });

  onProgress?.(40);

  // Set font and size
  doc.setFont('helvetica');
  doc.setFontSize(12);

  // Page dimensions
  const pageWidth = doc.internal.pageSize.getWidth();
  const pageHeight = doc.internal.pageSize.getHeight();
  const margin = 20;
  const maxWidth = pageWidth - 2 * margin;
  const lineHeight = 7;
  let y = margin;

  onProgress?.(60);

  // Split text into lines that fit the printable width
  const lines: string[] = doc.splitTextToSize(text, maxWidth);

  // Add lines to PDF
  lines.forEach((line, index) => {
    // Start a new page when the next line would cross the bottom margin
    if (y + lineHeight > pageHeight - margin) {
      doc.addPage();
      y = margin;
    }

    doc.text(line, margin, y);
    y += lineHeight;

    // Report every tenth line; rendering covers the 60..90 range.
    if (onProgress && index % 10 === 0) {
      onProgress(Math.round(60 + (index / lines.length) * 30));
    }
  });

  onProgress?.(90);

  // Generate PDF blob
  const pdfBlob = doc.output('blob');

  onProgress?.(100);

  return pdfBlob;
}

/**
 * Convert Markdown to PDF.
 *
 * The markdown is parsed to HTML with `marked`, the tags are stripped and the
 * common entities decoded, and the resulting plain text is rendered to a PDF.
 *
 * @param file - The markdown file to convert.
 * @param onProgress - Optional progress callback; PDF generation uses the last 40%.
 * @returns A result holding the PDF blob on success, or an error message.
 */
export async function markdownToPDF(
  file: File,
  onProgress?: ProgressCallback,
): Promise<ConversionResult> {
  return runConversion('Markdown to PDF', 'Failed to convert Markdown to PDF', async () => {
    onProgress?.(10);

    // Read markdown content
    const markdown = await file.text();

    onProgress?.(20);

    // Import marked for markdown parsing
    const { marked } = await import('marked');

    // Parse markdown to HTML
    const html = await marked.parse(markdown);

    onProgress?.(40);

    // Strip HTML tags for plain text
    const text = decodeHtmlEntities(html.replace(/<[^>]*>/g, ''));

    onProgress?.(60);

    // Generate PDF
    return textToPDF(text, file.name.replace(/\.(md|markdown)$/i, '.pdf'), (progress) => {
      onProgress?.(Math.round(60 + progress * 0.4));
    });
  });
}

/**
 * Convert HTML to PDF.
 *
 * Script and style elements are removed together with their contents, the
 * remaining tags are replaced by spaces, common entities are decoded and all
 * whitespace runs are collapsed before the text is rendered to a PDF.
 *
 * @param file - The HTML file to convert.
 * @param onProgress - Optional progress callback; PDF generation uses the last 50%.
 * @returns A result holding the PDF blob on success, or an error message.
 */
export async function htmlToPDF(
  file: File,
  onProgress?: ProgressCallback,
): Promise<ConversionResult> {
  return runConversion('HTML to PDF', 'Failed to convert HTML to PDF', async () => {
    onProgress?.(10);

    // Read HTML content
    const html = await file.text();

    onProgress?.(30);

    // Strip HTML tags for plain text
    const withoutTags = html
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
      .replace(/<[^>]*>/g, ' ');
    const text = decodeHtmlEntities(withoutTags).replace(/\s+/g, ' ').trim();

    onProgress?.(50);

    // Generate PDF
    return textToPDF(text, file.name.replace(/\.html?$/i, '.pdf'), (progress) => {
      onProgress?.(Math.round(50 + progress * 0.5));
    });
  });
}

/**
 * Convert plain text to PDF.
 *
 * @param file - The text file to convert.
 * @param onProgress - Optional progress callback; PDF generation uses the last 70%.
 * @returns A result holding the PDF blob on success, or an error message.
 */
export async function plainTextToPDF(
  file: File,
  onProgress?: ProgressCallback,
): Promise<ConversionResult> {
  return runConversion('Text to PDF', 'Failed to convert text to PDF', async () => {
    onProgress?.(10);

    const text = await file.text();

    onProgress?.(30);

    return textToPDF(text, file.name.replace(/\.txt$/i, '.pdf'), (progress) => {
      onProgress?.(Math.round(30 + progress * 0.7));
    });
  });
}