import { Document, Packer, Paragraph, TextRun, HeadingLevel } from 'docx';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';

/** Spacing (after) applied to every body paragraph written into a DOCX. */
const BODY_PARAGRAPH_SPACING = { after: 200 };

/** Markdown heading prefixes recognised by `createDOCXFromMarkdown`, with their DOCX styling. */
const MARKDOWN_HEADINGS = [
  { prefix: '# ', heading: HeadingLevel.HEADING_1, spacing: { before: 240, after: 120 } },
  { prefix: '## ', heading: HeadingLevel.HEADING_2, spacing: { before: 200, after: 100 } },
  { prefix: '### ', heading: HeadingLevel.HEADING_3, spacing: { before: 160, after: 80 } },
];

/**
 * Build a progress callback that maps a nested 0-100 progress value onto
 * `offset + progress * factor` of the outer callback.
 * Returns `undefined` when there is no outer callback to report to.
 */
function scaledProgress(
  onProgress: ProgressCallback | undefined,
  offset: number,
  factor: number
): ProgressCallback | undefined {
  if (!onProgress) return undefined;
  return (progress) => onProgress(offset + progress * factor);
}

/**
 * Log a conversion failure and build the matching unsuccessful result.
 *
 * @param label - Human-readable conversion name used in the log line.
 * @param fallbackMessage - Message used when the thrown value is not an `Error`.
 * @param error - The value caught by the caller.
 * @param startTime - `Date.now()` timestamp taken when the conversion started.
 */
function failureResult(
  label: string,
  fallbackMessage: string,
  error: unknown,
  startTime: number
): ConversionResult {
  console.error(`[DOCX Converter] ${label} error:`, error);
  return {
    success: false,
    error: error instanceof Error ? error.message : fallbackMessage,
    duration: Date.now() - startTime,
  };
}

/**
 * Load mammoth and read the file into memory, reporting progress 10 / 30 / 50.
 * mammoth is imported dynamically so it is only loaded when a conversion runs.
 */
async function loadMammothInput(file: File, onProgress?: ProgressCallback) {
  onProgress?.(10);

  // Dynamically import mammoth (client-side only)
  const mammoth = await import('mammoth');
  onProgress?.(30);

  // Read file as ArrayBuffer
  const arrayBuffer = await file.arrayBuffer();
  onProgress?.(50);

  return { mammoth, arrayBuffer };
}

/**
 * Extract text from DOCX file using mammoth
 *
 * @param file - The DOCX file to read.
 * @param onProgress - Optional callback receiving 10, 30, 50 and finally 100.
 * @returns The raw text reported by `mammoth.extractRawText`.
 */
export async function extractTextFromDOCX(file: File, onProgress?: ProgressCallback): Promise<string> {
  const { mammoth, arrayBuffer } = await loadMammothInput(file, onProgress);

  // Extract text from DOCX
  const result = await mammoth.extractRawText({ arrayBuffer });
  onProgress?.(100);

  return result.value;
}

/**
 * Extract HTML from DOCX file using mammoth
 *
 * @param file - The DOCX file to read.
 * @param onProgress - Optional callback receiving 10, 30, 50 and finally 100.
 * @returns The HTML fragment reported by `mammoth.convertToHtml`.
 */
export async function extractHTMLFromDOCX(file: File, onProgress?: ProgressCallback): Promise<string> {
  const { mammoth, arrayBuffer } = await loadMammothInput(file, onProgress);

  // Convert DOCX to HTML
  const result = await mammoth.convertToHtml({ arrayBuffer });
  onProgress?.(100);

  return result.value;
}

/**
 * Convert DOCX to plain text
 *
 * Never rejects: failures are logged and returned as `{ success: false, error }`.
 */
export async function docxToText(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    const text = await extractTextFromDOCX(file, onProgress);
    const blob = new Blob([text], { type: 'text/plain' });

    return {
      success: true,
      blob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    return failureResult('DOCX to text', 'Failed to extract text from DOCX', error, startTime);
  }
}

/**
 * Convert DOCX to HTML
 *
 * The fragment produced by mammoth is wrapped in a complete HTML document.
 * Never rejects: failures are logged and returned as `{ success: false, error }`.
 */
export async function docxToHTML(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    // Extraction accounts for the first 90% of reported progress.
    const html = await extractHTMLFromDOCX(file, scaledProgress(onProgress, 0, 0.9));

    // Wrap in full HTML document
    const fullHTML = `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Converted Document</title>
</head>
<body>
${html}
</body>
</html>`;

    onProgress?.(100);

    const blob = new Blob([fullHTML], { type: 'text/html' });

    return {
      success: true,
      blob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    return failureResult('DOCX to HTML', 'Failed to convert DOCX to HTML', error, startTime);
  }
}

/**
 * Convert DOCX to Markdown
 *
 * The document is first converted to HTML with mammoth, then to Markdown with turndown.
 * Never rejects: failures are logged and returned as `{ success: false, error }`.
 */
export async function docxToMarkdown(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    // First convert to HTML (first 70% of reported progress)
    const html = await extractHTMLFromDOCX(file, scaledProgress(onProgress, 0, 0.7));

    onProgress?.(80);

    // Import turndown for HTML to Markdown
    const TurndownService = (await import('turndown')).default;
    const turndownService = new TurndownService({
      headingStyle: 'atx',
      codeBlockStyle: 'fenced',
      bulletListMarker: '-',
    });

    const markdown = turndownService.turndown(html);

    onProgress?.(100);

    const blob = new Blob([markdown], { type: 'text/markdown' });

    return {
      success: true,
      blob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    return failureResult('DOCX to Markdown', 'Failed to convert DOCX to Markdown', error, startTime);
  }
}

/**
 * Create DOCX from text content
 *
 * Blank-line-separated chunks of `text` each become one paragraph.
 *
 * @param text - Plain text; LF, CRLF and CR line endings are all accepted.
 * @param onProgress - Optional callback receiving 20, 40, 70 and finally 100.
 */
async function createDOCXFromText(text: string, onProgress?: ProgressCallback): Promise<Blob> {
  onProgress?.(20);

  // Normalize line endings first so that CRLF input still splits on blank lines.
  const paragraphs = text
    .replace(/\r\n?/g, '\n')
    .split('\n\n')
    .map((p) => p.trim())
    .filter((p) => p.length > 0);

  onProgress?.(40);

  // Create document with paragraphs
  const doc = new Document({
    sections: [
      {
        properties: {},
        children: paragraphs.map(
          (para) =>
            new Paragraph({
              children: [new TextRun(para)],
              spacing: BODY_PARAGRAPH_SPACING,
            })
        ),
      },
    ],
  });

  onProgress?.(70);

  // Generate DOCX blob
  const blob = await Packer.toBlob(doc);

  onProgress?.(100);

  return blob;
}

/**
 * Create DOCX from Markdown
 *
 * Only `#`, `##` and `###` headings are recognised; every other non-blank line
 * is body text. Consecutive body lines are joined with a space into one
 * paragraph, and a blank line or a heading ends the current paragraph.
 *
 * @param markdown - Markdown source; LF, CRLF and CR line endings are all accepted.
 * @param onProgress - Optional callback receiving 10, 60, 80 and finally 100.
 */
async function createDOCXFromMarkdown(markdown: string, onProgress?: ProgressCallback): Promise<Blob> {
  onProgress?.(10);

  // Parse markdown and create structured document
  const lines = markdown.split(/\r\n?|\n/);
  const children: Paragraph[] = [];
  let currentParagraph: string[] = [];

  // Emit the buffered body lines (if any) as a single paragraph.
  const flushParagraph = (): void => {
    if (currentParagraph.length === 0) return;
    children.push(
      new Paragraph({
        children: [new TextRun(currentParagraph.join(' '))],
        spacing: BODY_PARAGRAPH_SPACING,
      })
    );
    currentParagraph = [];
  };

  for (const line of lines) {
    const headingStyle = MARKDOWN_HEADINGS.find(({ prefix }) => line.startsWith(prefix));

    if (headingStyle) {
      flushParagraph();
      children.push(
        new Paragraph({
          text: line.substring(headingStyle.prefix.length),
          heading: headingStyle.heading,
          spacing: headingStyle.spacing,
        })
      );
    } else if (line.trim() === '') {
      // Empty line - paragraph break
      flushParagraph();
    } else {
      // Regular text
      currentParagraph.push(line);
    }
  }

  // Add remaining paragraph
  flushParagraph();

  onProgress?.(60);

  const doc = new Document({
    sections: [
      {
        properties: {},
        children,
      },
    ],
  });

  onProgress?.(80);

  const blob = await Packer.toBlob(doc);

  onProgress?.(100);

  return blob;
}

/**
 * Reduce an HTML document to plain text with blank-line-separated paragraphs.
 *
 * `<script>` and `<style>` elements are removed with their contents, `<br>` and
 * common block-level closing tags become paragraph breaks, every other tag is
 * replaced by a space, and a small fixed set of entities is decoded.
 * This is a regex-based best effort, not an HTML parser or a sanitizer.
 */
function htmlToPlainText(html: string): string {
  const withBreaks = html
    .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
    .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
    // Source formatting whitespace carries no meaning in HTML; collapse it
    // before inserting our own paragraph markers.
    .replace(/\s+/g, ' ')
    .replace(/<br\s*\/?>/gi, '\n\n')
    .replace(/<\/(?:p|div|h[1-6]|li|tr|blockquote|section|article|pre)\s*>/gi, '\n\n')
    .replace(/<[^>]*>/g, ' ');

  const decoded = withBreaks
    .replace(/&nbsp;/g, ' ')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    // Decode &amp; last so that e.g. "&amp;lt;" yields the literal text "&lt;".
    .replace(/&amp;/g, '&');

  return decoded
    .split('\n\n')
    .map((paragraph) => paragraph.replace(/\s+/g, ' ').trim())
    .filter((paragraph) => paragraph.length > 0)
    .join('\n\n');
}

/**
 * Convert plain text to DOCX
 *
 * Never rejects: failures are logged and returned as `{ success: false, error }`.
 */
export async function textToDOCX(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    onProgress?.(10);

    const text = await file.text();

    onProgress?.(20);

    // Document generation is mapped onto the 20-100 range.
    const blob = await createDOCXFromText(text, scaledProgress(onProgress, 20, 0.8));

    return {
      success: true,
      blob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    return failureResult('Text to DOCX', 'Failed to convert text to DOCX', error, startTime);
  }
}

/**
 * Convert Markdown to DOCX
 *
 * Never rejects: failures are logged and returned as `{ success: false, error }`.
 */
export async function markdownToDOCX(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    onProgress?.(10);

    const markdown = await file.text();

    onProgress?.(20);

    // Document generation is mapped onto the 20-100 range.
    const blob = await createDOCXFromMarkdown(markdown, scaledProgress(onProgress, 20, 0.8));

    return {
      success: true,
      blob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    return failureResult('Markdown to DOCX', 'Failed to convert Markdown to DOCX', error, startTime);
  }
}

/**
 * Convert HTML to DOCX
 *
 * The HTML is reduced to plain text (markup and inline formatting are dropped)
 * and each block-level element becomes one DOCX paragraph.
 * Never rejects: failures are logged and returned as `{ success: false, error }`.
 */
export async function htmlToDOCX(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    onProgress?.(10);

    const html = await file.text();

    onProgress?.(20);

    // Strip HTML tags to get plain text
    const text = htmlToPlainText(html);

    onProgress?.(50);

    // Document generation is mapped onto the 50-100 range.
    const blob = await createDOCXFromText(text, scaledProgress(onProgress, 50, 0.5));

    return {
      success: true,
      blob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    return failureResult('HTML to DOCX', 'Failed to convert HTML to DOCX', error, startTime);
  }
}