diff --git a/README.md b/README.md index 4f775dc..c0f72ff 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A modern, browser-based file conversion application built with Next.js 16, Tailw - **🎬 Video Conversion** - Convert between MP4, WebM, AVI, MOV, MKV, and GIF - **🎵 Audio Conversion** - Convert between MP3, WAV, OGG, AAC, and FLAC - **🖼️ Image Conversion** - Convert between PNG, JPG, WebP, GIF, BMP, TIFF, and SVG -- **📄 Document Conversion** - (Coming soon) Convert between PDF, DOCX, Markdown, HTML, and TXT +- **📄 Document Conversion** - Convert between Markdown, HTML, and Plain Text - **🔒 Privacy First** - All conversions happen locally in your browser, no server uploads - **⚡ Fast & Efficient** - Powered by WebAssembly for near-native performance - **🎨 Beautiful UI** - Modern, responsive design with dark/light theme support @@ -23,6 +23,9 @@ A modern, browser-based file conversion application built with Next.js 16, Tailw - **Tailwind CSS 4** - Utility-first CSS with OKLCH color system - **FFmpeg.wasm** - Video and audio conversion - **ImageMagick WASM** - Image processing and conversion +- **Marked** - Markdown to HTML conversion +- **Turndown** - HTML to Markdown conversion +- **DOMPurify** - HTML sanitization - **Fuse.js** - Fuzzy search for format selection - **Lucide React** - Beautiful icon library @@ -111,8 +114,14 @@ convert-ui/ ### Images (ImageMagick) - **Input/Output:** PNG, JPG, WebP, GIF, BMP, TIFF, SVG -### Documents (Coming Soon) -- **Planned:** PDF, DOCX, Markdown, HTML, Plain Text +### Documents +- **Markdown → HTML** - Full GitHub Flavored Markdown support with styling +- **HTML → Markdown** - Clean conversion with formatting preservation +- **Markdown ↔ Plain Text** - Strip or add basic formatting +- **HTML → Plain Text** - Extract text content +- **Plain Text → HTML** - Convert to formatted HTML document + +**Note:** Uses lightweight JavaScript libraries (marked, turndown) instead of Pandoc WASM for fast, reliable conversions. ## How It Works diff --git a/lib/converters/pandocService.ts b/lib/converters/pandocService.ts index d2f7fff..db13f7a 100644 --- a/lib/converters/pandocService.ts +++ b/lib/converters/pandocService.ts @@ -1,7 +1,16 @@ +import { marked } from 'marked'; +import TurndownService from 'turndown'; import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion'; +// Import DOMPurify only on client side +let DOMPurify: any; +if (typeof window !== 'undefined') { + DOMPurify = require('dompurify'); +} + /** - * Convert document using Pandoc (placeholder - not yet implemented) + * Convert document using Markdown/HTML converters + * Note: This uses lightweight JS libraries instead of Pandoc WASM (which isn't widely available) */ export async function convertWithPandoc( file: File, @@ -9,21 +18,283 @@ export async function convertWithPandoc( options: ConversionOptions = {}, onProgress?: ProgressCallback ): Promise { - // TODO: Implement Pandoc WASM conversion when available - // For now, return an error + const startTime = Date.now(); - if (onProgress) onProgress(0); + try { + if (onProgress) onProgress(10); - return { - success: false, - error: 'Pandoc WASM converter is not yet implemented. Document conversion coming soon!', - }; + // Read file content as text + const text = await file.text(); + + if (onProgress) onProgress(30); + + // Detect input format from file extension or content + const inputExt = file.name.split('.').pop()?.toLowerCase(); + let result: string; + + if (onProgress) onProgress(50); + + // Perform conversion based on input and output formats + if (inputExt === 'md' || inputExt === 'markdown') { + // Markdown input + if (outputFormat === 'html') { + result = await markdownToHtml(text); + } else if (outputFormat === 'txt') { + result = markdownToText(text); + } else { + throw new Error(`Conversion from Markdown to ${outputFormat} not supported`); + } + } else if (inputExt === 'html' || inputExt === 'htm') { + // HTML input + if (outputFormat === 'md' || outputFormat === 'markdown') { + result = await htmlToMarkdown(text); + } else if (outputFormat === 'txt') { + result = htmlToText(text); + } else { + throw new Error(`Conversion from HTML to ${outputFormat} not supported`); + } + } else if (inputExt === 'txt') { + // Plain text input + if (outputFormat === 'md' || outputFormat === 'markdown') { + result = textToMarkdown(text); + } else if (outputFormat === 'html') { + result = textToHtml(text); + } else { + throw new Error(`Conversion from TXT to ${outputFormat} not supported`); + } + } else { + throw new Error(`Input format ${inputExt} not supported`); + } + + if (onProgress) onProgress(90); + + // Create blob from result + const blob = new Blob([result], { type: getMimeType(outputFormat) }); + + if (onProgress) onProgress(100); + + const duration = Date.now() - startTime; + + return { + success: true, + blob, + duration, + }; + } catch (error) { + console.error('[Document Converter] Conversion error:', error); + + return { + success: false, + error: error instanceof Error ? error.message : 'Unknown conversion error', + duration: Date.now() - startTime, + }; + } } /** - * Convert Markdown to HTML (placeholder) + * Convert Markdown to HTML */ -export async function markdownToHtml( +async function markdownToHtml(markdown: string): Promise { + // Configure marked options + marked.setOptions({ + gfm: true, // GitHub Flavored Markdown + breaks: true, // Convert \n to
+ }); + + const html = await marked.parse(markdown); + + // Sanitize HTML for security + const sanitized = DOMPurify.sanitize(html); + + // Wrap in basic HTML document + return ` + + + + + Converted Document + + + +${sanitized} + +`; +} + +/** + * Convert HTML to Markdown + */ +async function htmlToMarkdown(html: string): Promise { + // Sanitize HTML first + const sanitized = DOMPurify.sanitize(html); + + // Configure TurndownService + const turndownService = new TurndownService({ + headingStyle: 'atx', // Use # for headings + codeBlockStyle: 'fenced', // Use ``` for code blocks + bulletListMarker: '-', // Use - for bullet lists + }); + + const markdown = turndownService.turndown(sanitized); + return markdown; +} + +/** + * Convert Markdown to plain text (strip formatting) + */ +function markdownToText(markdown: string): string { + // Remove markdown syntax + let text = markdown + // Remove headers + .replace(/^#{1,6}\s+/gm, '') + // Remove bold/italic + .replace(/(\*\*|__)(.*?)\1/g, '$2') + .replace(/(\*|_)(.*?)\1/g, '$2') + // Remove links + .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') + // Remove images + .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1') + // Remove code blocks + .replace(/```[\s\S]*?```/g, '') + // Remove inline code + .replace(/`([^`]+)`/g, '$1') + // Remove blockquotes + .replace(/^>\s+/gm, '') + // Remove horizontal rules + .replace(/^-{3,}$/gm, '') + // Clean up multiple newlines + .replace(/\n{3,}/g, '\n\n'); + + return text.trim(); +} + +/** + * Convert HTML to plain text + */ +function htmlToText(html: string): string { + // Sanitize HTML first + const sanitized = DOMPurify.sanitize(html, { ALLOWED_TAGS: [] }); + + // Clean up whitespace + return sanitized + .replace(/\s+/g, ' ') + .trim(); +} + +/** + * Convert plain text to Markdown + */ +function textToMarkdown(text: string): string { + // Add basic markdown formatting + // Treat lines as paragraphs + return text + .split('\n\n') + .filter(p => p.trim()) + .join('\n\n'); +} + +/** + * Convert plain text to HTML + */ +function textToHtml(text: string): string { + // Escape HTML entities + const escaped = text + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); + + // Convert newlines to paragraphs + const paragraphs = escaped + .split('\n\n') + .filter(p => p.trim()) + .map(p => `

${p.replace(/\n/g, '
')}

`) + .join('\n'); + + return ` + + + + + Converted Document + + + +${paragraphs} + +`; +} + +/** + * Get MIME type for output format + */ +function getMimeType(format: string): string { + const mimeTypes: Record = { + html: 'text/html', + htm: 'text/html', + md: 'text/markdown', + markdown: 'text/markdown', + txt: 'text/plain', + }; + + return mimeTypes[format.toLowerCase()] || 'text/plain'; +} + +/** + * Convert Markdown to HTML (convenience function) + */ +export async function markdownToHtmlFile( file: File, onProgress?: ProgressCallback ): Promise { @@ -31,9 +302,9 @@ export async function markdownToHtml( } /** - * Convert HTML to Markdown (placeholder) + * Convert HTML to Markdown (convenience function) */ -export async function htmlToMarkdown( +export async function htmlToMarkdownFile( file: File, onProgress?: ProgressCallback ): Promise { diff --git a/lib/wasm/wasmLoader.ts b/lib/wasm/wasmLoader.ts index 9f20863..fc19f09 100644 --- a/lib/wasm/wasmLoader.ts +++ b/lib/wasm/wasmLoader.ts @@ -76,16 +76,30 @@ export async function loadImageMagick(): Promise { } /** - * Load Pandoc WASM module (placeholder for future implementation) + * Load Pandoc converter (uses pure JavaScript libraries, not WASM) + * Note: We use marked + turndown instead of actual Pandoc WASM */ export async function loadPandoc(): Promise { if (pandocInstance && moduleState.pandoc) { return pandocInstance; } - // TODO: Implement Pandoc WASM loading when available - // For now, throw an error - throw new Error('Pandoc WASM module is not yet implemented'); + try { + // Import the converter libraries + const [marked, turndown] = await Promise.all([ + import('marked'), + import('turndown'), + ]); + + pandocInstance = { marked, turndown }; + moduleState.pandoc = true; + console.log('Document converter loaded successfully'); + + return pandocInstance; + } catch (error) { + console.error('Failed to load document converter:', error); + throw new Error('Failed to load document converter'); + } } /** diff --git a/package.json b/package.json index 11cefb8..a821438 100644 --- a/package.json +++ b/package.json @@ -13,18 +13,22 @@ "@ffmpeg/util": "^0.12.1", "@imagemagick/magick-wasm": "^0.0.30", "clsx": "^2.1.1", + "dompurify": "^3.2.2", "fuse.js": "^7.1.0", "lucide-react": "^0.553.0", + "marked": "^15.0.4", "next": "^16.0.0", "react": "^19.0.0", "react-dom": "^19.0.0", - "tailwind-merge": "^3.3.1" + "tailwind-merge": "^3.3.1", + "turndown": "^7.2.0" }, "devDependencies": { "@tailwindcss/postcss": "^4.1.17", "@types/node": "^22", "@types/react": "^19", "@types/react-dom": "^19", + "@types/turndown": "^5.0.5", "eslint": "^9", "eslint-config-next": "^16.0.0", "tailwindcss": "^4.0.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1c25381..ad9756f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -20,12 +20,18 @@ importers: clsx: specifier: ^2.1.1 version: 2.1.1 + dompurify: + specifier: ^3.2.2 + version: 3.3.0 fuse.js: specifier: ^7.1.0 version: 7.1.0 lucide-react: specifier: ^0.553.0 version: 0.553.0(react@19.2.0) + marked: + specifier: ^15.0.4 + version: 15.0.12 next: specifier: ^16.0.0 version: 16.0.3(@babel/core@7.28.5)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) @@ -38,6 +44,9 @@ importers: tailwind-merge: specifier: ^3.3.1 version: 3.4.0 + turndown: + specifier: ^7.2.0 + version: 7.2.2 devDependencies: '@tailwindcss/postcss': specifier: ^4.1.17 @@ -51,6 +60,9 @@ importers: '@types/react-dom': specifier: ^19 version: 19.2.3(@types/react@19.2.5) + '@types/turndown': + specifier: ^5.0.5 + version: 5.0.6 eslint: specifier: ^9 version: 9.39.1(jiti@2.6.1) @@ -368,6 +380,9 @@ packages: '@jridgewell/trace-mapping@0.3.31': resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@mixmark-io/domino@2.2.0': + resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==} + '@napi-rs/wasm-runtime@0.2.12': resolution: {integrity: sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==} @@ -558,6 +573,12 @@ packages: '@types/react@19.2.5': resolution: {integrity: sha512-keKxkZMqnDicuvFoJbzrhbtdLSPhj/rZThDlKWCDbgXmUg0rEUFtRssDXKYmtXluZlIqiC5VqkCgRwzuyLHKHw==} + '@types/trusted-types@2.0.7': + resolution: {integrity: sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==} + + '@types/turndown@5.0.6': + resolution: {integrity: sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==} + '@typescript-eslint/eslint-plugin@8.46.4': resolution: {integrity: sha512-R48VhmTJqplNyDxCyqqVkFSZIx1qX6PzwqgcXn1olLrzxcSBDlOsbtcnQuQhNtnNiJ4Xe5gREI1foajYaYU2Vg==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} @@ -910,6 +931,9 @@ packages: resolution: {integrity: sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==} engines: {node: '>=0.10.0'} + dompurify@3.3.0: + resolution: {integrity: sha512-r+f6MYR1gGN1eJv0TVQbhA7if/U7P87cdPl3HN5rikqaBSBxLiCb/b9O+2eG0cxz0ghyU+mU1QkbsOwERMYlWQ==} + dunder-proto@1.0.1: resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} engines: {node: '>= 0.4'} @@ -1509,6 +1533,11 @@ packages: magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} + marked@15.0.12: + resolution: {integrity: sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==} + engines: {node: '>= 18'} + hasBin: true + math-intrinsics@1.1.0: resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} engines: {node: '>= 0.4'} @@ -1871,6 +1900,9 @@ packages: tslib@2.8.1: resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + turndown@7.2.2: + resolution: {integrity: sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==} + type-check@0.4.0: resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==} engines: {node: '>= 0.8.0'} @@ -2266,6 +2298,8 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 + '@mixmark-io/domino@2.2.0': {} + '@napi-rs/wasm-runtime@0.2.12': dependencies: '@emnapi/core': 1.7.1 @@ -2415,6 +2449,11 @@ snapshots: dependencies: csstype: 3.2.2 + '@types/trusted-types@2.0.7': + optional: true + + '@types/turndown@5.0.6': {} + '@typescript-eslint/eslint-plugin@8.46.4(@typescript-eslint/parser@8.46.4(eslint@9.39.1(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.1(jiti@2.6.1))(typescript@5.9.3)': dependencies: '@eslint-community/regexpp': 4.12.2 @@ -2788,6 +2827,10 @@ snapshots: dependencies: esutils: 2.0.3 + dompurify@3.3.0: + optionalDependencies: + '@types/trusted-types': 2.0.7 + dunder-proto@1.0.1: dependencies: call-bind-apply-helpers: 1.0.2 @@ -3520,6 +3563,8 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 + marked@15.0.12: {} + math-intrinsics@1.1.0: {} merge2@1.4.1: {} @@ -3947,6 +3992,10 @@ snapshots: tslib@2.8.1: {} + turndown@7.2.2: + dependencies: + '@mixmark-io/domino': 2.2.0 + type-check@0.4.0: dependencies: prelude-ls: 1.2.1