refactor: remove all document conversion support, keep only media conversions

This commit completely removes document conversion functionality to focus
exclusively on media file conversions (video, audio, images).

Changes:
- Remove all document converter services (pandocService.ts, pdfService.ts, docxService.ts)
- Uninstall document-related packages: marked, turndown, dompurify, jspdf, pdfjs-dist, docx, mammoth, @types/turndown
- Remove document formats (PDF, DOCX, Markdown, HTML, TXT) from formatMappings.ts
- Remove pandoc converter from FileConverter.tsx
- Remove pandoc loader and references from wasmLoader.ts
- Update TypeScript types to remove 'pandoc' from ConverterEngine and 'document' from FileCategory
- Remove pandoc from WASMModuleState interface
- Update README.md to remove all document conversion documentation
- Update UI descriptions to reflect media-only conversions

Supported conversions now:
- Video: MP4, WebM, AVI, MOV, MKV, GIF
- Audio: MP3, WAV, OGG, AAC, FLAC
- Images: PNG, JPG, WebP, GIF, BMP, TIFF, SVG

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-17 11:35:20 +01:00
parent de3997f4df
commit 594a0ca314
10 changed files with 6 additions and 1867 deletions

View File

@@ -1,13 +1,12 @@
# Convert UI
A modern, browser-based file conversion application built with Next.js 16, Tailwind CSS 4, and WebAssembly. Convert videos, images, and documents directly in your browser without uploading files to any server.
A modern, browser-based file conversion application built with Next.js 16, Tailwind CSS 4, and WebAssembly. Convert videos, audio, and images directly in your browser without uploading files to any server.
## Features
- **🎬 Video Conversion** - Convert between MP4, WebM, AVI, MOV, MKV, and GIF
- **🎵 Audio Conversion** - Convert between MP3, WAV, OGG, AAC, and FLAC
- **🖼️ Image Conversion** - Convert between PNG, JPG, WebP, GIF, BMP, TIFF, and SVG
- **📄 Document Conversion** - Convert between PDF, Markdown, HTML, and Plain Text
- **🔒 Privacy First** - All conversions happen locally in your browser, no server uploads
- **⚡ Fast & Efficient** - Powered by WebAssembly for near-native performance
- **🎨 Beautiful UI** - Modern, responsive design with dark/light theme support
@@ -23,13 +22,6 @@ A modern, browser-based file conversion application built with Next.js 16, Tailw
- **Tailwind CSS 4** - Utility-first CSS with OKLCH color system
- **FFmpeg.wasm** - Video and audio conversion
- **ImageMagick WASM** - Image processing and conversion
- **Marked** - Markdown to HTML conversion
- **Turndown** - HTML to Markdown conversion
- **DOMPurify** - HTML sanitization
- **jsPDF** - PDF generation
- **PDF.js** - PDF text extraction
- **docx** - DOCX document generation
- **mammoth** - DOCX document reading
- **Fuse.js** - Fuzzy search for format selection
- **Lucide React** - Beautiful icon library
@@ -92,8 +84,7 @@ convert-ui/
├── lib/
│ ├── converters/ # Conversion services
│ │ ├── ffmpegService.ts # Video/audio conversion
│ │ ├── imagemagickService.ts # Image conversion
│ │ └── pandocService.ts # Document conversion (placeholder)
│ │ └── imagemagickService.ts # Image conversion
│ ├── wasm/
│ │ └── wasmLoader.ts # WASM module lazy loading
│ ├── storage/
@@ -118,31 +109,6 @@ convert-ui/
### Images (ImageMagick)
- **Input/Output:** PNG, JPG, WebP, GIF, BMP, TIFF, SVG
### Documents
- **PDF → Text/Markdown** - Extract text from PDF files with page-by-page processing
- **Markdown/HTML/Text → PDF** - Generate formatted PDF documents
- **DOCX → Text/HTML/Markdown** - Extract content from Word documents
- **Markdown/HTML/Text → DOCX** - Create formatted Word documents with headings
- **Markdown → HTML** - Full GitHub Flavored Markdown support with styling
- **HTML → Markdown** - Clean conversion with formatting preservation
- **Markdown ↔ Plain Text** - Strip or add basic formatting
- **HTML → Plain Text** - Extract text content
- **Plain Text → HTML** - Convert to formatted HTML document
**Supported PDF Operations:**
- Read PDFs and extract all text content
- Convert extracted text to Markdown or plain text
- Create PDFs from Markdown, HTML, or plain text
- Automatic pagination and formatting
**Supported DOCX Operations:**
- Read DOCX files and extract text, HTML, or Markdown
- Create DOCX files from Markdown with proper heading levels (H1-H3)
- Create DOCX files from HTML or plain text
- Automatic paragraph formatting and spacing
**Note:** Uses PDF.js for reading and jsPDF for generation. Uses mammoth for DOCX reading and docx library for generation. Lightweight JavaScript libraries (marked, turndown) used instead of Pandoc WASM for fast, reliable conversions.
## How It Works
1. **File Upload** - Users can drag-and-drop or click to select a file

View File

@@ -16,7 +16,6 @@ import {
} from '@/lib/utils/formatMappings';
import { convertWithFFmpeg } from '@/lib/converters/ffmpegService';
import { convertWithImageMagick } from '@/lib/converters/imagemagickService';
import { convertWithPandoc } from '@/lib/converters/pandocService';
import { addToHistory } from '@/lib/storage/history';
import type { ConversionJob, ConversionFormat } from '@/types/conversion';
@@ -111,12 +110,6 @@ export function FileConverter() {
);
break;
case 'pandoc':
result = await convertWithPandoc(selectedFile, outputFormat.extension, {}, (progress) => {
setConversionJob((prev) => prev && { ...prev, progress });
});
break;
default:
throw new Error(`Unknown converter: ${outputFormat.converter}`);
}
@@ -184,7 +177,7 @@ export function FileConverter() {
<CardHeader>
<CardTitle>File Converter</CardTitle>
<CardDescription>
Convert videos, images, and documents directly in your browser using WebAssembly
Convert videos, audio, and images directly in your browser using WebAssembly
</CardDescription>
</CardHeader>
<CardContent className="space-y-6">

View File

@@ -1,443 +0,0 @@
import { Document, Packer, Paragraph, TextRun, HeadingLevel } from 'docx';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
/**
 * Reads a DOCX file and returns its raw text via mammoth's `extractRawText`.
 *
 * @param file - The DOCX file to read.
 * @param onProgress - Optional callback; receives 10, 30, 50 and 100 as the steps complete.
 * @returns The `value` string of mammoth's extraction result.
 */
export async function extractTextFromDOCX(file: File, onProgress?: ProgressCallback): Promise<string> {
  const report = (percent: number): void => {
    if (onProgress) onProgress(percent);
  };

  report(10);

  // mammoth is pulled in with a dynamic import so it is only loaded when a DOCX is actually read.
  const docxReader = await import('mammoth');
  report(30);

  const arrayBuffer = await file.arrayBuffer();
  report(50);

  const extraction = await docxReader.extractRawText({ arrayBuffer });
  report(100);

  return extraction.value;
}
/**
 * Reads a DOCX file and returns an HTML fragment via mammoth's `convertToHtml`.
 *
 * @param file - The DOCX file to read.
 * @param onProgress - Optional callback; receives 10, 30, 50 and 100 as the steps complete.
 * @returns The `value` string of mammoth's conversion result.
 */
export async function extractHTMLFromDOCX(file: File, onProgress?: ProgressCallback): Promise<string> {
  const report = (percent: number): void => {
    if (onProgress) onProgress(percent);
  };

  report(10);

  // mammoth is pulled in with a dynamic import so it is only loaded when a DOCX is actually read.
  const docxReader = await import('mammoth');
  report(30);

  const arrayBuffer = await file.arrayBuffer();
  report(50);

  const conversion = await docxReader.convertToHtml({ arrayBuffer });
  report(100);

  return conversion.value;
}
/**
 * Converts a DOCX file into a `text/plain` blob.
 *
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The DOCX file to convert.
 * @param onProgress - Optional callback forwarded unchanged to the text extraction step.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function docxToText(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const extracted = await extractTextFromDOCX(file, onProgress);
    const blob = new Blob([extracted], { type: 'text/plain' });
    return { success: true, blob, duration: elapsed() };
  } catch (error) {
    console.error('[DOCX Converter] DOCX to text error:', error);
    const message = error instanceof Error ? error.message : 'Failed to extract text from DOCX';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Converts a DOCX file into a standalone `text/html` document.
 *
 * The HTML fragment extracted from the DOCX is placed inside a fixed page
 * skeleton (doctype, head with a small stylesheet, body).
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The DOCX file to convert.
 * @param onProgress - Optional callback; extraction progress is forwarded scaled by 0.9,
 *                     then 100 is reported once the page has been assembled.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function docxToHTML(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const fragment = await extractHTMLFromDOCX(file, (progress) => {
      if (onProgress) onProgress(progress * 0.9);
    });

    // Page skeleton up to (and including) the opening <body> tag.
    const pageOpen = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Converted Document</title>
<style>
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 2rem auto;
padding: 0 1rem;
color: #333;
}
</style>
</head>
<body>`;
    const page = `${pageOpen}\n${fragment}\n</body>\n</html>`;

    if (onProgress) onProgress(100);

    const blob = new Blob([page], { type: 'text/html' });
    return { success: true, blob, duration: elapsed() };
  } catch (error) {
    console.error('[DOCX Converter] DOCX to HTML error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert DOCX to HTML';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Converts a DOCX file into a `text/markdown` blob.
 *
 * Works in two stages: DOCX to HTML (mammoth), then HTML to Markdown (turndown).
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The DOCX file to convert.
 * @param onProgress - Optional callback; HTML extraction progress is forwarded scaled by 0.7,
 *                     followed by 80 before the Markdown step and 100 after it.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function docxToMarkdown(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    // Stage 1: DOCX -> HTML fragment.
    const fragment = await extractHTMLFromDOCX(file, (progress) => {
      if (onProgress) onProgress(progress * 0.7);
    });
    if (onProgress) onProgress(80);

    // Stage 2: HTML -> Markdown. turndown is loaded lazily via dynamic import.
    const turndownModule = await import('turndown');
    const TurndownService = turndownModule.default;
    const converter = new TurndownService({
      headingStyle: 'atx',
      codeBlockStyle: 'fenced',
      bulletListMarker: '-',
    });
    const markdown = converter.turndown(fragment);
    if (onProgress) onProgress(100);

    const blob = new Blob([markdown], { type: 'text/markdown' });
    return { success: true, blob, duration: elapsed() };
  } catch (error) {
    console.error('[DOCX Converter] DOCX to Markdown error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert DOCX to Markdown';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Builds a DOCX blob from plain text, one Word paragraph per blank-line-separated chunk.
 *
 * Chunks are obtained by splitting on a double newline; chunks that are empty after
 * trimming are dropped, and each kept chunk is trimmed before being written.
 *
 * @param text - The plain text to place in the document.
 * @param onProgress - Optional callback; receives 20, 40, 70 and 100 as the steps complete.
 * @returns The packed DOCX as a Blob.
 */
async function createDOCXFromText(text: string, onProgress?: ProgressCallback): Promise<Blob> {
  const report = (percent: number): void => {
    if (onProgress) onProgress(percent);
  };

  report(20);

  const chunks = text.split('\n\n').filter((chunk) => chunk.trim());
  report(40);

  const body = chunks.map(
    (chunk) =>
      new Paragraph({
        children: [new TextRun(chunk.trim())],
        spacing: { after: 200 },
      })
  );
  const doc = new Document({
    sections: [{ properties: {}, children: body }],
  });
  report(70);

  const packed = await Packer.toBlob(doc);
  report(100);

  return packed;
}
/**
 * Builds a DOCX blob from Markdown using a minimal line-based parser.
 *
 * Recognised constructs:
 * - lines starting with `# `, `## ` or `### ` become Heading 1/2/3 paragraphs;
 * - blank lines end the current paragraph;
 * - every other line is appended to the current paragraph (lines joined with a space).
 *
 * No other Markdown syntax (deeper headings, lists, emphasis, links, ...) is interpreted;
 * such lines are written out as literal text.
 *
 * Lines are split on LF or CRLF so that files with Windows line endings do not leave
 * a trailing carriage return inside heading or paragraph text.
 *
 * @param markdown - The Markdown source.
 * @param onProgress - Optional callback; receives 10, 60, 80 and 100 as the steps complete.
 * @returns The packed DOCX as a Blob.
 */
async function createDOCXFromMarkdown(markdown: string, onProgress?: ProgressCallback): Promise<Blob> {
  if (onProgress) onProgress(10);

  // Heading prefixes are mutually exclusive ('## x' does not start with '# '),
  // so at most one entry matches any given line.
  const headingStyles = [
    { prefix: '# ', level: HeadingLevel.HEADING_1, spacing: { before: 240, after: 120 } },
    { prefix: '## ', level: HeadingLevel.HEADING_2, spacing: { before: 200, after: 100 } },
    { prefix: '### ', level: HeadingLevel.HEADING_3, spacing: { before: 160, after: 80 } },
  ];

  const children: Paragraph[] = [];
  let pendingLines: string[] = [];

  // Emits the accumulated body lines (if any) as a single paragraph and resets the buffer.
  const flushParagraph = (): void => {
    if (pendingLines.length === 0) return;
    children.push(
      new Paragraph({
        children: [new TextRun(pendingLines.join(' '))],
        spacing: { after: 200 },
      })
    );
    pendingLines = [];
  };

  for (const line of markdown.split(/\r?\n/)) {
    const heading = headingStyles.find(({ prefix }) => line.startsWith(prefix));

    if (heading) {
      // A heading always terminates the paragraph that precedes it.
      flushParagraph();
      children.push(
        new Paragraph({
          text: line.substring(heading.prefix.length),
          heading: heading.level,
          spacing: heading.spacing,
        })
      );
    } else if (line.trim() === '') {
      flushParagraph();
    } else {
      pendingLines.push(line);
    }
  }

  // Text at the end of the input that was not followed by a blank line.
  flushParagraph();

  if (onProgress) onProgress(60);

  const doc = new Document({
    sections: [{ properties: {}, children }],
  });

  if (onProgress) onProgress(80);

  const blob = await Packer.toBlob(doc);

  if (onProgress) onProgress(100);

  return blob;
}
/**
 * Converts a plain-text file into a DOCX blob.
 *
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The text file to convert.
 * @param onProgress - Optional callback; receives 10 and 20 while reading, then the
 *                     document-building progress mapped as `20 + progress * 0.8`.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function textToDOCX(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    if (onProgress) onProgress(10);
    const contents = await file.text();
    if (onProgress) onProgress(20);

    const blob = await createDOCXFromText(contents, (progress) => {
      if (onProgress) onProgress(20 + progress * 0.8);
    });

    return { success: true, blob, duration: elapsed() };
  } catch (error) {
    console.error('[DOCX Converter] Text to DOCX error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert text to DOCX';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Converts a Markdown file into a DOCX blob.
 *
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The Markdown file to convert.
 * @param onProgress - Optional callback; receives 10 and 20 while reading, then the
 *                     document-building progress mapped as `20 + progress * 0.8`.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function markdownToDOCX(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    if (onProgress) onProgress(10);
    const source = await file.text();
    if (onProgress) onProgress(20);

    const blob = await createDOCXFromMarkdown(source, (progress) => {
      if (onProgress) onProgress(20 + progress * 0.8);
    });

    return { success: true, blob, duration: elapsed() };
  } catch (error) {
    console.error('[DOCX Converter] Markdown to DOCX error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert Markdown to DOCX';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Converts an HTML file into a DOCX blob containing its text content.
 *
 * The markup is reduced to plain text with a fixed sequence of regex replacements
 * (script/style elements removed, remaining tags replaced by a space, a handful of
 * named entities decoded, whitespace runs collapsed to one space) before the text is
 * handed to the DOCX builder. Because all whitespace is collapsed, the resulting text
 * contains no blank lines.
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The HTML file to convert.
 * @param onProgress - Optional callback; receives 10, 20 and 50, then the
 *                     document-building progress mapped as `50 + progress * 0.5`.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function htmlToDOCX(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    if (onProgress) onProgress(10);
    const markup = await file.text();
    if (onProgress) onProgress(20);

    // Applied strictly in this order; `&amp;` is decoded last so that an escaped
    // entity such as `&amp;lt;` is not decoded twice.
    const cleanupSteps: Array<[RegExp, string]> = [
      [/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''],
      [/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ''],
      [/<[^>]*>/g, ' '],
      [/&nbsp;/g, ' '],
      [/&quot;/g, '"'],
      [/&apos;/g, "'"],
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&amp;/g, '&'],
      [/\s+/g, ' '],
    ];
    const plainText = cleanupSteps
      .reduce((current, [pattern, replacement]) => current.replace(pattern, replacement), markup)
      .trim();
    if (onProgress) onProgress(50);

    const blob = await createDOCXFromText(plainText, (progress) => {
      if (onProgress) onProgress(50 + progress * 0.5);
    });

    return { success: true, blob, duration: elapsed() };
  } catch (error) {
    console.error('[DOCX Converter] HTML to DOCX error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert HTML to DOCX';
    return { success: false, error: message, duration: elapsed() };
  }
}

View File

@@ -1,379 +0,0 @@
import { marked } from 'marked';
import TurndownService from 'turndown';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
import {
pdfToText,
pdfToMarkdown,
markdownToPDF,
htmlToPDF,
plainTextToPDF,
} from './pdfService';
import {
docxToText,
docxToHTML,
docxToMarkdown,
textToDOCX,
markdownToDOCX,
htmlToDOCX,
} from './docxService';
// Load DOMPurify only when a global `window` exists (i.e. in the browser).
// When the guard is false the variable stays undefined.
// NOTE(review): the helpers in this module call DOMPurify.sanitize unconditionally —
// presumably they are only ever invoked client-side; confirm against callers.
let DOMPurify: any;
if (typeof window !== 'undefined') {
DOMPurify = require('dompurify');
}
/**
 * Converts a document between PDF, DOCX, Markdown, HTML and plain text.
 *
 * Despite the name, no Pandoc binary is involved: conversions are carried out by
 * lightweight JavaScript helpers. The input format is taken from the file name's
 * extension (lower-cased); `outputFormat` is compared as given.
 *
 * Routing, in order:
 * 1. PDF input  -> `txt` / `md` / `markdown` (delegated to the PDF helpers)
 * 2. DOCX input -> `txt` / `html` / `md` / `markdown` (delegated to the DOCX helpers)
 * 3. `pdf` output  from Markdown / HTML / text input (delegated)
 * 4. `docx` output from Markdown / HTML / text input (delegated)
 * 5. text-to-text conversions between Markdown, HTML and plain text (handled here)
 *
 * The file is only decoded as text for step 5. Delegated conversions read the file
 * themselves, so binary PDF/DOCX input is never decoded as a string here, and the
 * progress reported to `onProgress` comes solely from the delegate instead of
 * restarting after this function has already reported 10/30/50.
 *
 * Unsupported combinations and any thrown error are logged and returned as a failed
 * result; this function does not throw.
 *
 * @param file - The source document.
 * @param outputFormat - Target extension (e.g. `html`, `md`, `txt`, `pdf`, `docx`).
 * @param options - Accepted for interface compatibility; not read by this function.
 * @param onProgress - Optional progress callback (values between 0 and 100).
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function convertWithPandoc(
  file: File,
  outputFormat: string,
  options: ConversionOptions = {},
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    // Detect input format from the file extension.
    const inputExt = file.name.split('.').pop()?.toLowerCase();
    const isMarkdownInput = inputExt === 'md' || inputExt === 'markdown';
    const isHtmlInput = inputExt === 'html' || inputExt === 'htm';
    const isTextInput = inputExt === 'txt';
    const wantsMarkdown = outputFormat === 'md' || outputFormat === 'markdown';

    // PDF input
    if (inputExt === 'pdf') {
      if (outputFormat === 'txt') {
        return await pdfToText(file, onProgress);
      }
      if (wantsMarkdown) {
        return await pdfToMarkdown(file, onProgress);
      }
      throw new Error(`Conversion from PDF to ${outputFormat} not supported`);
    }

    // DOCX input
    if (inputExt === 'docx') {
      if (outputFormat === 'txt') {
        return await docxToText(file, onProgress);
      }
      if (outputFormat === 'html') {
        return await docxToHTML(file, onProgress);
      }
      if (wantsMarkdown) {
        return await docxToMarkdown(file, onProgress);
      }
      throw new Error(`Conversion from DOCX to ${outputFormat} not supported`);
    }

    // Conversions TO PDF
    if (outputFormat === 'pdf') {
      if (isMarkdownInput) {
        return await markdownToPDF(file, onProgress);
      }
      if (isHtmlInput) {
        return await htmlToPDF(file, onProgress);
      }
      if (isTextInput) {
        return await plainTextToPDF(file, onProgress);
      }
      throw new Error(`Conversion from ${inputExt} to PDF not supported`);
    }

    // Conversions TO DOCX
    if (outputFormat === 'docx') {
      if (isMarkdownInput) {
        return await markdownToDOCX(file, onProgress);
      }
      if (isHtmlInput) {
        return await htmlToDOCX(file, onProgress);
      }
      if (isTextInput) {
        return await textToDOCX(file, onProgress);
      }
      throw new Error(`Conversion from ${inputExt} to DOCX not supported`);
    }

    // Text-to-text conversions: reject unknown inputs before reading anything.
    if (!isMarkdownInput && !isHtmlInput && !isTextInput) {
      throw new Error(`Input format ${inputExt} not supported`);
    }

    if (onProgress) onProgress(10);

    const text = await file.text();

    if (onProgress) onProgress(30);

    let result: string;

    if (onProgress) onProgress(50);

    if (isMarkdownInput) {
      if (outputFormat === 'html') {
        result = await markdownToHtml(text);
      } else if (outputFormat === 'txt') {
        result = markdownToText(text);
      } else {
        throw new Error(`Conversion from Markdown to ${outputFormat} not supported`);
      }
    } else if (isHtmlInput) {
      if (wantsMarkdown) {
        result = await htmlToMarkdown(text);
      } else if (outputFormat === 'txt') {
        result = htmlToText(text);
      } else {
        throw new Error(`Conversion from HTML to ${outputFormat} not supported`);
      }
    } else {
      // Plain text input
      if (wantsMarkdown) {
        result = textToMarkdown(text);
      } else if (outputFormat === 'html') {
        result = textToHtml(text);
      } else {
        throw new Error(`Conversion from TXT to ${outputFormat} not supported`);
      }
    }

    if (onProgress) onProgress(90);

    const blob = new Blob([result], { type: getMimeType(outputFormat) });

    if (onProgress) onProgress(100);

    return {
      success: true,
      blob,
      duration: Date.now() - startTime,
    };
  } catch (error) {
    console.error('[Document Converter] Conversion error:', error);

    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown conversion error',
      duration: Date.now() - startTime,
    };
  }
}
/**
 * Renders Markdown into a standalone HTML document.
 *
 * The Markdown is parsed with marked (GitHub Flavored Markdown on, single newlines
 * rendered as line breaks), the resulting HTML is passed through DOMPurify, and the
 * sanitized fragment is placed inside a fixed page skeleton with a small stylesheet.
 *
 * Note that `marked.setOptions` is called on the shared `marked` import each time.
 *
 * @param markdown - The Markdown source.
 * @returns The complete HTML document as a string.
 */
async function markdownToHtml(markdown: string): Promise<string> {
  marked.setOptions({
    gfm: true,
    breaks: true,
  });

  const rendered = await marked.parse(markdown);
  const safeBody = DOMPurify.sanitize(rendered);

  // Page skeleton up to (and including) the opening <body> tag.
  const pageOpen = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Converted Document</title>
<style>
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 2rem auto;
padding: 0 1rem;
color: #333;
}
pre {
background: #f4f4f4;
border: 1px solid #ddd;
border-radius: 4px;
padding: 1rem;
overflow-x: auto;
}
code {
background: #f4f4f4;
padding: 0.2rem 0.4rem;
border-radius: 3px;
font-family: 'Courier New', monospace;
}
blockquote {
border-left: 4px solid #ddd;
margin: 1rem 0;
padding-left: 1rem;
color: #666;
}
table {
border-collapse: collapse;
width: 100%;
margin: 1rem 0;
}
th, td {
border: 1px solid #ddd;
padding: 0.5rem;
text-align: left;
}
th {
background: #f4f4f4;
}
</style>
</head>
<body>`;

  return `${pageOpen}\n${safeBody}\n</body>\n</html>`;
}
/**
 * Converts HTML to Markdown.
 *
 * The input is first passed through DOMPurify, then converted with a TurndownService
 * configured for ATX (`#`) headings, fenced code blocks and `-` bullet markers.
 *
 * @param html - The HTML source.
 * @returns The Markdown produced by turndown.
 */
async function htmlToMarkdown(html: string): Promise<string> {
  const safeHtml = DOMPurify.sanitize(html);

  const converter = new TurndownService({
    headingStyle: 'atx',
    codeBlockStyle: 'fenced',
    bulletListMarker: '-',
  });

  return converter.turndown(safeHtml);
}
/**
 * Strips common Markdown syntax and returns the remaining plain text.
 *
 * This is a regex-based approximation, not a Markdown parser. The steps run in this order:
 * header markers, bold, italic, images, links, fenced code blocks (removed entirely,
 * including their content), inline code markers, blockquote markers, `---` rules,
 * and finally runs of three or more newlines are collapsed to two.
 *
 * Images are handled before links: the link pattern also matches the `[alt](url)`
 * part of `![alt](url)`, so running it first would leave a stray `!` in the output.
 *
 * @param markdown - The Markdown source.
 * @returns The text with formatting removed, trimmed of leading/trailing whitespace.
 */
function markdownToText(markdown: string): string {
  const text = markdown
    // Remove headers
    .replace(/^#{1,6}\s+/gm, '')
    // Remove bold/italic
    .replace(/(\*\*|__)(.*?)\1/g, '$2')
    .replace(/(\*|_)(.*?)\1/g, '$2')
    // Replace images with their alt text (must precede the link rule)
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
    // Replace links with their label
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
    // Remove code blocks
    .replace(/```[\s\S]*?```/g, '')
    // Remove inline code
    .replace(/`([^`]+)`/g, '$1')
    // Remove blockquotes
    .replace(/^>\s+/gm, '')
    // Remove horizontal rules
    .replace(/^-{3,}$/gm, '')
    // Clean up multiple newlines
    .replace(/\n{3,}/g, '\n\n');

  return text.trim();
}
/**
 * Reduces HTML to plain text.
 *
 * DOMPurify is invoked with an empty `ALLOWED_TAGS` list, and every run of whitespace
 * in its output is then collapsed to a single space before trimming.
 *
 * NOTE(review): the string returned by DOMPurify may still contain HTML entities
 * (e.g. `&amp;`); nothing here decodes them — confirm whether that is intended.
 *
 * @param html - The HTML source.
 * @returns The whitespace-normalized text.
 */
function htmlToText(html: string): string {
  const tagless = DOMPurify.sanitize(html, { ALLOWED_TAGS: [] });
  const singleSpaced = tagless.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
/**
 * Turns plain text into Markdown by normalizing paragraph separation.
 *
 * The text is split on double newlines, chunks that are blank after trimming are
 * dropped, and the remaining chunks are re-joined with a double newline. Chunk
 * contents themselves are left untouched.
 *
 * @param text - The plain text.
 * @returns The re-joined text.
 */
function textToMarkdown(text: string): string {
  const kept: string[] = [];

  for (const chunk of text.split('\n\n')) {
    if (chunk.trim()) {
      kept.push(chunk);
    }
  }

  return kept.join('\n\n');
}
/**
 * Wraps plain text in a standalone HTML document.
 *
 * The five characters `&`, `<`, `>`, `"` and `'` are escaped, line endings are
 * normalized to LF, blank-line-separated chunks become `<p>` elements, and single
 * newlines inside a chunk become `<br>`.
 *
 * Line endings are normalized first because the paragraph split looks for `\n\n`:
 * without it, CRLF text would never be split into paragraphs and carriage returns
 * would leak into the markup.
 *
 * @param text - The plain text.
 * @returns The complete HTML document as a string.
 */
function textToHtml(text: string): string {
  // Escape HTML entities (`&` first so the other escapes are not re-escaped).
  const escaped = text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;');

  // Treat CRLF and lone CR as LF.
  const normalized = escaped.replace(/\r\n?/g, '\n');

  // Convert blank-line-separated chunks to paragraphs.
  const paragraphs = normalized
    .split('\n\n')
    .filter((p) => p.trim())
    .map((p) => ` <p>${p.replace(/\n/g, '<br>')}</p>`)
    .join('\n');

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Converted Document</title>
<style>
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 2rem auto;
padding: 0 1rem;
color: #333;
}
</style>
</head>
<body>
${paragraphs}
</body>
</html>`;
}
/**
 * Returns the MIME type for a text output format.
 *
 * The lookup is case-insensitive and falls back to `text/plain` for anything that
 * is not listed. A `Map` is used instead of a plain object so that names living on
 * `Object.prototype` (such as `constructor`) cannot be returned by accident.
 *
 * @param format - Output format / extension, e.g. `html`, `md`, `txt`.
 * @returns The matching MIME type, or `text/plain` when the format is unknown.
 */
function getMimeType(format: string): string {
  const mimeTypes = new Map<string, string>([
    ['html', 'text/html'],
    ['htm', 'text/html'],
    ['md', 'text/markdown'],
    ['markdown', 'text/markdown'],
    ['txt', 'text/plain'],
  ]);

  return mimeTypes.get(format.toLowerCase()) ?? 'text/plain';
}
/**
 * Convenience wrapper: converts a file to HTML.
 *
 * Calls `convertWithPandoc` with the output format fixed to `'html'` and empty options.
 * The input format is whatever `convertWithPandoc` detects from the file name; this
 * wrapper does not itself check that the file is Markdown.
 *
 * @param file - The source file.
 * @param onProgress - Optional progress callback, passed through unchanged.
 * @returns The result produced by `convertWithPandoc`.
 */
export async function markdownToHtmlFile(
file: File,
onProgress?: ProgressCallback
): Promise<ConversionResult> {
return convertWithPandoc(file, 'html', {}, onProgress);
}
/**
 * Convenience wrapper: converts a file to Markdown.
 *
 * Calls `convertWithPandoc` with the output format fixed to `'md'` and empty options.
 * The input format is whatever `convertWithPandoc` detects from the file name; this
 * wrapper does not itself check that the file is HTML.
 *
 * @param file - The source file.
 * @param onProgress - Optional progress callback, passed through unchanged.
 * @returns The result produced by `convertWithPandoc`.
 */
export async function htmlToMarkdownFile(
file: File,
onProgress?: ProgressCallback
): Promise<ConversionResult> {
return convertWithPandoc(file, 'md', {}, onProgress);
}

View File

@@ -1,334 +0,0 @@
import { jsPDF } from 'jspdf';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
/**
 * Extracts the text of every page of a PDF using PDF.js.
 *
 * Text items on a page are joined with a single space; pages are separated by a
 * blank line. The combined text is trimmed before being returned.
 *
 * The PDF.js loading task is destroyed in a `finally` block so that the document and
 * its worker are released whether extraction succeeds or fails.
 *
 * @param file - The PDF file to read.
 * @param onProgress - Optional callback; receives 10, 20, 30 and 50 during loading, a
 *                     rounded value between 50 and 90 after each page, then 100.
 * @returns The extracted text.
 * @throws Whatever PDF.js throws when the document cannot be loaded or read.
 */
export async function extractTextFromPDF(file: File, onProgress?: ProgressCallback): Promise<string> {
  if (onProgress) onProgress(10);

  // Dynamically import pdfjs-dist (client-side only)
  const pdfjsLib = await import('pdfjs-dist');

  // NOTE(review): the worker script is fetched from a third-party CDN using a
  // protocol-relative URL — confirm this is acceptable for an app that advertises
  // fully local processing, and that the CDN hosts this pdfjs-dist version.
  pdfjsLib.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.min.mjs`;

  if (onProgress) onProgress(20);

  const arrayBuffer = await file.arrayBuffer();

  if (onProgress) onProgress(30);

  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });

  try {
    const pdf = await loadingTask.promise;

    if (onProgress) onProgress(50);

    const numPages = pdf.numPages;
    const pageTexts: string[] = [];

    // PDF.js page numbers are 1-based.
    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();

      // Items without a `str` property contribute an empty string.
      const pageText = textContent.items
        .map((item) => ('str' in item ? item.str : ''))
        .join(' ');

      pageTexts.push(pageText);

      if (onProgress) {
        const progress = 50 + (pageNum / numPages) * 40;
        onProgress(Math.round(progress));
      }
    }

    if (onProgress) onProgress(100);

    return pageTexts.join('\n\n').trim();
  } finally {
    // Release the document and its worker.
    await loadingTask.destroy();
  }
}
/**
 * Converts a PDF file into a `text/plain` blob.
 *
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional callback forwarded unchanged to the text extraction step.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function pdfToText(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const extracted = await extractTextFromPDF(file, onProgress);
    const blob = new Blob([extracted], { type: 'text/plain' });
    return { success: true, blob, duration: elapsed() };
  } catch (error) {
    console.error('[PDF Converter] PDF to text error:', error);
    const message = error instanceof Error ? error.message : 'Failed to extract text from PDF';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Converts a PDF file into a `text/markdown` blob.
 *
 * The extracted text is only normalized into paragraphs (split on double newlines,
 * blank chunks dropped, re-joined with a double newline); no Markdown structure is
 * inferred from the PDF.
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional callback; extraction progress is forwarded scaled by 0.9,
 *                     then 100 is reported once the Markdown text is ready.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function pdfToMarkdown(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const extracted = await extractTextFromPDF(file, (progress) => {
      if (onProgress) onProgress(progress * 0.9);
    });

    const chunks = extracted.split('\n\n').filter((chunk) => chunk.trim());
    const markdown = chunks.join('\n\n');

    if (onProgress) onProgress(100);

    const blob = new Blob([markdown], { type: 'text/markdown' });
    return { success: true, blob, duration: elapsed() };
  } catch (error) {
    console.error('[PDF Converter] PDF to markdown error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert PDF to Markdown';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Renders plain text into a PDF blob with jsPDF.
 *
 * The document is portrait A4 measured in millimetres, set in 12pt Helvetica with a
 * 20mm margin and a 7mm line advance. Text is wrapped to the printable width and a
 * new page is started whenever the next line would cross the bottom margin.
 *
 * @param text - The text to render.
 * @param filename - Accepted for interface compatibility; not read by this function.
 * @param onProgress - Optional callback; receives 20, 40 and 60 during setup, a rounded
 *                     value between 60 and 90 on every tenth line, then 90 and 100.
 * @returns The generated PDF as a Blob.
 */
export async function textToPDF(
  text: string,
  filename: string = 'document.pdf',
  onProgress?: ProgressCallback
): Promise<Blob> {
  const report = (percent: number): void => {
    if (onProgress) onProgress(percent);
  };

  report(20);

  const pdf = new jsPDF({
    orientation: 'portrait',
    unit: 'mm',
    format: 'a4',
  });

  report(40);

  pdf.setFont('helvetica');
  pdf.setFontSize(12);

  // Layout metrics (millimetres).
  const margin = 20;
  const lineHeight = 7;
  const pageWidth = pdf.internal.pageSize.getWidth();
  const pageHeight = pdf.internal.pageSize.getHeight();
  const maxWidth = pageWidth - 2 * margin;
  let cursorY = margin;

  report(60);

  const wrappedLines: string[] = pdf.splitTextToSize(text, maxWidth);

  wrappedLines.forEach((line, index) => {
    // Start a new page when this line would cross the bottom margin.
    if (cursorY + lineHeight > pageHeight - margin) {
      pdf.addPage();
      cursorY = margin;
    }

    pdf.text(line, margin, cursorY);
    cursorY += lineHeight;

    // Throttle progress reports to every tenth line.
    if (onProgress && index % 10 === 0) {
      const progress = 60 + (index / wrappedLines.length) * 30;
      onProgress(Math.round(progress));
    }
  });

  report(90);

  const pdfBlob = pdf.output('blob');

  report(100);

  return pdfBlob;
}
/**
 * Converts a Markdown file into a PDF blob containing its text.
 *
 * The Markdown is rendered to HTML with marked, the tags are stripped and a handful
 * of named entities decoded, and the resulting plain text is laid out by `textToPDF`.
 * Formatting such as headings or emphasis is therefore not preserved visually.
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The Markdown file to convert.
 * @param onProgress - Optional callback; receives 10, 20, 40 and 60, then the PDF
 *                     generation progress mapped as `60 + progress * 0.4`.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function markdownToPDF(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    if (onProgress) onProgress(10);
    const source = await file.text();
    if (onProgress) onProgress(20);

    // marked is loaded lazily via dynamic import.
    const markedModule = await import('marked');
    const rendered = await markedModule.marked.parse(source);
    if (onProgress) onProgress(40);

    // Applied strictly in this order; `&amp;` is decoded last so that an escaped
    // entity such as `&amp;lt;` is not decoded twice.
    const cleanupSteps: Array<[RegExp, string]> = [
      [/<[^>]*>/g, ''],
      [/&nbsp;/g, ' '],
      [/&quot;/g, '"'],
      [/&apos;/g, "'"],
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&amp;/g, '&'],
    ];
    const plainText = cleanupSteps.reduce(
      (current, [pattern, replacement]) => current.replace(pattern, replacement),
      rendered
    );
    if (onProgress) onProgress(60);

    const targetName = file.name.replace(/\.md$/, '.pdf');
    const pdfBlob = await textToPDF(plainText, targetName, (progress) => {
      if (onProgress) onProgress(60 + progress * 0.4);
    });

    return { success: true, blob: pdfBlob, duration: elapsed() };
  } catch (error) {
    console.error('[PDF Converter] Markdown to PDF error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert Markdown to PDF';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Converts an HTML file into a PDF blob containing its text content.
 *
 * The markup is reduced to plain text with a fixed sequence of regex replacements
 * (script/style elements removed, remaining tags replaced by a space, a handful of
 * named entities decoded, whitespace runs collapsed to one space) and the result is
 * laid out by `textToPDF`.
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The HTML file to convert.
 * @param onProgress - Optional callback; receives 10, 30 and 50, then the PDF
 *                     generation progress mapped as `50 + progress * 0.5`.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function htmlToPDF(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    if (onProgress) onProgress(10);
    const markup = await file.text();
    if (onProgress) onProgress(30);

    // Applied strictly in this order; `&amp;` is decoded last so that an escaped
    // entity such as `&amp;lt;` is not decoded twice.
    const cleanupSteps: Array<[RegExp, string]> = [
      [/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''],
      [/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ''],
      [/<[^>]*>/g, ' '],
      [/&nbsp;/g, ' '],
      [/&quot;/g, '"'],
      [/&apos;/g, "'"],
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&amp;/g, '&'],
      [/\s+/g, ' '],
    ];
    const plainText = cleanupSteps
      .reduce((current, [pattern, replacement]) => current.replace(pattern, replacement), markup)
      .trim();
    if (onProgress) onProgress(50);

    const targetName = file.name.replace(/\.html?$/, '.pdf');
    const pdfBlob = await textToPDF(plainText, targetName, (progress) => {
      if (onProgress) onProgress(50 + progress * 0.5);
    });

    return { success: true, blob: pdfBlob, duration: elapsed() };
  } catch (error) {
    console.error('[PDF Converter] HTML to PDF error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert HTML to PDF';
    return { success: false, error: message, duration: elapsed() };
  }
}
/**
 * Converts a plain-text file into a PDF blob.
 *
 * Failures are logged and reported through the returned result rather than thrown.
 *
 * @param file - The text file to convert.
 * @param onProgress - Optional callback; receives 10 and 30 while reading, then the PDF
 *                     generation progress mapped as `30 + progress * 0.7`.
 * @returns A result carrying the blob on success or an error message on failure,
 *          plus the elapsed time in milliseconds.
 */
export async function plainTextToPDF(file: File, onProgress?: ProgressCallback): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    if (onProgress) onProgress(10);
    const contents = await file.text();
    if (onProgress) onProgress(30);

    const targetName = file.name.replace(/\.txt$/, '.pdf');
    const pdfBlob = await textToPDF(contents, targetName, (progress) => {
      if (onProgress) onProgress(30 + progress * 0.7);
    });

    return { success: true, blob: pdfBlob, duration: elapsed() };
  } catch (error) {
    console.error('[PDF Converter] Text to PDF error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert text to PDF';
    return { success: false, error: message, duration: elapsed() };
  }
}

View File

@@ -162,53 +162,6 @@ export const SUPPORTED_FORMATS: ConversionFormat[] = [
converter: 'imagemagick',
description: 'Scalable Vector Graphics',
},
// Document formats (Pandoc - future implementation)
{
id: 'pdf',
name: 'PDF',
extension: 'pdf',
mimeType: 'application/pdf',
category: 'document',
converter: 'pandoc',
description: 'Portable Document Format',
},
{
id: 'docx',
name: 'DOCX',
extension: 'docx',
mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
category: 'document',
converter: 'pandoc',
description: 'Microsoft Word document',
},
{
id: 'markdown',
name: 'Markdown',
extension: 'md',
mimeType: 'text/markdown',
category: 'document',
converter: 'pandoc',
description: 'Markdown text',
},
{
id: 'html',
name: 'HTML',
extension: 'html',
mimeType: 'text/html',
category: 'document',
converter: 'pandoc',
description: 'HyperText Markup Language',
},
{
id: 'txt',
name: 'Plain Text',
extension: 'txt',
mimeType: 'text/plain',
category: 'document',
converter: 'pandoc',
description: 'Plain text file',
},
];
/**

View File

@@ -7,7 +7,6 @@ import type { ConverterEngine, WASMModuleState } from '@/types/conversion';
const moduleState: WASMModuleState = {
ffmpeg: false,
imagemagick: false,
pandoc: false,
};
/**
@@ -15,7 +14,6 @@ const moduleState: WASMModuleState = {
*/
let ffmpegInstance: FFmpeg | null = null;
let imagemagickInstance: any = null;
let pandocInstance: any = null;
/**
* Load FFmpeg WASM module
@@ -75,33 +73,6 @@ export async function loadImageMagick(): Promise<any> {
}
}
/**
 * Loads the document converter libraries and caches them.
 *
 * Despite the name this does not load Pandoc or any WASM: it dynamically imports
 * `marked` and `turndown` in parallel, stores both modules in `pandocInstance`, and
 * sets `moduleState.pandoc` to true. Later calls return the cached object.
 *
 * @returns An object of the form `{ marked, turndown }` holding the imported modules.
 * @throws Error with the message "Failed to load document converter" when either
 *         import fails (the underlying error is logged first).
 */
export async function loadPandoc(): Promise<any> {
  const alreadyLoaded = pandocInstance && moduleState.pandoc;
  if (alreadyLoaded) {
    return pandocInstance;
  }

  try {
    // Kick off both imports before awaiting so they load concurrently.
    const pendingMarked = import('marked');
    const pendingTurndown = import('turndown');
    const [marked, turndown] = await Promise.all([pendingMarked, pendingTurndown]);

    pandocInstance = { marked, turndown };
    moduleState.pandoc = true;
    console.log('Document converter loaded successfully');

    return pandocInstance;
  } catch (error) {
    console.error('Failed to load document converter:', error);
    throw new Error('Failed to load document converter');
  }
}
/**
* Get loaded module state
*/
@@ -125,8 +96,6 @@ export async function loadModule(engine: ConverterEngine): Promise<any> {
return loadFFmpeg();
case 'imagemagick':
return loadImageMagick();
case 'pandoc':
return loadPandoc();
default:
throw new Error(`Unknown converter engine: ${engine}`);
}
@@ -148,10 +117,5 @@ export function unloadAll(): void {
moduleState.imagemagick = false;
}
if (pandocInstance) {
pandocInstance = null;
moduleState.pandoc = false;
}
console.log('All WASM modules unloaded');
}

View File

@@ -13,26 +13,18 @@
"@ffmpeg/util": "^0.12.1",
"@imagemagick/magick-wasm": "^0.0.30",
"clsx": "^2.1.1",
"docx": "^9.0.4",
"dompurify": "^3.2.2",
"fuse.js": "^7.1.0",
"jspdf": "^2.5.2",
"lucide-react": "^0.553.0",
"mammoth": "^1.8.0",
"marked": "^15.0.4",
"next": "^16.0.0",
"pdfjs-dist": "^4.10.38",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"tailwind-merge": "^3.3.1",
"turndown": "^7.2.0"
"tailwind-merge": "^3.3.1"
},
"devDependencies": {
"@tailwindcss/postcss": "^4.1.17",
"@types/node": "^22",
"@types/react": "^19",
"@types/react-dom": "^19",
"@types/turndown": "^5.0.5",
"eslint": "^9",
"eslint-config-next": "^16.0.0",
"tailwindcss": "^4.0.0",

567
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +1,12 @@
/**
* Supported converter engines
*/
export type ConverterEngine = 'ffmpeg' | 'imagemagick' | 'pandoc';
export type ConverterEngine = 'ffmpeg' | 'imagemagick';
/**
* File category based on type
*/
export type FileCategory = 'video' | 'audio' | 'image' | 'document';
export type FileCategory = 'video' | 'audio' | 'image';
/**
* Conversion status
@@ -65,11 +65,6 @@ export interface ConversionOptions {
imageHeight?: number;
imageFormat?: string;
// Document options
documentPageSize?: string;
documentMargins?: string;
documentStyles?: boolean;
// Generic options
[key: string]: string | number | boolean | undefined;
}
@@ -108,7 +103,6 @@ export interface FormatPreset {
export interface WASMModuleState {
ffmpeg: boolean;
imagemagick: boolean;
pandoc: boolean;
}
/**