refactor: remove all document conversion support, keep only media conversions

This commit completely removes document conversion functionality to focus
exclusively on media file conversions (video, audio, images).

Changes:
- Remove all document converter services (pandocService.ts, pdfService.ts, docxService.ts)
- Uninstall document-related packages: marked, turndown, dompurify, jspdf, pdfjs-dist, docx, mammoth, @types/turndown
- Remove document formats (PDF, DOCX, Markdown, HTML, TXT) from formatMappings.ts
- Remove pandoc converter from FileConverter.tsx
- Remove pandoc loader and references from wasmLoader.ts
- Update TypeScript types to remove 'pandoc' from ConverterEngine and 'document' from FileCategory
- Remove pandoc from WASMModuleState interface
- Update README.md to remove all document conversion documentation
- Update UI descriptions to reflect media-only conversions

Supported conversions now:
- Video: MP4, WebM, AVI, MOV, MKV, GIF
- Audio: MP3, WAV, OGG, AAC, FLAC
- Images: PNG, JPG, WebP, GIF, BMP, TIFF, SVG

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-17 11:35:20 +01:00
parent de3997f4df
commit 594a0ca314
10 changed files with 6 additions and 1867 deletions

View File

@@ -1,443 +0,0 @@
import { Document, Packer, Paragraph, TextRun, HeadingLevel } from 'docx';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
/**
 * Pull the raw, unformatted text out of a DOCX file.
 *
 * mammoth is imported lazily inside the function rather than at module load.
 *
 * @param file - DOCX file to read.
 * @param onProgress - Optional callback; receives 10, 30, 50 and 100 as the steps complete.
 * @returns The `value` string produced by `mammoth.extractRawText`.
 */
export async function extractTextFromDOCX(file: File, onProgress?: ProgressCallback): Promise<string> {
  const report = (percent: number): void => {
    onProgress?.(percent);
  };

  report(10);

  // Lazy import: the library is only fetched when a DOCX is actually processed.
  const mammoth = await import('mammoth');
  report(30);

  const arrayBuffer = await file.arrayBuffer();
  report(50);

  const extraction = await mammoth.extractRawText({ arrayBuffer });
  report(100);

  return extraction.value;
}
/**
 * Render the contents of a DOCX file as an HTML fragment.
 *
 * mammoth is imported lazily inside the function rather than at module load.
 *
 * @param file - DOCX file to read.
 * @param onProgress - Optional callback; receives 10, 30, 50 and 100 as the steps complete.
 * @returns The `value` string produced by `mammoth.convertToHtml`.
 */
export async function extractHTMLFromDOCX(file: File, onProgress?: ProgressCallback): Promise<string> {
  const report = (percent: number): void => {
    onProgress?.(percent);
  };

  report(10);

  // Lazy import: the library is only fetched when a DOCX is actually processed.
  const mammoth = await import('mammoth');
  report(30);

  const arrayBuffer = await file.arrayBuffer();
  report(50);

  const conversion = await mammoth.convertToHtml({ arrayBuffer });
  report(100);

  return conversion.value;
}
/**
 * Convert a DOCX file into a `text/plain` blob.
 *
 * Never rejects: failures are logged and reported through the returned
 * result (`success: false` plus an error message).
 *
 * @param file - DOCX file to convert.
 * @param onProgress - Optional callback forwarded to the text extraction step.
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function docxToText(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const plainText = await extractTextFromDOCX(file, onProgress);
    return {
      success: true,
      blob: new Blob([plainText], { type: 'text/plain' }),
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[DOCX Converter] DOCX to text error:', error);
    const message = error instanceof Error ? error.message : 'Failed to extract text from DOCX';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Convert a DOCX file into a standalone HTML document (`text/html` blob).
 *
 * The HTML fragment extracted from the DOCX is embedded in a minimal page
 * with a small default stylesheet. Extraction progress is scaled to 0-90%;
 * 100% is reported once the page has been assembled.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - DOCX file to convert.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function docxToHTML(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    // Extraction accounts for the first 90% of the reported progress.
    const bodyHtml = await extractHTMLFromDOCX(file, (progress) => {
      onProgress?.(progress * 0.9);
    });

    const page = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Converted Document</title>
<style>
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 2rem auto;
padding: 0 1rem;
color: #333;
}
</style>
</head>
<body>
${bodyHtml}
</body>
</html>`;

    onProgress?.(100);

    return {
      success: true,
      blob: new Blob([page], { type: 'text/html' }),
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[DOCX Converter] DOCX to HTML error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert DOCX to HTML';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Convert a DOCX file into Markdown (`text/markdown` blob).
 *
 * Works in two stages: DOCX -> HTML via `extractHTMLFromDOCX`, then
 * HTML -> Markdown via a lazily imported Turndown instance configured for
 * ATX headings, fenced code blocks and `-` bullets.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - DOCX file to convert.
 * @param onProgress - Optional progress callback (extraction is scaled to 0-70%).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function docxToMarkdown(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    // Stage 1: DOCX -> HTML (first 70% of progress).
    const intermediateHtml = await extractHTMLFromDOCX(file, (progress) => {
      onProgress?.(progress * 0.7);
    });
    onProgress?.(80);

    // Stage 2: HTML -> Markdown.
    const { default: TurndownService } = await import('turndown');
    const converter = new TurndownService({
      headingStyle: 'atx',
      codeBlockStyle: 'fenced',
      bulletListMarker: '-',
    });
    const markdown = converter.turndown(intermediateHtml);
    onProgress?.(100);

    return {
      success: true,
      blob: new Blob([markdown], { type: 'text/markdown' }),
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[DOCX Converter] DOCX to Markdown error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert DOCX to Markdown';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Build a DOCX blob from plain text.
 *
 * The text is split on blank lines (`\n\n`); every non-blank chunk becomes
 * one trimmed paragraph with 200 units of spacing after it.
 *
 * @param text - Source text.
 * @param onProgress - Optional callback; receives 20, 40, 70 and 100.
 * @returns The packed DOCX document as a Blob.
 */
async function createDOCXFromText(text: string, onProgress?: ProgressCallback): Promise<Blob> {
  onProgress?.(20);

  const chunks = text.split('\n\n').filter((chunk) => chunk.trim());
  onProgress?.(40);

  const body = chunks.map(
    (chunk) =>
      new Paragraph({
        children: [new TextRun(chunk.trim())],
        spacing: {
          after: 200,
        },
      })
  );
  const document = new Document({
    sections: [
      {
        properties: {},
        children: body,
      },
    ],
  });
  onProgress?.(70);

  const packed = await Packer.toBlob(document);
  onProgress?.(100);

  return packed;
}
/**
 * Build a DOCX blob from Markdown using a small line-based parser.
 *
 * Recognized constructs:
 * - lines starting with `# `, `## ` or `### ` become heading 1-3 paragraphs;
 * - blank lines end the current paragraph;
 * - every other line is accumulated, and the accumulated lines are joined
 *   with single spaces into one paragraph.
 *
 * Any other Markdown syntax is kept verbatim as paragraph text.
 *
 * @param markdown - Markdown source.
 * @param onProgress - Optional callback; receives 10, 60, 80 and 100.
 * @returns The packed DOCX document as a Blob.
 */
async function createDOCXFromMarkdown(markdown: string, onProgress?: ProgressCallback): Promise<Blob> {
  onProgress?.(10);

  // Prefixes are mutually exclusive: '## x' does not start with '# '.
  const headingRules = [
    { prefix: '# ', heading: HeadingLevel.HEADING_1, spacing: { before: 240, after: 120 } },
    { prefix: '## ', heading: HeadingLevel.HEADING_2, spacing: { before: 200, after: 100 } },
    { prefix: '### ', heading: HeadingLevel.HEADING_3, spacing: { before: 160, after: 80 } },
  ];

  const blocks: Paragraph[] = [];
  let pendingLines: string[] = [];

  // Emit the accumulated body lines (if any) as a single paragraph.
  const flushPending = (): void => {
    if (pendingLines.length === 0) {
      return;
    }
    blocks.push(
      new Paragraph({
        children: [new TextRun(pendingLines.join(' '))],
        spacing: { after: 200 },
      })
    );
    pendingLines = [];
  };

  for (const line of markdown.split('\n')) {
    const rule = headingRules.find(({ prefix }) => line.startsWith(prefix));

    if (rule) {
      // A heading terminates whatever paragraph was in progress.
      flushPending();
      blocks.push(
        new Paragraph({
          text: line.substring(rule.prefix.length),
          heading: rule.heading,
          spacing: { ...rule.spacing },
        })
      );
    } else if (line.trim() === '') {
      flushPending();
    } else {
      pendingLines.push(line);
    }
  }

  // Trailing text without a closing blank line.
  flushPending();
  onProgress?.(60);

  const document = new Document({
    sections: [
      {
        properties: {},
        children: blocks,
      },
    ],
  });
  onProgress?.(80);

  const packed = await Packer.toBlob(document);
  onProgress?.(100);

  return packed;
}
/**
 * Convert a plain-text file into a DOCX blob.
 *
 * Reading the file accounts for the first 20% of progress; document
 * generation is mapped onto the remaining 80%.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - Text file to convert.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function textToDOCX(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    onProgress?.(10);
    const contents = await file.text();
    onProgress?.(20);

    const docx = await createDOCXFromText(contents, (progress) => {
      onProgress?.(20 + progress * 0.8);
    });

    return {
      success: true,
      blob: docx,
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[DOCX Converter] Text to DOCX error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert text to DOCX';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Convert a Markdown file into a DOCX blob.
 *
 * Reading the file accounts for the first 20% of progress; document
 * generation is mapped onto the remaining 80%.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - Markdown file to convert.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function markdownToDOCX(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    onProgress?.(10);
    const source = await file.text();
    onProgress?.(20);

    const docx = await createDOCXFromMarkdown(source, (progress) => {
      onProgress?.(20 + progress * 0.8);
    });

    return {
      success: true,
      blob: docx,
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[DOCX Converter] Markdown to DOCX error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert Markdown to DOCX';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Convert an HTML file into a DOCX blob containing its text content.
 *
 * The markup is reduced to plain text with regular expressions: script and
 * style elements are dropped, remaining tags become spaces, a handful of
 * named entities are decoded and all whitespace runs collapse to one space.
 * Because newlines are collapsed too, the resulting text contains no blank
 * lines for the paragraph splitter to break on.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - HTML file to convert.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function htmlToDOCX(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    onProgress?.(10);
    const markup = await file.text();
    onProgress?.(20);

    // Applied in order; '&amp;' is decoded last so already-decoded text is
    // not decoded a second time.
    const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
      [/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''],
      [/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ''],
      [/<[^>]*>/g, ' '],
      [/&nbsp;/g, ' '],
      [/&quot;/g, '"'],
      [/&apos;/g, "'"],
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&amp;/g, '&'],
      [/\s+/g, ' '],
    ];
    const plainText = rewrites
      .reduce((acc, [pattern, replacement]) => acc.replace(pattern, replacement), markup)
      .trim();
    onProgress?.(50);

    const docx = await createDOCXFromText(plainText, (progress) => {
      onProgress?.(50 + progress * 0.5);
    });

    return {
      success: true,
      blob: docx,
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[DOCX Converter] HTML to DOCX error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert HTML to DOCX';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}

View File

@@ -1,379 +0,0 @@
import { marked } from 'marked';
import TurndownService from 'turndown';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
import {
pdfToText,
pdfToMarkdown,
markdownToPDF,
htmlToPDF,
plainTextToPDF,
} from './pdfService';
import {
docxToText,
docxToHTML,
docxToMarkdown,
textToDOCX,
markdownToDOCX,
htmlToDOCX,
} from './docxService';
// DOMPurify is only loaded when a `window` global exists (i.e. in the browser);
// elsewhere the binding stays undefined.
let DOMPurify: any = typeof window !== 'undefined' ? require('dompurify') : undefined;
/**
 * Convert a document between PDF, DOCX, Markdown, HTML and plain text.
 *
 * Despite the name, no Pandoc binary is involved: the work is done by
 * lightweight JS libraries and the dedicated PDF/DOCX services.
 *
 * Dispatch order:
 * 1. PDF input    -> txt / md
 * 2. DOCX input   -> txt / html / md
 * 3. PDF output   <- md / html / txt
 * 4. DOCX output  <- md / html / txt
 * 5. text-to-text conversions between md, html and txt
 *
 * Cases 1-4 are delegated to services that read the file themselves and
 * report their own progress, so the file is only decoded as text (and this
 * function only reports progress) for case 5. This avoids decoding binary
 * PDF/DOCX data as a string and keeps reported progress from jumping
 * backwards when a delegate starts over at 10%.
 *
 * Never rejects: unsupported combinations and runtime failures are logged and
 * reported through the returned result (`success: false`).
 *
 * @param file - Input file; its extension determines the input format.
 * @param outputFormat - Target format id (`txt`, `md`/`markdown`, `html`, `pdf`, `docx`).
 * @param options - Accepted for interface compatibility; not read by this function.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function convertWithPandoc(
  file: File,
  outputFormat: string,
  options: ConversionOptions = {},
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startTime = Date.now();

  try {
    // Detect input format from the file extension
    const inputExt = file.name.split('.').pop()?.toLowerCase();

    // Handle PDF conversions
    if (inputExt === 'pdf') {
      if (outputFormat === 'txt') {
        return await pdfToText(file, onProgress);
      } else if (outputFormat === 'md' || outputFormat === 'markdown') {
        return await pdfToMarkdown(file, onProgress);
      } else {
        throw new Error(`Conversion from PDF to ${outputFormat} not supported`);
      }
    }

    // Handle DOCX conversions
    if (inputExt === 'docx') {
      if (outputFormat === 'txt') {
        return await docxToText(file, onProgress);
      } else if (outputFormat === 'html') {
        return await docxToHTML(file, onProgress);
      } else if (outputFormat === 'md' || outputFormat === 'markdown') {
        return await docxToMarkdown(file, onProgress);
      } else {
        throw new Error(`Conversion from DOCX to ${outputFormat} not supported`);
      }
    }

    // Handle conversions TO PDF
    if (outputFormat === 'pdf') {
      if (inputExt === 'md' || inputExt === 'markdown') {
        return await markdownToPDF(file, onProgress);
      } else if (inputExt === 'html' || inputExt === 'htm') {
        return await htmlToPDF(file, onProgress);
      } else if (inputExt === 'txt') {
        return await plainTextToPDF(file, onProgress);
      } else {
        throw new Error(`Conversion from ${inputExt} to PDF not supported`);
      }
    }

    // Handle conversions TO DOCX
    if (outputFormat === 'docx') {
      if (inputExt === 'md' || inputExt === 'markdown') {
        return await markdownToDOCX(file, onProgress);
      } else if (inputExt === 'html' || inputExt === 'htm') {
        return await htmlToDOCX(file, onProgress);
      } else if (inputExt === 'txt') {
        return await textToDOCX(file, onProgress);
      } else {
        throw new Error(`Conversion from ${inputExt} to DOCX not supported`);
      }
    }

    // Text-to-text conversions: only now is the file decoded as a string.
    if (onProgress) onProgress(10);
    const text = await file.text();
    if (onProgress) onProgress(30);

    let result: string;
    if (onProgress) onProgress(50);

    if (inputExt === 'md' || inputExt === 'markdown') {
      // Markdown input
      if (outputFormat === 'html') {
        result = await markdownToHtml(text);
      } else if (outputFormat === 'txt') {
        result = markdownToText(text);
      } else {
        throw new Error(`Conversion from Markdown to ${outputFormat} not supported`);
      }
    } else if (inputExt === 'html' || inputExt === 'htm') {
      // HTML input
      if (outputFormat === 'md' || outputFormat === 'markdown') {
        result = await htmlToMarkdown(text);
      } else if (outputFormat === 'txt') {
        result = htmlToText(text);
      } else {
        throw new Error(`Conversion from HTML to ${outputFormat} not supported`);
      }
    } else if (inputExt === 'txt') {
      // Plain text input
      if (outputFormat === 'md' || outputFormat === 'markdown') {
        result = textToMarkdown(text);
      } else if (outputFormat === 'html') {
        result = textToHtml(text);
      } else {
        throw new Error(`Conversion from TXT to ${outputFormat} not supported`);
      }
    } else {
      throw new Error(`Input format ${inputExt} not supported`);
    }

    if (onProgress) onProgress(90);

    // Create blob from result
    const blob = new Blob([result], { type: getMimeType(outputFormat) });
    if (onProgress) onProgress(100);

    const duration = Date.now() - startTime;
    return {
      success: true,
      blob,
      duration,
    };
  } catch (error) {
    console.error('[Document Converter] Conversion error:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown conversion error',
      duration: Date.now() - startTime,
    };
  }
}
/**
 * Render Markdown as a standalone, styled HTML document.
 *
 * The Markdown is parsed with marked (GitHub-flavored, single newlines become
 * `<br>`), the generated markup is passed through DOMPurify, and the result is
 * embedded in a page template with a default stylesheet.
 *
 * Note that `marked.setOptions` changes marked's shared configuration.
 *
 * @param markdown - Markdown source.
 * @returns Complete HTML document as a string.
 */
async function markdownToHtml(markdown: string): Promise<string> {
  marked.setOptions({
    gfm: true, // GitHub Flavored Markdown
    breaks: true, // Convert \n to <br>
  });

  const rendered = await marked.parse(markdown);

  // Sanitize before embedding the markup in the page.
  const safeBody = DOMPurify.sanitize(rendered);

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Converted Document</title>
<style>
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 2rem auto;
padding: 0 1rem;
color: #333;
}
pre {
background: #f4f4f4;
border: 1px solid #ddd;
border-radius: 4px;
padding: 1rem;
overflow-x: auto;
}
code {
background: #f4f4f4;
padding: 0.2rem 0.4rem;
border-radius: 3px;
font-family: 'Courier New', monospace;
}
blockquote {
border-left: 4px solid #ddd;
margin: 1rem 0;
padding-left: 1rem;
color: #666;
}
table {
border-collapse: collapse;
width: 100%;
margin: 1rem 0;
}
th, td {
border: 1px solid #ddd;
padding: 0.5rem;
text-align: left;
}
th {
background: #f4f4f4;
}
</style>
</head>
<body>
${safeBody}
</body>
</html>`;
}
/**
 * Convert HTML markup to Markdown.
 *
 * The input is sanitized with DOMPurify first, then handed to Turndown
 * configured for ATX (`#`) headings, fenced code blocks and `-` bullets.
 *
 * @param html - HTML source.
 * @returns Markdown text.
 */
async function htmlToMarkdown(html: string): Promise<string> {
  const safeHtml = DOMPurify.sanitize(html);

  const converter = new TurndownService({
    headingStyle: 'atx', // Use # for headings
    codeBlockStyle: 'fenced', // Use ``` for code blocks
    bulletListMarker: '-', // Use - for bullet lists
  });

  return converter.turndown(safeHtml);
}
/**
 * Convert Markdown to plain text by stripping common formatting syntax.
 *
 * This is a regex-based approximation, not a full Markdown parser. It removes
 * heading markers, emphasis markers, fenced code blocks (including their
 * content), inline-code backticks, blockquote markers and `---` rules, and
 * replaces links and images with their link text / alt text.
 *
 * @param markdown - Markdown source.
 * @returns The stripped text with runs of 3+ newlines collapsed to two and
 *   surrounding whitespace trimmed.
 */
function markdownToText(markdown: string): string {
  const text = markdown
    // Remove headers
    .replace(/^#{1,6}\s+/gm, '')
    // Remove bold/italic
    .replace(/(\*\*|__)(.*?)\1/g, '$2')
    .replace(/(\*|_)(.*?)\1/g, '$2')
    // Replace images with their alt text. This must run BEFORE the link rule:
    // the link pattern also matches the "[alt](url)" tail of an image and
    // would otherwise leave a stray "!" in the output.
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
    // Replace links with their text
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
    // Remove code blocks
    .replace(/```[\s\S]*?```/g, '')
    // Remove inline code
    .replace(/`([^`]+)`/g, '$1')
    // Remove blockquotes
    .replace(/^>\s+/gm, '')
    // Remove horizontal rules
    .replace(/^-{3,}$/gm, '')
    // Clean up multiple newlines
    .replace(/\n{3,}/g, '\n\n');

  return text.trim();
}
/**
 * Reduce HTML to plain text.
 *
 * DOMPurify is invoked with an empty tag allow-list; the sanitized output
 * then has every whitespace run collapsed to a single space and is trimmed.
 *
 * @param html - HTML source.
 * @returns Single-line plain text.
 */
function htmlToText(html: string): string {
  const withoutTags = DOMPurify.sanitize(html, { ALLOWED_TAGS: [] });
  const collapsed = withoutTags.replace(/\s+/g, ' ');
  return collapsed.trim();
}
/**
 * Convert plain text to Markdown.
 *
 * No markup is added: the text is split on blank lines (`\n\n`), chunks that
 * are empty or whitespace-only are discarded, and the remaining chunks are
 * re-joined with a blank line between them.
 *
 * @param text - Plain text source.
 * @returns The normalized text.
 */
function textToMarkdown(text: string): string {
  const kept: string[] = [];
  for (const chunk of text.split('\n\n')) {
    if (chunk.trim()) {
      kept.push(chunk);
    }
  }
  return kept.join('\n\n');
}
/**
 * Wrap plain text in a standalone HTML document.
 *
 * HTML-special characters (`& < > " '`) are escaped, the text is split on
 * blank lines into `<p>` elements (whitespace-only chunks are dropped), and
 * single newlines inside a paragraph become `<br>`.
 *
 * @param text - Plain text source.
 * @returns Complete HTML document as a string.
 */
function textToHtml(text: string): string {
  const entityFor: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#039;',
  };
  // Single pass: each special character maps to exactly one entity.
  const escaped = text.replace(/[&<>"']/g, (ch) => entityFor[ch] ?? ch);

  const paragraphTags: string[] = [];
  for (const chunk of escaped.split('\n\n')) {
    if (!chunk.trim()) {
      continue;
    }
    paragraphTags.push(` <p>${chunk.replace(/\n/g, '<br>')}</p>`);
  }
  const body = paragraphTags.join('\n');

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Converted Document</title>
<style>
body {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.6;
max-width: 800px;
margin: 2rem auto;
padding: 0 1rem;
color: #333;
}
</style>
</head>
<body>
${body}
</body>
</html>`;
}
/**
 * MIME types for the text output formats, keyed by lowercase format id.
 *
 * A Map is used instead of a plain object so that format ids such as
 * "constructor" or "__proto__" cannot resolve to inherited
 * `Object.prototype` members.
 */
const OUTPUT_MIME_TYPES: ReadonlyMap<string, string> = new Map([
  ['html', 'text/html'],
  ['htm', 'text/html'],
  ['md', 'text/markdown'],
  ['markdown', 'text/markdown'],
  ['txt', 'text/plain'],
]);

/**
 * Get the MIME type for an output format.
 *
 * @param format - Format id; matched case-insensitively.
 * @returns The matching MIME type, or `text/plain` for any unknown format.
 */
function getMimeType(format: string): string {
  return OUTPUT_MIME_TYPES.get(format.toLowerCase()) ?? 'text/plain';
}
/**
 * Convenience wrapper: run `convertWithPandoc` on a file with the output
 * format fixed to `html` and default options.
 *
 * @param file - Input file.
 * @param onProgress - Optional progress callback forwarded to the converter.
 * @returns The converter's result.
 */
export async function markdownToHtmlFile(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const targetFormat = 'html';
  return convertWithPandoc(file, targetFormat, {}, onProgress);
}
/**
 * Convenience wrapper: run `convertWithPandoc` on a file with the output
 * format fixed to `md` and default options.
 *
 * @param file - Input file.
 * @param onProgress - Optional progress callback forwarded to the converter.
 * @returns The converter's result.
 */
export async function htmlToMarkdownFile(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const targetFormat = 'md';
  return convertWithPandoc(file, targetFormat, {}, onProgress);
}

View File

@@ -1,334 +0,0 @@
import { jsPDF } from 'jspdf';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
/**
 * Extract the text content of every page of a PDF file.
 *
 * pdfjs-dist is imported lazily and its worker script is loaded from the
 * cdnjs CDN at the version reported by the library. Text items of a page are
 * joined with single spaces and pages are separated by a blank line.
 *
 * The pdf.js loading task is destroyed when extraction finishes or fails so
 * that the parsed document is released instead of being kept alive.
 *
 * @param file - PDF file to read.
 * @param onProgress - Optional callback: 10/20/30 during setup, 50 once the
 *   document is loaded, 50-90 while pages are processed, 100 at the end.
 * @returns The trimmed text of all pages.
 * @throws Whatever pdf.js throws for unreadable or invalid documents.
 */
export async function extractTextFromPDF(file: File, onProgress?: ProgressCallback): Promise<string> {
  if (onProgress) onProgress(10);

  // Dynamically import pdfjs-dist (client-side only)
  const pdfjsLib = await import('pdfjs-dist');

  // Set worker source.
  // NOTE(review): the URL is protocol-relative, so it resolves against the
  // page's own scheme; confirm that is intended for all deployment targets.
  pdfjsLib.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.min.mjs`;
  if (onProgress) onProgress(20);

  // Read file as ArrayBuffer
  const arrayBuffer = await file.arrayBuffer();
  if (onProgress) onProgress(30);

  // Load PDF document
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });

  try {
    const pdf = await loadingTask.promise;
    if (onProgress) onProgress(50);

    const numPages = pdf.numPages;
    let fullText = '';

    // Extract text from each page (pdf.js pages are 1-based)
    for (let pageNum = 1; pageNum <= numPages; pageNum++) {
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();

      // Combine text items; entries without a `str` field contribute nothing.
      const pageText = textContent.items
        .map((item) => ('str' in item ? item.str : ''))
        .join(' ');
      fullText += pageText + '\n\n';

      // Page processing covers the 50-90% range.
      if (onProgress) {
        const progress = 50 + (pageNum / numPages) * 40;
        onProgress(Math.round(progress));
      }
    }

    if (onProgress) onProgress(100);
    return fullText.trim();
  } finally {
    // Release the document on both the success and the failure path.
    await loadingTask.destroy();
  }
}
/**
 * Convert a PDF file into a `text/plain` blob.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - PDF file to convert.
 * @param onProgress - Optional callback forwarded to the text extraction step.
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function pdfToText(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const extracted = await extractTextFromPDF(file, onProgress);
    return {
      success: true,
      blob: new Blob([extracted], { type: 'text/plain' }),
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] PDF to text error:', error);
    const message = error instanceof Error ? error.message : 'Failed to extract text from PDF';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Convert a PDF file into a `text/markdown` blob.
 *
 * The "Markdown" is the extracted text with whitespace-only chunks between
 * blank lines removed; no Markdown markup is generated. Extraction is
 * scaled to 0-90% of the reported progress.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - PDF file to convert.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function pdfToMarkdown(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    const extracted = await extractTextFromPDF(file, (progress) => {
      onProgress?.(progress * 0.9); // Use 90% for extraction
    });

    // Keep only chunks that contain something other than whitespace.
    const chunks = extracted.split('\n\n').filter((chunk) => chunk.trim());
    const markdown = chunks.join('\n\n');
    onProgress?.(100);

    return {
      success: true,
      blob: new Blob([markdown], { type: 'text/markdown' }),
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] PDF to markdown error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert PDF to Markdown';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Lay out plain text on A4 portrait pages and return the PDF as a Blob.
 *
 * Text is set in 12pt Helvetica with a 20mm margin and a 7mm line advance;
 * it is wrapped to the printable width and a new page is started whenever
 * the next line would cross the bottom margin.
 *
 * @param text - Text to render.
 * @param filename - Not read by this function; kept in the signature for callers.
 * @param onProgress - Optional callback: 20/40/60 during setup, 60-90 while
 *   lines are written (every 10th line), then 90 and 100.
 * @returns The generated PDF.
 */
export async function textToPDF(
  text: string,
  filename: string = 'document.pdf',
  onProgress?: ProgressCallback
): Promise<Blob> {
  onProgress?.(20);

  const pdf = new jsPDF({
    orientation: 'portrait',
    unit: 'mm',
    format: 'a4',
  });
  onProgress?.(40);

  pdf.setFont('helvetica');
  pdf.setFontSize(12);

  // Page geometry (millimetres, per the `unit` option above).
  const pageWidth = pdf.internal.pageSize.getWidth();
  const pageHeight = pdf.internal.pageSize.getHeight();
  const margin = 20;
  const maxWidth = pageWidth - 2 * margin;
  const lineHeight = 7;
  let cursorY = margin;
  onProgress?.(60);

  const wrappedLines: string[] = pdf.splitTextToSize(text, maxWidth);

  for (const [index, line] of wrappedLines.entries()) {
    // Start a new page when this line would cross the bottom margin.
    if (cursorY + lineHeight > pageHeight - margin) {
      pdf.addPage();
      cursorY = margin;
    }

    pdf.text(line, margin, cursorY);
    cursorY += lineHeight;

    // Throttle progress reporting to every 10th line.
    if (onProgress && index % 10 === 0) {
      const progress = 60 + (index / wrappedLines.length) * 30;
      onProgress(Math.round(progress));
    }
  }
  onProgress?.(90);

  const output = pdf.output('blob');
  onProgress?.(100);

  return output;
}
/**
 * Convert a Markdown file into a plain-text PDF.
 *
 * The Markdown is rendered to HTML with a lazily imported marked, the tags
 * are stripped again and a handful of named entities are decoded; the
 * remaining text is laid out by `textToPDF`. Formatting is therefore not
 * carried over into the PDF.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - Markdown file to convert.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function markdownToPDF(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    onProgress?.(10);
    const source = await file.text();
    onProgress?.(20);

    const { marked } = await import('marked');
    const rendered = await marked.parse(source);
    onProgress?.(40);

    // Applied in order; '&amp;' is decoded last so already-decoded text is
    // not decoded a second time.
    const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
      [/<[^>]*>/g, ''],
      [/&nbsp;/g, ' '],
      [/&quot;/g, '"'],
      [/&apos;/g, "'"],
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&amp;/g, '&'],
    ];
    let plainText = rendered;
    for (const [pattern, replacement] of rewrites) {
      plainText = plainText.replace(pattern, replacement);
    }
    onProgress?.(60);

    const pdfName = file.name.replace(/\.md$/, '.pdf');
    const pdf = await textToPDF(plainText, pdfName, (progress) => {
      onProgress?.(60 + progress * 0.4);
    });

    return {
      success: true,
      blob: pdf,
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] Markdown to PDF error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert Markdown to PDF';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Convert an HTML file into a plain-text PDF.
 *
 * The markup is reduced to text with regular expressions: script and style
 * elements are dropped, remaining tags become spaces, a handful of named
 * entities are decoded and whitespace runs collapse to one space. The text is
 * then laid out by `textToPDF`.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - HTML file to convert.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function htmlToPDF(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    onProgress?.(10);
    const markup = await file.text();
    onProgress?.(30);

    // Applied in order; '&amp;' is decoded last so already-decoded text is
    // not decoded a second time.
    const rewrites: ReadonlyArray<readonly [RegExp, string]> = [
      [/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, ''],
      [/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, ''],
      [/<[^>]*>/g, ' '],
      [/&nbsp;/g, ' '],
      [/&quot;/g, '"'],
      [/&apos;/g, "'"],
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&amp;/g, '&'],
      [/\s+/g, ' '],
    ];
    let plainText = markup;
    for (const [pattern, replacement] of rewrites) {
      plainText = plainText.replace(pattern, replacement);
    }
    plainText = plainText.trim();
    onProgress?.(50);

    const pdfName = file.name.replace(/\.html?$/, '.pdf');
    const pdf = await textToPDF(plainText, pdfName, (progress) => {
      onProgress?.(50 + progress * 0.5);
    });

    return {
      success: true,
      blob: pdf,
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] HTML to PDF error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert HTML to PDF';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}
/**
 * Convert a plain-text file into a PDF.
 *
 * Reading the file accounts for the first 30% of progress; PDF generation
 * is mapped onto the remaining 70%.
 *
 * Never rejects: failures are logged and reported through the returned result.
 *
 * @param file - Text file to convert.
 * @param onProgress - Optional progress callback (0-100).
 * @returns Conversion result with the blob (on success) and elapsed milliseconds.
 */
export async function plainTextToPDF(
  file: File,
  onProgress?: ProgressCallback
): Promise<ConversionResult> {
  const startedAt = Date.now();
  const elapsed = (): number => Date.now() - startedAt;

  try {
    onProgress?.(10);
    const contents = await file.text();
    onProgress?.(30);

    const pdfName = file.name.replace(/\.txt$/, '.pdf');
    const pdf = await textToPDF(contents, pdfName, (progress) => {
      onProgress?.(30 + progress * 0.7);
    });

    return {
      success: true,
      blob: pdf,
      duration: elapsed(),
    };
  } catch (error) {
    console.error('[PDF Converter] Text to PDF error:', error);
    const message = error instanceof Error ? error.message : 'Failed to convert text to PDF';
    return {
      success: false,
      error: message,
      duration: elapsed(),
    };
  }
}

View File

@@ -162,53 +162,6 @@ export const SUPPORTED_FORMATS: ConversionFormat[] = [
converter: 'imagemagick',
description: 'Scalable Vector Graphics',
},
// Document formats (handled by the 'pandoc' engine, which is implemented with JS libraries rather than Pandoc WASM)
{
id: 'pdf',
name: 'PDF',
extension: 'pdf',
mimeType: 'application/pdf',
category: 'document',
converter: 'pandoc',
description: 'Portable Document Format',
},
{
id: 'docx',
name: 'DOCX',
extension: 'docx',
mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
category: 'document',
converter: 'pandoc',
description: 'Microsoft Word document',
},
{
id: 'markdown',
name: 'Markdown',
extension: 'md',
mimeType: 'text/markdown',
category: 'document',
converter: 'pandoc',
description: 'Markdown text',
},
{
id: 'html',
name: 'HTML',
extension: 'html',
mimeType: 'text/html',
category: 'document',
converter: 'pandoc',
description: 'HyperText Markup Language',
},
{
id: 'txt',
name: 'Plain Text',
extension: 'txt',
mimeType: 'text/plain',
category: 'document',
converter: 'pandoc',
description: 'Plain text file',
},
];
/**

View File

@@ -7,7 +7,6 @@ import type { ConverterEngine, WASMModuleState } from '@/types/conversion';
const moduleState: WASMModuleState = {
ffmpeg: false,
imagemagick: false,
pandoc: false,
};
/**
@@ -15,7 +14,6 @@ const moduleState: WASMModuleState = {
*/
let ffmpegInstance: FFmpeg | null = null;
let imagemagickInstance: any = null;
let pandocInstance: any = null;
/**
* Load FFmpeg WASM module
@@ -75,33 +73,6 @@ export async function loadImageMagick(): Promise<any> {
}
}
/**
 * Load the document converter behind the 'pandoc' engine.
 *
 * No WASM is involved: the "instance" is an object holding the dynamically
 * imported `marked` and `turndown` modules. The instance is cached, so later
 * calls return the same object without importing again.
 *
 * @returns The cached `{ marked, turndown }` object.
 * @throws Error('Failed to load document converter') when either import fails.
 */
export async function loadPandoc(): Promise<any> {
  const alreadyLoaded = pandocInstance && moduleState.pandoc;
  if (alreadyLoaded) {
    return pandocInstance;
  }

  try {
    // Fetch both libraries in parallel.
    const [marked, turndown] = await Promise.all([import('marked'), import('turndown')]);

    pandocInstance = { marked, turndown };
    moduleState.pandoc = true;
    console.log('Document converter loaded successfully');

    return pandocInstance;
  } catch (error) {
    // The underlying cause is only logged; callers get a generic error.
    console.error('Failed to load document converter:', error);
    throw new Error('Failed to load document converter');
  }
}
/**
* Get loaded module state
*/
@@ -125,8 +96,6 @@ export async function loadModule(engine: ConverterEngine): Promise<any> {
return loadFFmpeg();
case 'imagemagick':
return loadImageMagick();
case 'pandoc':
return loadPandoc();
default:
throw new Error(`Unknown converter engine: ${engine}`);
}
@@ -148,10 +117,5 @@ export function unloadAll(): void {
moduleState.imagemagick = false;
}
if (pandocInstance) {
pandocInstance = null;
moduleState.pandoc = false;
}
console.log('All WASM modules unloaded');
}