feat: add document conversion support (Markdown, HTML, Plain Text)
- Add marked for Markdown to HTML conversion with GFM support - Add turndown for HTML to Markdown conversion - Add DOMPurify for HTML sanitization (security) - Support Markdown ↔ HTML ↔ Plain Text conversions - Add styled HTML output with responsive design - Use client-side only DOMPurify to fix SSR issues Supported conversions: - Markdown → HTML (with code syntax, tables, blockquotes) - HTML → Markdown (clean formatting preservation) - Markdown/HTML → Plain Text (strip formatting) - Plain Text → HTML/Markdown (basic formatting) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,16 @@
|
||||
import { marked } from 'marked';
|
||||
import TurndownService from 'turndown';
|
||||
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
|
||||
|
||||
// Import DOMPurify only on client side
|
||||
let DOMPurify: any;
|
||||
if (typeof window !== 'undefined') {
|
||||
DOMPurify = require('dompurify');
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert document using Pandoc (placeholder - not yet implemented)
|
||||
* Convert document using Markdown/HTML converters
|
||||
* Note: This uses lightweight JS libraries instead of Pandoc WASM (which isn't widely available)
|
||||
*/
|
||||
export async function convertWithPandoc(
|
||||
file: File,
|
||||
@@ -9,21 +18,283 @@ export async function convertWithPandoc(
|
||||
options: ConversionOptions = {},
|
||||
onProgress?: ProgressCallback
|
||||
): Promise<ConversionResult> {
|
||||
// TODO: Implement Pandoc WASM conversion when available
|
||||
// For now, return an error
|
||||
const startTime = Date.now();
|
||||
|
||||
if (onProgress) onProgress(0);
|
||||
try {
|
||||
if (onProgress) onProgress(10);
|
||||
|
||||
return {
|
||||
success: false,
|
||||
error: 'Pandoc WASM converter is not yet implemented. Document conversion coming soon!',
|
||||
};
|
||||
// Read file content as text
|
||||
const text = await file.text();
|
||||
|
||||
if (onProgress) onProgress(30);
|
||||
|
||||
// Detect input format from file extension or content
|
||||
const inputExt = file.name.split('.').pop()?.toLowerCase();
|
||||
let result: string;
|
||||
|
||||
if (onProgress) onProgress(50);
|
||||
|
||||
// Perform conversion based on input and output formats
|
||||
if (inputExt === 'md' || inputExt === 'markdown') {
|
||||
// Markdown input
|
||||
if (outputFormat === 'html') {
|
||||
result = await markdownToHtml(text);
|
||||
} else if (outputFormat === 'txt') {
|
||||
result = markdownToText(text);
|
||||
} else {
|
||||
throw new Error(`Conversion from Markdown to ${outputFormat} not supported`);
|
||||
}
|
||||
} else if (inputExt === 'html' || inputExt === 'htm') {
|
||||
// HTML input
|
||||
if (outputFormat === 'md' || outputFormat === 'markdown') {
|
||||
result = await htmlToMarkdown(text);
|
||||
} else if (outputFormat === 'txt') {
|
||||
result = htmlToText(text);
|
||||
} else {
|
||||
throw new Error(`Conversion from HTML to ${outputFormat} not supported`);
|
||||
}
|
||||
} else if (inputExt === 'txt') {
|
||||
// Plain text input
|
||||
if (outputFormat === 'md' || outputFormat === 'markdown') {
|
||||
result = textToMarkdown(text);
|
||||
} else if (outputFormat === 'html') {
|
||||
result = textToHtml(text);
|
||||
} else {
|
||||
throw new Error(`Conversion from TXT to ${outputFormat} not supported`);
|
||||
}
|
||||
} else {
|
||||
throw new Error(`Input format ${inputExt} not supported`);
|
||||
}
|
||||
|
||||
if (onProgress) onProgress(90);
|
||||
|
||||
// Create blob from result
|
||||
const blob = new Blob([result], { type: getMimeType(outputFormat) });
|
||||
|
||||
if (onProgress) onProgress(100);
|
||||
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
return {
|
||||
success: true,
|
||||
blob,
|
||||
duration,
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('[Document Converter] Conversion error:', error);
|
||||
|
||||
return {
|
||||
success: false,
|
||||
error: error instanceof Error ? error.message : 'Unknown conversion error',
|
||||
duration: Date.now() - startTime,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Markdown to HTML (placeholder)
|
||||
* Convert Markdown to HTML
|
||||
*/
|
||||
export async function markdownToHtml(
|
||||
/**
 * Render Markdown as a complete, styled, standalone HTML document.
 *
 * The Markdown is parsed with `marked` (GitHub Flavored Markdown enabled,
 * single newlines rendered as `<br>`), the resulting fragment is sanitized
 * with DOMPurify, and the sanitized fragment is placed inside the `<body>`
 * of a fixed page template.
 *
 * @param markdown - Markdown source text.
 * @returns The full HTML document as a string.
 * @throws Error when DOMPurify has not been loaded, which is the case
 *   whenever this module is evaluated without a `window` global.
 */
async function markdownToHtml(markdown: string): Promise<string> {
  // DOMPurify is only assigned when `window` exists; fail with a readable
  // message instead of "Cannot read properties of undefined".
  if (!DOMPurify) {
    throw new Error('Markdown to HTML conversion requires a browser environment (DOMPurify is not loaded)');
  }

  // Options are passed per call rather than through marked.setOptions(),
  // which would permanently change the defaults of the shared `marked`
  // instance for every other caller.
  const html = await marked.parse(markdown, {
    gfm: true, // GitHub Flavored Markdown
    breaks: true, // Convert \n to <br>
  });

  // Sanitize before embedding: the Markdown may contain raw HTML.
  const sanitized = DOMPurify.sanitize(html);

  // Wrap the fragment in a basic HTML document.
  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Converted Document</title>
  <style>
    body {
      font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      line-height: 1.6;
      max-width: 800px;
      margin: 2rem auto;
      padding: 0 1rem;
      color: #333;
    }
    pre {
      background: #f4f4f4;
      border: 1px solid #ddd;
      border-radius: 4px;
      padding: 1rem;
      overflow-x: auto;
    }
    code {
      background: #f4f4f4;
      padding: 0.2rem 0.4rem;
      border-radius: 3px;
      font-family: 'Courier New', monospace;
    }
    blockquote {
      border-left: 4px solid #ddd;
      margin: 1rem 0;
      padding-left: 1rem;
      color: #666;
    }
    table {
      border-collapse: collapse;
      width: 100%;
      margin: 1rem 0;
    }
    th, td {
      border: 1px solid #ddd;
      padding: 0.5rem;
      text-align: left;
    }
    th {
      background: #f4f4f4;
    }
  </style>
</head>
<body>
${sanitized}
</body>
</html>`;
}
|
||||
|
||||
/**
 * Convert an HTML string to Markdown.
 *
 * The input is sanitized with DOMPurify first and then handed to Turndown,
 * configured for ATX (`#`) headings, fenced code blocks and `-` bullets.
 *
 * @param html - HTML source text.
 * @returns The Markdown rendering of the sanitized HTML.
 * @throws Error when DOMPurify has not been loaded, which is the case
 *   whenever this module is evaluated without a `window` global.
 */
async function htmlToMarkdown(html: string): Promise<string> {
  // DOMPurify is only assigned when `window` exists; fail with a readable
  // message instead of "Cannot read properties of undefined".
  if (!DOMPurify) {
    throw new Error('HTML to Markdown conversion requires a browser environment (DOMPurify is not loaded)');
  }

  // Sanitize HTML first so scripts and event handlers never reach Turndown.
  const sanitized = DOMPurify.sanitize(html);

  const turndownService = new TurndownService({
    headingStyle: 'atx', // Use # for headings
    codeBlockStyle: 'fenced', // Use ``` for code blocks
    bulletListMarker: '-', // Use - for bullet lists
  });

  return turndownService.turndown(sanitized);
}
|
||||
|
||||
/**
 * Convert Markdown to plain text by stripping common formatting syntax.
 *
 * This is a regex-based, best-effort conversion rather than a real Markdown
 * parse. Fenced code blocks are dropped entirely; inline code, emphasis,
 * links and images are reduced to their visible text (image alt text, link
 * label).
 *
 * @param markdown - Markdown source text.
 * @returns The text with formatting removed, runs of three or more newlines
 *   collapsed to two, and leading/trailing whitespace trimmed.
 */
function markdownToText(markdown: string): string {
  const text = markdown
    // Remove header markers
    .replace(/^#{1,6}\s+/gm, '')
    // Remove bold, then italic (bold first so `**x**` is not read as two italics)
    .replace(/(\*\*|__)(.*?)\1/g, '$2')
    .replace(/(\*|_)(.*?)\1/g, '$2')
    // Replace images with their alt text. This must run BEFORE the link rule:
    // `![alt](src)` contains `[alt](src)`, so the link rule would otherwise
    // consume it and leave a stray `!` behind.
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
    // Replace links with their label
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
    // Remove fenced code blocks (including their content)
    .replace(/```[\s\S]*?```/g, '')
    // Unwrap inline code
    .replace(/`([^`]+)`/g, '$1')
    // Remove blockquote markers
    .replace(/^>\s+/gm, '')
    // Remove horizontal rules
    .replace(/^-{3,}$/gm, '')
    // Clean up multiple newlines left behind by the removals above
    .replace(/\n{3,}/g, '\n\n');

  return text.trim();
}
|
||||
|
||||
/**
 * Convert HTML to plain text.
 *
 * All tags are removed by running DOMPurify with an empty tag allow-list.
 * The remaining text is entity-decoded, and every run of whitespace is
 * collapsed to a single space.
 *
 * @param html - HTML source text.
 * @returns The visible text on a single line, trimmed.
 * @throws Error when DOMPurify has not been loaded, which is the case
 *   whenever this module is evaluated without a `window` global.
 */
function htmlToText(html: string): string {
  // DOMPurify is only assigned when `window` exists; fail with a readable
  // message instead of "Cannot read properties of undefined".
  if (!DOMPurify) {
    throw new Error('HTML to text conversion requires a browser environment (DOMPurify is not loaded)');
  }

  // An empty allow-list strips every element and keeps only text content.
  const stripped: string = DOMPurify.sanitize(html, { ALLOWED_TAGS: [] });

  return stripped
    // The sanitizer returns serialized HTML, so text nodes come back with
    // `&`, `<`, `>` and non-breaking spaces escaped. The result is emitted as
    // plain text, so turn them back into literal characters. `&amp;` goes
    // last so that `&amp;lt;` decodes to `&lt;` rather than `<`.
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&')
    // Clean up whitespace
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Convert plain text to Markdown.
 *
 * No Markdown syntax is added; the text is only normalized into paragraphs:
 * line endings become `\n`, any run of blank (or whitespace-only) lines is
 * treated as one paragraph break, empty paragraphs are dropped, and the
 * remaining paragraphs are joined with exactly one blank line.
 *
 * @param text - Plain text input.
 * @returns The normalized text; an empty string when the input has no content.
 */
function textToMarkdown(text: string): string {
  return text
    // Normalize Windows (\r\n) and old Mac (\r) line endings so paragraph
    // detection works regardless of where the file was written.
    .replace(/\r\n?/g, '\n')
    // A paragraph break is a newline, optional whitespace, and another
    // newline. Splitting on the literal '\n\n' would leave a stray leading
    // newline after an odd number of line breaks.
    .split(/\n\s*\n/)
    .filter((paragraph) => paragraph.trim() !== '')
    .join('\n\n');
}
|
||||
|
||||
/**
 * Convert plain text to a complete, styled, standalone HTML document.
 *
 * The text is HTML-escaped, split into paragraphs on blank lines, and each
 * paragraph is wrapped in `<p>` with its remaining single newlines rendered
 * as `<br>`.
 *
 * @param text - Plain text input. It is treated as untrusted: every HTML
 *   metacharacter is escaped before being embedded in the page.
 * @returns The full HTML document as a string.
 */
function textToHtml(text: string): string {
  // Escape HTML entities. `&` must be replaced first, otherwise the
  // ampersands introduced by the other replacements would be escaped again.
  const escaped = text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#039;');

  // Convert blank-line-separated chunks to paragraphs.
  const paragraphs = escaped
    // Normalize Windows (\r\n) and old Mac (\r) line endings first.
    .replace(/\r\n?/g, '\n')
    // Any run of blank (or whitespace-only) lines is one paragraph break.
    .split(/\n\s*\n/)
    .filter((paragraph) => paragraph.trim() !== '')
    .map((paragraph) => `  <p>${paragraph.replace(/\n/g, '<br>')}</p>`)
    .join('\n');

  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Converted Document</title>
  <style>
    body {
      font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      line-height: 1.6;
      max-width: 800px;
      margin: 2rem auto;
      padding: 0 1rem;
      color: #333;
    }
  </style>
</head>
<body>
${paragraphs}
</body>
</html>`;
}
|
||||
|
||||
/**
 * MIME types keyed by lower-case output format name.
 *
 * A Map is used instead of an object literal so that lookups only ever see
 * the entries listed here and never inherited members such as `constructor`
 * or `toString`.
 */
const OUTPUT_MIME_TYPES: ReadonlyMap<string, string> = new Map([
  ['html', 'text/html'],
  ['htm', 'text/html'],
  ['md', 'text/markdown'],
  ['markdown', 'text/markdown'],
  ['txt', 'text/plain'],
]);

/**
 * Get the MIME type for an output format.
 *
 * @param format - Output format name (e.g. `html`, `md`, `txt`); matched
 *   case-insensitively.
 * @returns The matching MIME type, or `text/plain` for any unknown format.
 */
function getMimeType(format: string): string {
  return OUTPUT_MIME_TYPES.get(format.toLowerCase()) ?? 'text/plain';
}
|
||||
|
||||
/**
|
||||
* Convert Markdown to HTML (convenience function)
|
||||
*/
|
||||
export async function markdownToHtmlFile(
|
||||
file: File,
|
||||
onProgress?: ProgressCallback
|
||||
): Promise<ConversionResult> {
|
||||
@@ -31,9 +302,9 @@ export async function markdownToHtml(
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert HTML to Markdown (placeholder)
|
||||
* Convert HTML to Markdown (convenience function)
|
||||
*/
|
||||
export async function htmlToMarkdown(
|
||||
export async function htmlToMarkdownFile(
|
||||
file: File,
|
||||
onProgress?: ProgressCallback
|
||||
): Promise<ConversionResult> {
|
||||
|
||||
Reference in New Issue
Block a user