feat: add comprehensive PDF support

- Add jsPDF for PDF generation from text/Markdown/HTML
- Add PDF.js for PDF text extraction (read PDFs)
- Support PDF → Text/Markdown conversions
- Support Markdown/HTML/Text → PDF conversions
- Implement page-by-page PDF text extraction
- Add automatic pagination and formatting for generated PDFs

Supported PDF operations:
- Extract text from PDF files (all pages)
- Convert PDF to Markdown or plain text
- Create formatted PDFs from Markdown, HTML, or plain text
- Wrap text and insert page breaks automatically

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-17 11:13:09 +01:00
parent 9de639b138
commit b899989b3e
5 changed files with 658 additions and 2 deletions

View File

@@ -1,6 +1,13 @@
import { marked } from 'marked';
import TurndownService from 'turndown';
import type { ConversionOptions, ProgressCallback, ConversionResult } from '@/types/conversion';
import {
pdfToText,
pdfToMarkdown,
markdownToPDF,
htmlToPDF,
plainTextToPDF,
} from './pdfService';
// Import DOMPurify only on client side
let DOMPurify: any;
@@ -34,6 +41,31 @@ export async function convertWithPandoc(
if (onProgress) onProgress(50);
// Handle PDF conversions
if (inputExt === 'pdf') {
// PDF input
if (outputFormat === 'txt') {
return await pdfToText(file, onProgress);
} else if (outputFormat === 'md' || outputFormat === 'markdown') {
return await pdfToMarkdown(file, onProgress);
} else {
throw new Error(`Conversion from PDF to ${outputFormat} not supported`);
}
}
// Handle conversions TO PDF
if (outputFormat === 'pdf') {
if (inputExt === 'md' || inputExt === 'markdown') {
return await markdownToPDF(file, onProgress);
} else if (inputExt === 'html' || inputExt === 'htm') {
return await htmlToPDF(file, onProgress);
} else if (inputExt === 'txt') {
return await plainTextToPDF(file, onProgress);
} else {
throw new Error(`Conversion from ${inputExt} to PDF not supported`);
}
}
// Perform conversion based on input and output formats
if (inputExt === 'md' || inputExt === 'markdown') {
// Markdown input