From 2abcda73e82b6db3b87694bff85abba8a01d7f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kr=C3=BCger?= Date: Sun, 3 May 2026 19:08:35 +0200 Subject: [PATCH] Add Ghostscript post-processing to pdf.js for KDP compliance - Puppeteer writes to a temp kaleidoskop-raw.pdf - Ghostscript re-processes with -dPDFSETTINGS=/prepress plus -dEmbedAllFonts=true and -dSubsetFonts=false to fully embed all fonts and prevent subsetting (KDP checklist item 6) - PDF info dict is written via PostScript pdfmarks, with title/author/subject encoded as UTF-16BE hex to support German umlauts (item 13) - File size drops from ~108 MB to ~9 MB (placeholder art; will grow with real 300 DPI illustrations but stays well under KDP's 650 MB limit) - GS failure falls back gracefully to the raw Puppeteer PDF Co-Authored-By: Claude Sonnet 4.6 --- scripts/pdf.js | 97 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 10 deletions(-) diff --git a/scripts/pdf.js b/scripts/pdf.js index b5b810a..ce03c65 100644 --- a/scripts/pdf.js +++ b/scripts/pdf.js @@ -1,16 +1,76 @@ import puppeteer from 'puppeteer'; import { resolve, dirname } from 'path'; import { fileURLToPath } from 'url'; -import { access } from 'fs/promises'; +import { access, stat, readFile, writeFile, rename, unlink } from 'fs/promises'; +import { execFile } from 'child_process'; +import { promisify } from 'util'; +import matter from 'gray-matter'; +const execFileAsync = promisify(execFile); const __dir = dirname(fileURLToPath(import.meta.url)); +const root = resolve(__dir, '..'); +const inputPath = resolve(root, 'output', 'book.html'); +const rawPath = resolve(root, 'output', 'kaleidoskop-raw.pdf'); +const marksPath = resolve(root, 'output', 'pdfmarks.ps'); +const outputPath = resolve(root, 'output', 'kaleidoskop.pdf'); async function fileExists(path) { try { await access(path); return true; } catch { return false; } } -const root = resolve(__dir, '..'); -const inputPath = resolve(root, 'output', 'book.html'); -const outputPath = resolve(root, 
'output', 'kaleidoskop.pdf'); + +// Encode a JS string as UTF-16BE hex for PostScript pdfmarks (handles umlauts etc.) +function toUtf16BeHex(str) { + const hex = []; + for (let i = 0; i < str.length; i++) { + const cp = str.charCodeAt(i); + hex.push(((cp >> 8) & 0xff).toString(16).padStart(2, '0')); + hex.push((cp & 0xff).toString(16).padStart(2, '0')); + } + return 'FEFF' + hex.join('').toUpperCase(); +} + +function buildPdfmarks({ title, author, subject }) { + return [ + '[ /Title <' + toUtf16BeHex(title) + '>', + ' /Author <' + toUtf16BeHex(author) + '>', + ' /Subject <' + toUtf16BeHex(subject) + '>', + ' /Creator (Puppeteer + Ghostscript)', + ' /Producer (kaleidoskop build pipeline)', + ' /DOCINFO pdfmark', + ].join('\n'); +} + +async function runGhostscript({ title, author, subject }) { + const marks = buildPdfmarks({ title, author, subject }); + await writeFile(marksPath, marks, 'utf-8'); + + const gs = 'gs'; + const args = [ + '-dBATCH', '-dNOPAUSE', '-dQUIET', + '-sDEVICE=pdfwrite', + '-dCompatibilityLevel=1.4', + '-dPDFSETTINGS=/prepress', + '-dEmbedAllFonts=true', + '-dSubsetFonts=false', + '-dDownsampleColorImages=false', + '-dDownsampleGrayImages=false', + '-dDownsampleMonoImages=false', + `-sOutputFile=${outputPath}`, + marksPath, + rawPath, + ]; + + try { + await execFileAsync(gs, args); + } catch (err) { + console.error('Ghostscript failed:', err.message); + // Fall back to raw PDF so the build still produces output + await rename(rawPath, outputPath); + console.warn('Falling back to raw Puppeteer PDF (no GS post-processing).'); + return false; + } + return true; +} async function generate() { try { @@ -20,9 +80,13 @@ async function generate() { process.exit(1); } - // On ARM64 (e.g. WSL2 on Apple Silicon / Raspberry Pi), Puppeteer's bundled - // Chrome is x86-64 and won't run. 
Use the system Chromium instead: - // sudo apt-get install -y chromium + // Read metadata from front-matter for PDF info dict + const fmRaw = await readFile(resolve(root, 'content', '00-front-matter.md'), 'utf-8'); + const { data: fm } = matter(fmRaw); + const title = fm.title || 'Das Kaleidoskop der Schlummerwelten'; + const author = fm.author || ''; + const subject = fm.subtitle || 'Kinderbuch'; + const systemChromium = '/usr/bin/chromium'; const useSystem = await fileExists(systemChromium); @@ -35,9 +99,9 @@ async function generate() { await page.goto(`file://${inputPath}`, { waitUntil: 'networkidle0' }); - // 8.75 × 8.75 inches = trim (8.5×8.5) + 0.125in bleed on each side + // Write to temp file first so Ghostscript can read it await page.pdf({ - path: outputPath, + path: rawPath, width: '8.75in', height: '8.75in', printBackground: true, @@ -45,7 +109,20 @@ async function generate() { }); await browser.close(); - console.log(`PDF written to output/kaleidoskop.pdf`); + + const rawSize = (await stat(rawPath)).size; + console.log(`Raw PDF: ${(rawSize / 1_048_576).toFixed(1)} MB`); + + console.log('Running Ghostscript (font embedding + metadata)…'); + const gsOk = await runGhostscript({ title, author, subject }); + + if (gsOk) { + const finalSize = (await stat(outputPath)).size; + console.log(`Final PDF: ${(finalSize / 1_048_576).toFixed(1)} MB`); + await unlink(rawPath).catch(() => {}); + await unlink(marksPath).catch(() => {}); + console.log('PDF written to output/kaleidoskop.pdf (fonts embedded, metadata set)'); + } } generate().catch(err => { console.error(err); process.exit(1); });