Add Ghostscript post-processing to pdf.js for KDP compliance
- Puppeteer writes to a temp kaleidoskop-raw.pdf
- Ghostscript re-processes with -dPDFSETTINGS=/prepress to fully embed all fonts and prevent subsetting (KDP checklist item 6)
- PDF info dict is written via PostScript pdfmarks, with title/author/subject encoded as UTF-16BE hex to support German umlauts (item 13)
- File size drops from ~108 MB to ~9 MB (placeholder art; will grow with real 300 DPI illustrations but stays well under KDP's 650 MB limit)
- GS failure falls back gracefully to the raw Puppeteer PDF

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+87
-10
@@ -1,16 +1,76 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
import { resolve, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { access } from 'fs/promises';
|
||||
import { access, stat, readFile, writeFile, rename, unlink } from 'fs/promises';
|
||||
import { execFile } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import matter from 'gray-matter';
|
||||
|
||||
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);

// Directory of this module, and the directory one level above it.
const __dir = dirname(fileURLToPath(import.meta.url));
const root = resolve(__dir, '..');

// All build artefacts live under <root>/output:
//   inputPath  - HTML rendered by Puppeteer
//   rawPath    - PDF written by Puppeteer before post-processing
//   marksPath  - temporary PostScript pdfmarks file
//   outputPath - final PDF
const [inputPath, rawPath, marksPath, outputPath] = [
  'book.html',
  'kaleidoskop-raw.pdf',
  'pdfmarks.ps',
  'kaleidoskop.pdf',
].map((fileName) => resolve(root, 'output', fileName));
|
||||
|
||||
/**
 * Report whether the given path can be accessed.
 *
 * @param {string} path - Filesystem path to probe.
 * @returns {Promise<boolean>} `true` when `access` succeeds, `false` when it
 *   fails for any reason.
 */
async function fileExists(path) {
  let reachable = true;
  try {
    await access(path);
  } catch {
    reachable = false;
  }
  return reachable;
}
|
||||
const root = resolve(__dir, '..');
|
||||
const inputPath = resolve(root, 'output', 'book.html');
|
||||
const outputPath = resolve(root, 'output', 'kaleidoskop.pdf');
|
||||
|
||||
/**
 * Encode a value as an upper-case UTF-16BE hex string prefixed with the
 * `FEFF` byte-order mark, for use inside a PostScript pdfmark hex string
 * (`<...>`). This lets non-ASCII text such as umlauts be carried in the mark.
 *
 * Each UTF-16 code unit is written as four hex digits (high byte first), so
 * characters outside the BMP come out as their surrogate pair.
 *
 * @param {*} str - Text to encode. Non-string values are converted with
 *   `String()`; `null`/`undefined` are treated as the empty string.
 * @returns {string} `FEFF` followed by four hex digits per UTF-16 code unit.
 */
function toUtf16BeHex(str) {
  // Callers may pass values parsed from front-matter, which are not
  // guaranteed to be strings. Without coercion a number has no `.length` and
  // would silently encode to a bare BOM; null/undefined would throw.
  const text = String(str ?? '');

  let hex = 'FEFF';
  for (let i = 0; i < text.length; i++) {
    // charCodeAt yields a 16-bit code unit; 4 zero-padded hex digits are
    // exactly its big-endian byte pair.
    hex += text.charCodeAt(i).toString(16).padStart(4, '0');
  }
  return hex.toUpperCase();
}
|
||||
|
||||
/**
 * Build the PostScript source for a single `/DOCINFO pdfmark`.
 *
 * Title, author and subject are emitted as hex strings produced by
 * `toUtf16BeHex`; creator and producer are fixed literal strings.
 *
 * @param {{ title: string, author: string, subject: string }} meta - Document metadata.
 * @returns {string} Newline-joined pdfmark source.
 */
function buildPdfmarks({ title, author, subject }) {
  const titleHex = toUtf16BeHex(title);
  const authorHex = toUtf16BeHex(author);
  const subjectHex = toUtf16BeHex(subject);

  const lines = [
    `[ /Title <${titleHex}>`,
    ` /Author <${authorHex}>`,
    ` /Subject <${subjectHex}>`,
    ' /Creator (Puppeteer + Ghostscript)',
    ' /Producer (kaleidoskop build pipeline)',
    ' /DOCINFO pdfmark',
  ];
  return lines.join('\n');
}
|
||||
|
||||
/**
 * Post-process the raw Puppeteer PDF with Ghostscript.
 *
 * Writes a temporary pdfmarks file to `marksPath`, then runs `gs` with the
 * `pdfwrite` device to read `rawPath` and write `outputPath`. If Ghostscript
 * fails, the error is logged and the raw PDF is renamed to `outputPath` so the
 * build still produces a file. The temporary pdfmarks file is removed in both
 * cases.
 *
 * @param {{ title: string, author: string, subject: string }} meta - Values
 *   written into the PDF document info via pdfmarks.
 * @returns {Promise<boolean>} `true` when Ghostscript succeeded, `false` when
 *   the raw PDF was used as a fallback.
 * @throws If the pdfmarks file cannot be written, or if the fallback rename
 *   fails.
 */
async function runGhostscript({ title, author, subject }) {
  await writeFile(marksPath, buildPdfmarks({ title, author, subject }), 'utf-8');

  const args = [
    '-dBATCH', '-dNOPAUSE', '-dQUIET',
    '-sDEVICE=pdfwrite',
    '-dCompatibilityLevel=1.4',
    '-dPDFSETTINGS=/prepress',
    '-dEmbedAllFonts=true',
    '-dSubsetFonts=false',
    '-dDownsampleColorImages=false',
    '-dDownsampleGrayImages=false',
    '-dDownsampleMonoImages=false',
    `-sOutputFile=${outputPath}`,
    rawPath,
    // The pdfmarks file goes AFTER the input PDF: Ghostscript forwards the
    // input's own Info dictionary while reading it, so marks supplied earlier
    // can be overridden by the values Chromium wrote.
    // NOTE(review): confirm resulting metadata with the installed gs version.
    marksPath,
  ];

  try {
    await execFileAsync('gs', args);
    return true;
  } catch (err) {
    console.error('Ghostscript failed:', err.message);
    // Fall back to raw PDF so the build still produces output
    await rename(rawPath, outputPath);
    console.warn('Falling back to raw Puppeteer PDF (no GS post-processing).');
    return false;
  } finally {
    // Best-effort cleanup of the temporary marks file on success AND failure;
    // a missing file here is not an error worth surfacing.
    await unlink(marksPath).catch(() => {});
  }
}
|
||||
|
||||
async function generate() {
|
||||
try {
|
||||
@@ -20,9 +80,13 @@ async function generate() {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// On ARM64 (e.g. WSL2 on Apple Silicon / Raspberry Pi), Puppeteer's bundled
|
||||
// Chrome is x86-64 and won't run. Use the system Chromium instead:
|
||||
// sudo apt-get install -y chromium
|
||||
// Read metadata from front-matter for PDF info dict
|
||||
const fmRaw = await readFile(resolve(root, 'content', '00-front-matter.md'), 'utf-8');
|
||||
const { data: fm } = matter(fmRaw);
|
||||
const title = fm.title || 'Das Kaleidoskop der Schlummerwelten';
|
||||
const author = fm.author || '';
|
||||
const subject = fm.subtitle || 'Kinderbuch';
|
||||
|
||||
const systemChromium = '/usr/bin/chromium';
|
||||
const useSystem = await fileExists(systemChromium);
|
||||
|
||||
@@ -35,9 +99,9 @@ async function generate() {
|
||||
|
||||
await page.goto(`file://${inputPath}`, { waitUntil: 'networkidle0' });
|
||||
|
||||
// 8.75 × 8.75 inches = trim (8.5×8.5) + 0.125in bleed on each side
|
||||
// Write to temp file first so Ghostscript can read it
|
||||
await page.pdf({
|
||||
path: outputPath,
|
||||
path: rawPath,
|
||||
width: '8.75in',
|
||||
height: '8.75in',
|
||||
printBackground: true,
|
||||
@@ -45,7 +109,20 @@ async function generate() {
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
console.log(`PDF written to output/kaleidoskop.pdf`);
|
||||
|
||||
const rawSize = (await stat(rawPath)).size;
|
||||
console.log(`Raw PDF: ${(rawSize / 1_048_576).toFixed(1)} MB`);
|
||||
|
||||
console.log('Running Ghostscript (font embedding + metadata)…');
|
||||
const gsOk = await runGhostscript({ title, author, subject });
|
||||
|
||||
if (gsOk) {
|
||||
const finalSize = (await stat(outputPath)).size;
|
||||
console.log(`Final PDF: ${(finalSize / 1_048_576).toFixed(1)} MB`);
|
||||
await unlink(rawPath).catch(() => {});
|
||||
await unlink(marksPath).catch(() => {});
|
||||
console.log('PDF written to output/kaleidoskop.pdf (fonts embedded, metadata set)');
|
||||
}
|
||||
}
|
||||
|
||||
// Run the build; on any rejection, log the error and exit with status 1.
generate().catch((error) => {
  console.error(error);
  process.exit(1);
});
|
||||
|
||||
Reference in New Issue
Block a user