Add Ghostscript post-processing to pdf.js for KDP compliance
- Puppeteer writes to a temporary kaleidoskop-raw.pdf
- Ghostscript re-processes it with -dPDFSETTINGS=/prepress to fully embed all fonts and prevent subsetting (KDP checklist item 6)
- The PDF info dict is written via PostScript pdfmarks, with title/author/subject encoded as UTF-16BE hex to support German umlauts (item 13)
- File size drops from ~108 MB to ~9 MB (placeholder art; will grow with real 300 DPI illustrations but stays well under KDP's 650 MB limit)
- A Ghostscript failure falls back gracefully to the raw Puppeteer PDF

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+87
-10
@@ -1,16 +1,76 @@
|
|||||||
import puppeteer from 'puppeteer';
|
import puppeteer from 'puppeteer';
|
||||||
import { resolve, dirname } from 'path';
|
import { resolve, dirname } from 'path';
|
||||||
import { fileURLToPath } from 'url';
|
import { fileURLToPath } from 'url';
|
||||||
import { access } from 'fs/promises';
|
import { access, stat, readFile, writeFile, rename, unlink } from 'fs/promises';
|
||||||
|
import { execFile } from 'child_process';
|
||||||
|
import { promisify } from 'util';
|
||||||
|
import matter from 'gray-matter';
|
||||||
|
|
||||||
|
const execFileAsync = promisify(execFile);
|
||||||
const __dir = dirname(fileURLToPath(import.meta.url));
|
const __dir = dirname(fileURLToPath(import.meta.url));
|
||||||
|
const root = resolve(__dir, '..');
|
||||||
|
const inputPath = resolve(root, 'output', 'book.html');
|
||||||
|
const rawPath = resolve(root, 'output', 'kaleidoskop-raw.pdf');
|
||||||
|
const marksPath = resolve(root, 'output', 'pdfmarks.ps');
|
||||||
|
const outputPath = resolve(root, 'output', 'kaleidoskop.pdf');
|
||||||
|
|
||||||
/**
 * Report whether `path` can be accessed.
 *
 * @param {string} path - File system path to probe.
 * @returns {Promise<boolean>} `true` when `access(path)` succeeds, `false`
 *   when it rejects for any reason.
 */
async function fileExists(path) {
  try {
    await access(path);
  } catch {
    // Any access failure is reported as "does not exist".
    return false;
  }
  return true;
}
|
||||||
const root = resolve(__dir, '..');
|
|
||||||
const inputPath = resolve(root, 'output', 'book.html');
|
// Encode a JS string as UTF-16BE hex for PostScript pdfmarks (handles umlauts etc.)
|
||||||
const outputPath = resolve(root, 'output', 'kaleidoskop.pdf');
|
/**
 * Encode a value as UTF-16BE hex for a PostScript pdfmark hex string
 * (`<...>`), prefixed with the FEFF byte-order mark.
 *
 * Each UTF-16 code unit is emitted as four uppercase hex digits, so
 * characters outside the BMP come out as their surrogate pair.
 *
 * @param {*} str - Text to encode. Non-string values (e.g. a front-matter
 *   title that YAML parsed as a number) are converted with `String()`;
 *   `null` and `undefined` are treated as the empty string.
 * @returns {string} Uppercase hex digits, always starting with `FEFF`.
 */
function toUtf16BeHex(str) {
  // Coerce first: a number has no `.length`, which previously made the loop
  // a no-op and silently produced an empty (BOM-only) metadata value.
  const text = String(str ?? '');
  let hex = 'FEFF';
  for (let i = 0; i < text.length; i++) {
    // charCodeAt yields one 16-bit code unit: high byte then low byte.
    hex += text.charCodeAt(i).toString(16).padStart(4, '0');
  }
  return hex.toUpperCase();
}
|
||||||
|
|
||||||
|
/**
 * Build the PostScript source of a DOCINFO pdfmark that sets the PDF
 * info dictionary entries.
 *
 * Title, author and subject are written as UTF-16BE hex strings via
 * `toUtf16BeHex`; creator and producer are fixed ASCII literals.
 *
 * @param {{ title: string, author: string, subject: string }} meta
 * @returns {string} The pdfmark program, one entry per line, joined with "\n".
 */
function buildPdfmarks({ title, author, subject }) {
  // Wrap an encoded value in PostScript hex-string delimiters.
  const hexString = (value) => `<${toUtf16BeHex(value)}>`;

  const lines = [
    `[ /Title ${hexString(title)}`,
    ` /Author ${hexString(author)}`,
    ` /Subject ${hexString(subject)}`,
    ' /Creator (Puppeteer + Ghostscript)',
    ' /Producer (kaleidoskop build pipeline)',
    ' /DOCINFO pdfmark',
  ];

  return lines.join('\n');
}
|
||||||
|
|
||||||
|
/**
 * Re-process the raw Puppeteer PDF with Ghostscript's pdfwrite device.
 *
 * Writes the DOCINFO pdfmark file to `marksPath`, then runs `gs` to read
 * `rawPath` and write `outputPath`. If Ghostscript fails, the raw PDF is
 * renamed to `outputPath` so the build still produces a file.
 *
 * @param {{ title: string, author: string, subject: string }} meta -
 *   Values for the PDF info dictionary.
 * @returns {Promise<boolean>} `true` when Ghostscript succeeded, `false` when
 *   the raw PDF was used as a fallback.
 * @throws If the pdfmark file cannot be written, or if the fallback rename
 *   fails.
 */
async function runGhostscript({ title, author, subject }) {
  const marks = buildPdfmarks({ title, author, subject });
  await writeFile(marksPath, marks, 'utf-8');

  const gs = 'gs';
  const args = [
    '-dBATCH', '-dNOPAUSE', '-dQUIET',
    '-sDEVICE=pdfwrite',
    '-dCompatibilityLevel=1.4',
    '-dPDFSETTINGS=/prepress',
    '-dEmbedAllFonts=true',
    '-dSubsetFonts=false',
    '-dDownsampleColorImages=false',
    '-dDownsampleGrayImages=false',
    '-dDownsampleMonoImages=false',
    `-sOutputFile=${outputPath}`,
    // Input PDF first, pdfmarks last: Ghostscript processes its inputs in
    // order, so the DOCINFO pdfmark must come after the input PDF for its
    // Title/Creator/Producer to win over the Info dictionary carried over
    // from the Puppeteer file.
    rawPath,
    marksPath,
  ];

  try {
    await execFileAsync(gs, args);
  } catch (err) {
    console.error('Ghostscript failed:', err.message);
    // The caller only removes the marks file on success, so clean it up here
    // (best effort) to avoid leaving pdfmarks.ps behind after a failed run.
    await unlink(marksPath).catch(() => {});
    // Fall back to raw PDF so the build still produces output
    await rename(rawPath, outputPath);
    console.warn('Falling back to raw Puppeteer PDF (no GS post-processing).');
    return false;
  }
  return true;
}
|
||||||
|
|
||||||
async function generate() {
|
async function generate() {
|
||||||
try {
|
try {
|
||||||
@@ -20,9 +80,13 @@ async function generate() {
|
|||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// On ARM64 (e.g. WSL2 on Apple Silicon / Raspberry Pi), Puppeteer's bundled
|
// Read metadata from front-matter for PDF info dict
|
||||||
// Chrome is x86-64 and won't run. Use the system Chromium instead:
|
const fmRaw = await readFile(resolve(root, 'content', '00-front-matter.md'), 'utf-8');
|
||||||
// sudo apt-get install -y chromium
|
const { data: fm } = matter(fmRaw);
|
||||||
|
const title = fm.title || 'Das Kaleidoskop der Schlummerwelten';
|
||||||
|
const author = fm.author || '';
|
||||||
|
const subject = fm.subtitle || 'Kinderbuch';
|
||||||
|
|
||||||
const systemChromium = '/usr/bin/chromium';
|
const systemChromium = '/usr/bin/chromium';
|
||||||
const useSystem = await fileExists(systemChromium);
|
const useSystem = await fileExists(systemChromium);
|
||||||
|
|
||||||
@@ -35,9 +99,9 @@ async function generate() {
|
|||||||
|
|
||||||
await page.goto(`file://${inputPath}`, { waitUntil: 'networkidle0' });
|
await page.goto(`file://${inputPath}`, { waitUntil: 'networkidle0' });
|
||||||
|
|
||||||
// 8.75 × 8.75 inches = trim (8.5×8.5) + 0.125in bleed on each side
|
// Write to temp file first so Ghostscript can read it
|
||||||
await page.pdf({
|
await page.pdf({
|
||||||
path: outputPath,
|
path: rawPath,
|
||||||
width: '8.75in',
|
width: '8.75in',
|
||||||
height: '8.75in',
|
height: '8.75in',
|
||||||
printBackground: true,
|
printBackground: true,
|
||||||
@@ -45,7 +109,20 @@ async function generate() {
|
|||||||
});
|
});
|
||||||
|
|
||||||
await browser.close();
|
await browser.close();
|
||||||
console.log(`PDF written to output/kaleidoskop.pdf`);
|
|
||||||
|
const rawSize = (await stat(rawPath)).size;
|
||||||
|
console.log(`Raw PDF: ${(rawSize / 1_048_576).toFixed(1)} MB`);
|
||||||
|
|
||||||
|
console.log('Running Ghostscript (font embedding + metadata)…');
|
||||||
|
const gsOk = await runGhostscript({ title, author, subject });
|
||||||
|
|
||||||
|
if (gsOk) {
|
||||||
|
const finalSize = (await stat(outputPath)).size;
|
||||||
|
console.log(`Final PDF: ${(finalSize / 1_048_576).toFixed(1)} MB`);
|
||||||
|
await unlink(rawPath).catch(() => {});
|
||||||
|
await unlink(marksPath).catch(() => {});
|
||||||
|
console.log('PDF written to output/kaleidoskop.pdf (fonts embedded, metadata set)');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
generate().catch(err => { console.error(err); process.exit(1); });
|
generate().catch(err => { console.error(err); process.exit(1); });
|
||||||
|
|||||||
Reference in New Issue
Block a user