diff --git a/.dockerignore b/.dockerignore index 40fa7c8..4af68bf 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,10 @@ -# do not copy/add files below to docker image on-build -.git -.gitignore -.dockerignore -Dockerfile -/examples +node_modules +examples +.git +.github +.gitignore +.dockerignore +.editorconfig +Dockerfile* +README.md +LICENSE diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..30e5eb9 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,43 @@ +# http://editorconfig.org + +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = tab +insert_final_newline = true +tab_width = 4 +trim_trailing_whitespace = true + +# Overwrite specific file types +[*.{yml,yaml}] +indent_size = 2 +indent_style = space + +[*.php] +ij_php_spaces_around_pipe_in_union_type = true + +[*.ts] +ij_typescript_chained_call_dot_on_new_line = true + +# Overwrite for poeditor +[resources/language/*.json] +indent_style = space +indent_size = 4 + +# Overwrite specifics from vendors +[Component/**.php] +indent_size = 4 +indent_style = tab + +# Ignore vendor path +[vendor/**] +root = unset +charset = none +end_of_line = none +indent_style = none +insert_final_newline = none +tab_width = none +trim_trailing_whitespace = none diff --git a/.gitignore b/.gitignore index b8ade52..9d0873c 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,11 @@ typings/ # Yarn Integrity file .yarn-integrity +# IDE files +.idea + # dotenv environment variables file .env +# generated files +*.pdf diff --git a/Dockerfile b/Dockerfile index 2b70681..de33f92 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,41 @@ -FROM surnet/alpine-node-wkhtmltopdf:8.11.3-0.12.5-full-font +FROM oven/bun:1-alpine as base +WORKDIR /usr/src/app -COPY index.js . +FROM base AS install +# Install all dependencies +RUN mkdir -p /temp/all +COPY package.json bun.lockb /temp/all/ +RUN cd /temp/all && bun install --frozen-lockfile +# Install prod dependencies +RUN mkdir -p /temp/prod +COPY package.json bun.lockb /temp/prod/ +RUN cd /temp/prod && bun install --frozen-lockfile --production -EXPOSE 8000 -CMD ["node", "index.js"] +FROM base AS prerelease +COPY --from=install /temp/all/node_modules node_modules +COPY . . + +FROM surnet/alpine-wkhtmltopdf:3.19.0-0.12.6-small as wkhtmltopdf + +FROM base AS release +RUN apk add --no-cache \ + libstdc++ \ + libx11 \ + libxrender \ + libxext \ + libssl3 \ + ca-certificates \ + fontconfig \ + freetype \ + ttf-dejavu \ + ttf-droid \ + ttf-freefont \ + ttf-liberation +COPY --from=wkhtmltopdf /bin /usr/local/bin +COPY --from=install /temp/prod/node_modules node_modules +COPY --from=prerelease /usr/src/app/src . +COPY --from=prerelease /usr/src/app/package.json . + +USER bun +EXPOSE 8000/tcp +CMD ["bun", "run", "index.ts"] diff --git a/bun.lockb b/bun.lockb new file mode 100755 index 0000000..0b54baa Binary files /dev/null and b/bun.lockb differ diff --git a/bunfig.toml b/bunfig.toml new file mode 100644 index 0000000..6ba8258 --- /dev/null +++ b/bunfig.toml @@ -0,0 +1 @@ +telemetry = false diff --git a/examples/html2pdf.php b/examples/html2pdf.php deleted file mode 100644 index 38369b2..0000000 --- a/examples/html2pdf.php +++ /dev/null @@ -1,19 +0,0 @@ -Hello world from PHP'; -$file = 'test.pdf'; - -$pdf = simple_curl_post($url, $html); -file_put_contents($file, $pdf); - -function simple_curl_post($url, $body) -{ - $ch = curl_init($url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_POSTFIELDS, $body); - $response = curl_exec($ch); - curl_close($ch); - - return $response; -} diff --git a/examples/html2pdf.py b/examples/html2pdf.py deleted file mode 100644 index 5f57280..0000000 --- a/examples/html2pdf.py +++ /dev/null @@ -1,8 +0,0 @@ -import urllib2; - -url = "http://localhost:8000" -html = "

Hello world from Python

" -file = "test.pdf" - -pdf = urllib2.urlopen(url, html).read() -open(file, "w").write(pdf) diff --git a/examples/html2pdf.sh b/examples/html2pdf.sh old mode 100644 new mode 100755 index 0e027af..23f131a --- a/examples/html2pdf.sh +++ b/examples/html2pdf.sh @@ -1,2 +1,2 @@ #!/usr/bin/env bash -curl http://localhost:8000 -d '

Hello world from SHELL

' > test.pdf +curl http://localhost:8000 -H 'Content-Type: text/html' -d '

Hello world from SHELL

' > "$(dirname $0)/test.pdf" diff --git a/index.js b/index.js deleted file mode 100644 index 93d0fc9..0000000 --- a/index.js +++ /dev/null @@ -1,88 +0,0 @@ -const http = require('http'); -const urlParser = require('url').parse; -const spawn = require('child_process').spawn; -const tempDir = require('os').tmpdir(); -const fileSystem = require('fs'); - -const server = http.createServer((request, response) => { - - const requestPath = urlParser(request.url).pathname; - generatePdf(request, response); - -}).listen(80); - -server.on('error', function (e) { - console.log(e); -}); - -const generatePdf = (request, response) => { - const requestBody = []; - const clientId = (Math.random() * 0x100000000 + 1).toString(36); - - console.info({ - 'timestamp': (new Date).toISOString(), - 'client': clientId, - 'module': 'request', - 'message': 'connected', - }); - - request.on('data', (chunk) => { - requestBody.push(chunk); - }); - - request.on('end', () => { - const tempFile = tempDir + '/' + clientId + '.pdf'; - const wkhtmltopdf = spawn('wkhtmltopdf', [ - '--quiet', - '--print-media-type', - '--no-outline', - '-', - tempFile, - ]); - - wkhtmltopdf.stdin.end( - Buffer.concat(requestBody).toString() - ); - - wkhtmltopdf.on('exit', (code) => { - console.info({ - 'timestamp': (new Date).toISOString(), - 'client': clientId, - 'module': 'wkhtmltopdf', - 'message': 'exitted with ' + code, - }); - - if (code !== 0) { - response.writeHead(500); - response.end(); - return; - } - - response.writeHead(200); - fileSystem.createReadStream(tempFile).pipe(response).on('end', () => { - fileSystem.unlinkSync(tempFile); - }); - }); - - wkhtmltopdf.stderr.on('data', (chunk) => { - console.warn({ - 'timestamp': (new Date).toISOString(), - 'client': clientId, - 'module': 'wkhtmltopdf', - 'message': chunk.toString(), - }); - }); - }); - - request.on('error', (error) => { - console.warn({ - 'timestamp': (new Date).toISOString(), - 'client': clientId, - 'module': 'request', - 'message': error, - }); - - response.writeHead(400); - response.end(); - }); -}; diff --git a/package.json b/package.json new file mode 100644 index 0000000..fdd7d6d --- /dev/null +++ b/package.json @@ -0,0 +1,15 @@ +{ + "name": "html-pdf-export", + "module": "src/index.ts", + "type": "module", + "dependencies": { + "nanoid": "^5.0.6", + "pino": "^8.19.0" + }, + "devDependencies": { + "@types/bun": "latest" + }, + "peerDependencies": { + "typescript": "^5.0.0" + } +} diff --git a/src/html-to-pdf-client.ts b/src/html-to-pdf-client.ts new file mode 100644 index 0000000..db2231d --- /dev/null +++ b/src/html-to-pdf-client.ts @@ -0,0 +1,24 @@ +import { spawn, which } from 'bun'; + +export type HtmlToPdfClient = (req: Request, outputPath: string) => Promise; +export const htmlToPdfClient: HtmlToPdfClient = async (req, outputPath) => { + const bin = which('wkhtmltopdf'); + if (bin === null) { + throw new Error('Missing HTML to PDF binary'); + } + const proc = spawn( + ['wkhtmltopdf', '--quiet', '--print-media-type', '--no-outline', '-', outputPath], + {stdin: req, stderr: 'pipe'}, + ); + + const exitCode = await proc.exited; + const errors: string = await Bun.readableStreamToText(proc.stderr); + if (errors) { + throw new Error(errors); + } + + // if no errors but unsuccessful exit code, throw a generic error + if (exitCode !== 0) { + throw new Error(`Failed to convert HTML to PDF, the process exited with code ${exitCode}`); + } +}; diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..16c4b64 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,6 @@ +import { createServer } from './server.ts'; +import { trapShutdown } from './shutdown.ts'; + +const server = createServer(); + +trapShutdown(async () => server.stop()); diff --git a/src/logger.ts b/src/logger.ts new file mode 100644 index 0000000..5e31a8d --- /dev/null +++ b/src/logger.ts @@ -0,0 +1,7 @@ +import pino, { type Logger as PinoLogger } from 'pino'; + +export const loggerUsingPino = () => pino({ + name: 'html-pdf-export', +}); + +export type Logger = () => PinoLogger; diff --git a/src/server.test.ts b/src/server.test.ts new file mode 100644 index 0000000..160759c --- /dev/null +++ b/src/server.test.ts @@ -0,0 +1,63 @@ +import { write } from 'bun'; +import { afterEach, beforeEach, expect, mock, test } from 'bun:test'; +import pino, { type Logger, type LoggerExtras } from 'pino'; +import type { HtmlToPdfClient } from './html-to-pdf-client.ts'; +import { createServer } from './server.ts'; + +mock.module('nanoid', () => ({nanoid: () => 'fake-random-id'})); + +const port = 0; // 0 means give a random unassigned port +const host = 'http://localhost'; +const method = 'POST'; +const body = "

Hello world

"; +const headers = {'content-type': 'text/html'}; +const logger = { + info: mock() as pino.LogFn, + error: mock() as pino.LogFn, + child: mock() as LoggerExtras['child'], +} as Logger; +const htmlToPdfClient: HtmlToPdfClient = async (req, outputPath) => { + const html = await req.text(); + await write(outputPath, html); +}; + +let server: ReturnType; +beforeEach(() => server = createServer({port, htmlToPdfClient, logger: () => logger})); +afterEach(() => server.stop()); + +test('logs request id', async () => { + await server.fetch(new Request(host)); + expect(logger.child).toHaveBeenCalledWith({requestId: 'fake-random-id'}); +}); + +const invalidRequestMethods = ['GET', 'HEAD', 'PUT', 'DELETE', 'CONNECT', 'OPTIONS', 'TRACE', 'PATCH']; +test.each(invalidRequestMethods)('cannot do %s requests', async method => { + const res = await server.fetch(new Request(host, {method})); + expect(res.status).toBe(405); + expect(logger.error).toHaveBeenCalledWith('Invalid request method'); +}); + +test('requires a request body', async () => { + const res = await server.fetch(new Request(host, {method})); + expect(res.status).toBe(400); + expect(logger.error).toHaveBeenCalledWith('Missing request body'); +}); + +test('requires a content-type request header', async () => { + const res = await server.fetch(new Request(host, {method, body})); + expect(res.status).toBe(400); + expect(logger.error).toHaveBeenCalledWith('Missing content-type request header'); +}); + +test('requires a request with text/html content-type header', async () => { + const res = await server.fetch(new Request(host, {method, body, headers: {'content-type': ''}})); + expect(res.status).toBe(400); + expect(logger.error).toHaveBeenCalledWith('Invalid content-type request header'); +}); + +test('success', async () => { + const res = await server.fetch(new Request(host, {method, body, headers})); + expect(res.status).toBe(200); + expect(await res.text()).toBe(body); + expect(res.headers.get('content-type')).toBe('application/pdf'); +}); diff --git a/src/server.ts b/src/server.ts new file mode 100644 index 0000000..59d5452 --- /dev/null +++ b/src/server.ts @@ -0,0 +1,70 @@ +import { file } from 'bun'; +import { nanoid } from 'nanoid'; +import { mkdir, unlink } from 'node:fs/promises'; +import { tmpdir } from 'os'; +import { htmlToPdfClient, type HtmlToPdfClient } from './html-to-pdf-client.ts'; +import { type Logger, loggerUsingPino } from './logger.ts'; + +export interface CreateServerOptions { + port?: number; + logger?: Logger; + htmlToPdfClient?: HtmlToPdfClient; +} + +export const createServer = (options?: CreateServerOptions) => { + const port = options?.port ?? 8000; + const logger = options?.logger?.() ?? loggerUsingPino(); + const client = options?.htmlToPdfClient ?? htmlToPdfClient; + + logger.info(`Listening on port ${port}...`); + + return Bun.serve({ + port, + async fetch(req) { + const requestId = nanoid(); + logger.child({requestId}); + + if (req.method !== 'POST') { + logger.error('Invalid request method'); + return new Response(null, {status: 405}); + } + + if (!req.body) { + logger.error('Missing request body'); + return new Response(null, {status: 400}); + } + + if (!req.headers.has('content-type')) { + logger.error('Missing content-type request header'); + return new Response(null, {status: 400}); + } + + if (req.headers.get('content-type') !== 'text/html') { + logger.error('Invalid content-type request header'); + return new Response(null, {status: 400}); + } + + const tmpDir = process.env.HTML_PDF_EXPORT_TMPDIR ?? tmpdir(); + if (!(await file(tmpDir).exists())) { + logger.info('Temporary file directory not found, creating a new directory'); + await mkdir(tmpDir, {recursive: true}); + } + + const outputPath = `${tmpDir}/${requestId}.pdf`; + const contentLength = req.headers.get('content-length'); + logger.info('Starting conversion of HTML to PDF', {contentLength}); + const startTime = process.hrtime(); + await client(req, outputPath); + const duration = process.hrtime(startTime); + logger.info('Done converting HTML to PDF', {contentLength, duration}); + + const output = file(outputPath); + output.stream().getReader().closed.then(() => unlink(outputPath)); + return new Response(output, {status: 200, headers: {'content-type': 'application/pdf'}}); + }, + error(err) { + logger.error(err); + return new Response(null, {status: 500}); + }, + }); +}; diff --git a/src/shutdown.ts b/src/shutdown.ts new file mode 100644 index 0000000..d401506 --- /dev/null +++ b/src/shutdown.ts @@ -0,0 +1,50 @@ +import { sleep } from 'bun'; +import { type Logger as PinoLogger } from 'pino'; +import { type Logger, loggerUsingPino } from './logger.ts'; + +class ShutdownTimedOutError extends Error { +} + +interface ShutdownOptions { + timeout?: number; + logger?: Logger; +} + +export function trapShutdown(callback: () => Promise, options?: ShutdownOptions) { + process.once("SIGINT", () => handleShutdown(callback, options)); + process.once("SIGTERM", () => handleShutdown(callback, options)); +} + +async function handleShutdown(callback: () => Promise, options?: ShutdownOptions) { + const envTimeout = process.env.HTML_PDF_EXPORT_TIMEOUT ? parseInt(process.env.HTML_PDF_EXPORT_TIMEOUT) : null; + const timeout = options?.timeout ?? envTimeout ?? 10_000; + const logger = options?.logger?.() ?? loggerUsingPino(); + const handleForceExit = forceExit(logger); + + process.on("SIGTERM", handleForceExit); + process.on("SIGINT", handleForceExit); + + try { + await Promise.race([ + sleep(timeout).then(() => { + throw new ShutdownTimedOutError(); + }), + callback(), + ]); + process.exit(0); + } catch (e) { + if (e instanceof ShutdownTimedOutError) { + logger.warn("Shutdown handler timed out, quitting forcefully"); + } else { + logger.error(e, "Error during shutdown handling"); + } + process.exit(1); + } +} + +function forceExit(logger: PinoLogger) { + return (signal: number): void => { + logger.error(`Received second signal ${signal}, exiting NOW`); + process.exit(1); + }; +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..0fef23a --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,27 @@ +{ + "compilerOptions": { + // Enable latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "ESNext", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +}