pymupdf-node

v0.0.5

Published

9 months ago

PyMuPDF with JS

0High
0Medium
0Low

artifex-js

mupdf pymupdf node artifex

pymupdf-node

PyMuPDF for Node.js

Getting Started

Install node modules

npm install pymupdf-node

Trying

Import into your source code with:

import * as PyMuPDFNode from "pymupdf-node";

Load the PyMuPDF wheel into the pymupdf module with:

const pymupdf = await PyMuPDFNode.loadPyMuPDF("node_modules/pymupdf-node/pymupdf/pymupdf-1.26.0-cp312-abi3-pyodide_2024_0_wasm32.whl");

If you also need PyMuPDF4LLM, load its wheel into the pymupdf4LLM module with:

const pymupdf4LLM = await PyMuPDFNode.loadPyMuPDF4LLM("node_modules/pymupdf-node/pymupdf/pymupdf4llm-0.0.24-py3-none-any.whl");

Open a document:

let doc = pymupdf.open("test.pdf");

Copy a page:

doc.copy_page(0);

Get document as markdown:

// Convert the opened document to Markdown and print it.
// The trailing object carries named options — presumably forwarded as keyword
// arguments to PyMuPDF4LLM's Python `to_markdown`; see its API docs to confirm each option.
const md = pymupdf4LLM.to_markdown(doc, {
	page_chunks: false,
	write_images: false,
	ignore_images: true,
	image_path: "",
	extract_words: false,
	show_progress: false,
});
console.log(md);

And much more!

PyMuPDF Node brings PyMuPDF to JavaScript environments: you call the PyMuPDF Python API as if it were JavaScript. See the PyMuPDF API documentation for the full list of available methods.

Full example:

import * as PyMuPDFNode from "pymupdf-node";
import * as fs from "fs";

const pymupdf = await PyMuPDFNode.loadPyMuPDF("node_modules/pymupdf-node/pymupdf/pymupdf-1.26.0-cp312-abi3-pyodide_2024_0_wasm32.whl");
const pymupdf4LLM = await PyMuPDFNode.loadPyMuPDF4LLM("node_modules/pymupdf-node/pymupdf/pymupdf4llm-0.0.24-py3-none-any.whl");

/**
 * Print a section banner to the console, preceded by a blank line.
 *
 * @param {string} title - Text to show between the `=====` markers.
 * @returns {void}
 */
function logSection(title) {
	const banner = `\n===== ${title} =====`;
	console.log(banner);
}

/**
 * Log a section banner for `label`, convert `doc` to Markdown with
 * PyMuPDF4LLM, and print the result to the console.
 *
 * No header-detector objects are built here: earlier versions of this example
 * constructed two per call and never passed them to `to_markdown`, so they
 * only added work without affecting the output.
 *
 * @param {object} doc - Document handle as returned by `pymupdf.open()`.
 * @param {string} [label="Document Snapshot"] - Text shown in the section banner.
 * @returns {void}
 */
function printMarkdown(doc, label = "Document Snapshot") {
	logSection(`📄 Print Markdown: ${label}`);
	const md = pymupdf4LLM.to_markdown(doc, {
		page_chunks: false,
		write_images: false,
		ignore_images: true,
		image_path: "",
		extract_words: false,
		show_progress: false,
	});
	console.log(md);
}

/**
 * Open "test.pdf" again and hand back the new document handle, so each demo
 * section can start from an unmodified copy of the file.
 *
 * @returns {object} The document returned by `pymupdf.open("test.pdf")`.
 */
function openFreshDoc() {
	const freshDoc = pymupdf.open("test.pdf");
	return freshDoc;
}

// Demo walkthrough, sections 1-12. Every section reassigns `doc` to a freshly
// opened copy of test.pdf, so the edits made in one section do not carry over
// into the next.
// NOTE(review): the previous document is not explicitly closed before `doc` is
// reassigned — confirm whether handles should be released in long-running code.

// 1. Report the page count and print the untouched document as Markdown.
let doc = openFreshDoc();
logSection("1. Initial Page Count");
console.log("Page count:", doc.page_count);
printMarkdown(doc, "Initial PDF");

// 2. Duplicate page 0 (the log message below states the copy lands at the end).
doc = openFreshDoc();
logSection("2. Copy Page");
doc.copy_page(0);
console.log("Copied page 0 to the end");
printMarkdown(doc, "After copy_page(0)");

// 3. Add a blank page using new_page() with no arguments.
doc = openFreshDoc();
logSection("3. Add New Page (end)");
doc.new_page();
console.log("Added blank page at the end");
printMarkdown(doc, "After new_page()");

// 4. Add a blank page with positional arguments — per the log message these
//    are the insertion index followed by width and height.
doc = openFreshDoc();
logSection("4. Add New Page (index 1, custom size)");
doc.new_page(1, 400, 500);
console.log("Inserted blank page at index 1 (400x500)");
printMarkdown(doc, "After new_page(1, 400, 500)");

// 5. Insert a page carrying text; options are passed as a single object.
//    The return value is reported below as the number of lines inserted.
doc = openFreshDoc();
logSection("5. Insert Page with Text");
const count = doc.insert_page({ pno: 0, text: "Inserted Page Content" });
console.log(`Inserted text page at index 0 (lines inserted: ${count})`);
printMarkdown(doc, "After insert_page()");

// 6. Delete the last page (page indices are zero-based, hence page_count - 1).
doc = openFreshDoc();
logSection("6. Delete Last Page");
doc.delete_page(doc.page_count - 1);
console.log("Deleted last page");
printMarkdown(doc, "After delete_page()");

// 7. Delete several pages at once by passing an array of page indices.
doc = openFreshDoc();
logSection("7. Delete Pages by Array");
doc.delete_pages([1, 2]);
console.log("Deleted pages at index 1 and 2");
printMarkdown(doc, "After delete_pages([1, 2])");

// 8. Delete a page range by passing the first and last index as two arguments.
doc = openFreshDoc();
logSection("8. Delete Page Range 0–1");
doc.delete_pages(0, 1);
console.log("Deleted pages from index 0 to 1");
printMarkdown(doc, "After delete_pages(0, 1)");

// 9. Define a page-label rule, read the rules back, then look up the page
//    numbers that carry the label "L-1".
doc = openFreshDoc();
logSection("9. Set & Get Page Labels");
doc.set_page_labels([{ startpage: 0, prefix: "L-", style: "D", firstpagenum: 1 }]);
const labels = doc.get_page_labels();
console.log("Page labels:", labels);
const match = doc.get_page_numbers("L-1");
console.log("Page numbers with label 'L-1':", match);

// 10. Bake the document; per the log message the two flags select
//     annotations and widgets.
doc = openFreshDoc();
logSection("10. Bake Document");
doc.bake(true, true);
console.log("Baked document (annotations + widgets)");

// 11. Scrub the document, selecting metadata and JavaScript via the options object.
doc = openFreshDoc();
logSection("11. Scrub Metadata");
doc.scrub({ metadata: true, javascript: true });
console.log("Scrubbed metadata and javascript");

// 12. Open a second copy of the file and insert its first page into `doc`.
doc = openFreshDoc();
const doc2 = openFreshDoc();
logSection("12. Insert PDF Page from Another Doc");
doc.insert_pdf(doc2, { from_page: 0, to_page: 0 });
console.log("Inserted first page of another doc");
printMarkdown(doc, "After insert_pdf()");

// 13. Embed test.pdf into a fresh copy of itself and print the returned xref.
doc = openFreshDoc();
logSection("13. Embed File");
const buffer = fs.readFileSync("test.pdf");
// A Node Buffer may be a view into a larger shared ArrayBuffer, so `buffer.buffer`
// on its own can contain unrelated bytes. Slice out exactly this file's byte range.
const embeddedBytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
const xref = doc.embfile_add("sample", embeddedBytes, "test.pdf", "test.pdf", "Embedded sample PDF");
console.log("Embedded file XREF:", xref);

// 14. Exercise the annotation, link and text-insertion APIs on the first page
//     of a fresh document, then save the result.
doc = openFreshDoc();
logSection("14. Add annot, link, text, etc");

const page = doc.load_page(0);

// Point-anchored annotations take [x, y]; rectangle-based ones take [x0, y0, x1, y1].
page.add_caret_annot([10, 10]);
page.add_text_annot([10, 30], 'This is a text annotation.');
page.add_freetext_annot([50, 30, 300, 80], 'This is a free_text annotation.', { fontsize: 14, richtext: true, border_color: [0, 0, 0], border_width: 2 });

// A Node Buffer may be a view into a larger shared ArrayBuffer, so `.buffer` on
// its own can contain unrelated bytes. Slice out exactly the attachment's bytes.
const attachment = fs.readFileSync('./test.pdf');
const attachmentBytes = attachment.buffer.slice(attachment.byteOffset, attachment.byteOffset + attachment.byteLength);
page.add_file_annot([10, 70], attachmentBytes, 'pymupdf4node.pdf');

// Ink annotations take a list of strokes, each stroke being a list of points.
page.add_ink_annot([
	[[10, 90], [30, 110], [50, 90]],
]);
page.add_line_annot([10, 110], [30, 130]);
page.add_rect_annot([10, 150, 30, 170]);
page.add_circle_annot([10, 170, 30, 190]);
page.add_polyline_annot([[10, 190], [30, 210], [50, 190]]);
// NOTE(review): second polyline call in a row — possibly meant to demonstrate
// add_polygon_annot instead; confirm intent.
page.add_polyline_annot([[10, 210], [30, 230], [50, 210]]);

// Text-markup annotations receive their target areas through the `quads` option.
page.add_underline_annot({ quads: [[10, 230, 50, 240]] });
page.add_strikeout_annot({ quads: [[10, 250, 50, 260]] });
page.add_squiggly_annot({ quads: [[10, 270, 50, 280]] });
page.add_highlight_annot({ quads: [[10, 290, 50, 300]] });
page.add_stamp_annot([10, 310, 50, 350], 3);

// The first redaction is applied immediately; the second is added afterwards
// and is therefore still pending when the document is saved.
page.add_redact_annot([10, 370, 50, 410], { text: 'pymupdf4node redact', fill: [0, 0, 0], text_color: [1, 1, 1] });
page.apply_redactions();
page.add_redact_annot([10, 430, 50, 470], { cross_out: false });

// Link kinds are described on the documentation page referenced by the URI below.
page.insert_link({ from: [10, 490, 50, 530], kind: 2, uri: 'https://pymupdf.readthedocs.io/en/latest/vars.html#linkdest-kinds' });
page.insert_text([10, 550], 'Inserted Text');

// Add an annotation and delete it right away to demonstrate delete_annot().
page.delete_annot(page.add_freetext_annot([310, 30, 570, 80], 'This is a free_text annotation.', { fontsize: 14, richtext: true, border_color: [0, 0, 0], border_width: 2 }));

// Page-level changes on other pages; this assumes test.pdf has at least three pages.
doc.load_page(1).set_rotation(180);
doc.load_page(2).set_cropbox([10, 10, 100, 100]);

await doc.save('./pdf/output.pdf');
console.log("Please check the PDF located at the \"tests/pdf/output.pdf\" location.");

// 15. Detect tables on the page edited above and print each one as Markdown.
logSection('15. Find tables in page');
const results = page.find_tables();
results.tables.forEach(table => console.log(table.to_markdown()));

// 16. Read the document's XML metadata and print it.
logSection('16. Get XML metadata');
const metadata = doc.get_xml_metadata();
console.log("XML metadata=", metadata);

Published

Vulnerabilities

Links

Maintainers

Keywords

Readme

pymupdf-node

Getting Started

Install node modules

Trying

Open a document:

Copy a page:

Get document as markdown:

And much more!

Full example: