@frasma/extractify
v1.0.0
Published
Functional utilities to extract, transform and flow your data
Maintainers
Readme
Extractify
A TypeScript library for data extraction with functional utilities and AI-powered document processing.
Features
- Functional Utilities: Pure functions for array and object manipulation
- Document Extraction: AI-powered data extraction from PDFs and images using OpenAI Vision
- LangGraph Workflow: Robust extraction pipeline with retry logic and validation
- Type Safety: Full TypeScript support with typed extraction results
- Flexible Schema: Define extraction schemas for single values and lists
Installation
npm install @frasma/extractify
Document Extraction
Extract structured data from documents (PDFs and images) using OpenAI's Vision API.
Basic Usage
import { extractFromDocument } from "@frasma/extractify";
// Define what to extract
const schema = [
{
key: "name",
name: "Name",
description: "Full name of the person",
examples: ["John Doe", "Jane Smith"],
type: "single",
},
{
key: "email",
name: "Email",
description: "Email address",
examples: ["john.doe@example.com"],
type: "single",
},
];
// Extract from document
const result = await extractFromDocument({
apiKey: process.env.OPENAI_API_KEY,
document: "./document.pdf", // or Buffer
schema: schema,
options: {
model: "gpt-4o",
temperature: 0.1,
includeConfidence: true,
},
});
if (result.success) {
console.log("Extracted data:", result.data);
console.log("Confidence:", result.confidence?.overall);
} else {
console.error("Extraction failed:", result.error);
}
Extracting Lists (e.g., Invoice Items)
import { extractFromDocument, createListField } from "@frasma/extractify";
// Define schema with list fields
const invoiceSchema = [
{
key: "invoiceNumber",
name: "Invoice Number",
description: "Invoice identification number",
examples: ["INV-2024-001"],
type: "single",
},
{
key: "customer",
name: "Customer",
description: "Customer name",
examples: ["Acme Corp"],
type: "single",
},
{
key: "items",
name: "Items",
description: "List of purchased items",
examples: [],
type: "list",
itemSchema: [
{
key: "product",
name: "Product",
description: "Product name",
examples: ["Widget", "Gadget"],
type: "single",
},
{
key: "quantity",
name: "Quantity",
description: "Number of units",
examples: ["1", "5"],
type: "single",
},
{
key: "price",
name: "Price",
description: "Unit price",
examples: ["99.99", "149.00"],
type: "single",
},
],
},
{
key: "total",
name: "Total",
description: "Total amount",
examples: ["$599.99"],
type: "single",
},
];
// Type your extraction result
interface InvoiceData {
invoiceNumber: string;
customer: string;
items: Array<{
product: string;
quantity: string;
price: string;
}>;
total: string;
}
const result = await extractFromDocument<InvoiceData>({
apiKey: "sk-...",
document: invoiceBuffer,
schema: invoiceSchema,
});
// result.data is typed as InvoiceData
Using Helper Functions
import { createSimpleSchema, createListField } from "@frasma/extractify";
// Quick schema creation for simple fields
const simpleSchema = createSimpleSchema([
{ key: "name", description: "Person name" },
{
key: "birthDate",
description: "Date of birth in DD/MM/YYYY format",
examples: ["15/03/1990"],
},
{ key: "address", description: "Full address" },
]);
// Create a list field
const productsField = createListField(
"products",
"Products",
"List of products in the document",
[
{ key: "name", description: "Product name" },
{ key: "sku", description: "Product SKU code" },
{ key: "price", description: "Product price" },
],
);
// Combine them
const fullSchema = [...simpleSchema, productsField];
Reusable Extractor
import { createDocumentExtractor } from "@frasma/extractify";
// Create a reusable extractor with default settings
const extractor = createDocumentExtractor(process.env.OPENAI_API_KEY, {
model: "gpt-4o",
includeConfidence: true,
maxRetries: 3,
});
// Use it multiple times
const result1 = await extractor.extract(document1, schema);
const result2 = await extractor.extract(document2, schema);
Extraction Options
| Option | Type | Default | Description |
| ------------------- | ------- | ---------- | -------------------------------------------- |
| model | string | 'gpt-4o' | OpenAI model to use |
| temperature | number | 0.1 | LLM temperature (lower = more deterministic) |
| maxTokens | number | 4096 | Maximum tokens for response |
| maxRetries | number | 3 | Retry attempts on API errors |
| includeConfidence | boolean | true | Include confidence scores |
| imageQuality | number | 85 | JPEG quality for image compression |
| maxImageDimension | number | 2048 | Max pixel dimension for images |
Extraction Result
interface ExtractionResult<T> {
data: T; // Extracted data
success: boolean; // Whether extraction succeeded
error?: string; // Error message if failed
confidence?: {
overall: number; // Overall confidence (0-1)
fields: Record<string, number>; // Per-field confidence
};
metadata: {
pagesProcessed: number; // Number of pages processed
processingTimeMs: number; // Processing time in ms
modelUsed: string; // Model used
tokensUsed?: {
// Token usage (if available)
prompt: number;
completion: number;
total: number;
};
documentFormat: "pdf" | "image";
};
}
Functional Utilities
Core Functions
import { pipe, compose, curry, filter, map } from "@frasma/extractify";
// Pipe: left-to-right composition
const result = pipe(
[1, 2, 3, 4, 5],
filter((x) => x > 2),
map((x) => x * 2),
);
// result: [6, 8, 10]
// Compose: right-to-left composition
const transform = compose(
(x: number[]) => x.map((n) => n * 2),
(x: number[]) => x.filter((n) => n > 2),
);
// Curry: convert function to curried version
const add = curry((a: number, b: number) => a + b);
const add5 = add(5);
add5(3); // 8
Array Functions
import { map, filter, reduce, flatMap } from "@frasma/extractify";
// All functions are curried and composable
const double = map((x: number) => x * 2);
const evens = filter((x: number) => x % 2 === 0);
const result = pipe([1, 2, 3, 4], evens, double);
// result: [4, 8]
Object Functions
import { pick, omit, merge } from "@frasma/extractify";
const user = { name: "John", email: "john@example.com", password: "secret" };
// Pick specific keys
pick(["name", "email"])(user);
// { name: 'John', email: 'john@example.com' }
// Omit specific keys
omit(["password"])(user);
// { name: 'John', email: 'john@example.com' }
// Deep merge objects
merge({ a: 1, b: { c: 2 } })({ b: { d: 3 }, e: 4 });
// { a: 1, b: { c: 2, d: 3 }, e: 4 }
Development
# Install dependencies
npm install
# Run tests
npm test
# Run tests in watch mode
npm run test:watch
# Build
npm run build
# Type check
npm run type-check
# Lint
npm run lint
Requirements
- Node.js >= 18.0.0
- OpenAI API key (for document extraction)
License
MIT © Frasma Studio
