@domeadev/tiny-parser
v0.0.1
Published
A lightweight, flexible tokenizer/parser library for JavaScript and TypeScript that makes it easy to parse text into meaningful tokens.
Readme
Tiny Parser
A lightweight, flexible tokenizer/parser library for JavaScript and TypeScript that makes it easy to parse text into meaningful tokens.
Installation
npm
npm install @domeadev/tiny-parser
yarn
yarn add @domeadev/tiny-parser
pnpm
pnpm add @domeadev/tiny-parser
Features
- 🪶 Lightweight: Small footprint, zero dependencies
- 🔧 Flexible: Define custom processors to handle any text format
- 🧩 Composable: Mix and match processors for complex parsing
- 🦺 Type-safe: Written in TypeScript with strong typing
- 🧠 Smart fallbacks: Handles unparsed content gracefully
Usage
import { parse, type BaseToken, type Processor } from "@domeadev/tiny-parser";
// Define your custom token type
interface NumberToken extends BaseToken {
type: "number";
value: number;
}
// Create a processor for numbers
const numberProcessor: Processor<NumberToken> = {
start: (src) => {
const match = src.match(/^\d+/);
return match ? 0 : -1;
},
tokenizer: (src) => {
const match = src.match(/^(\d+)/);
if (match) {
return {
type: "number",
raw: match[0],
value: parseInt(match[0], 10),
};
}
return null;
},
};
// Parse some text
const tokens = parse("123 abc", [numberProcessor]);
console.log(tokens);
/*
[
{ type: 'number', raw: '123', value: 123 },
{ type: '__FALLBACK__', raw: ' abc' }
]
*/
API
parse<T extends BaseToken>(input: string, processors: Processor<T>[]): (T | FallbackToken)[]
Parses the input string using the provided processors and returns an array of tokens.
- `input`: The string to parse
- `processors`: An array of processors to use for parsing
- Returns: An array of tokens (either custom tokens or fallback tokens)
BaseToken interface
interface BaseToken {
type: string;
raw: string;
}
FallbackToken interface
interface FallbackToken extends BaseToken {
type: "__FALLBACK__";
}
Processor<T> interface
interface Processor<T extends BaseToken = FallbackToken> {
start: (src: string) => number;
tokenizer: (src: string) => T | null | undefined;
}
- `start`: Function that returns the position where this processor should start parsing, or `-1` if it can't handle the input
- `tokenizer`: Function that converts a portion of the input into a token, or returns `null`/`undefined` if parsing fails
Utility Functions
- `isToken(token: any): token is BaseToken`: Checks if an object is a valid token
- `isFallbackToken(token: any): token is FallbackToken`: Checks if a token is a fallback token
- `createFallbackToken(raw: string): FallbackToken`: Creates a new fallback token
Examples
Parsing a Simple Programming Language
import { parse, type BaseToken, type Processor } from "@domeadev/tiny-parser";
// Define token types
interface KeywordToken extends BaseToken {
type: "keyword";
}
interface IdentifierToken extends BaseToken {
type: "identifier";
}
interface NumberToken extends BaseToken {
type: "number";
value: number;
}
// Create processors
const keywordProcessor: Processor<KeywordToken> = {
start: (src) => {
const match = src.match(/^(if|else|while|for)/);
return match ? 0 : -1;
},
tokenizer: (src) => {
const match = src.match(/^(if|else|while|for)/);
if (match) {
return { type: "keyword", raw: match[0] };
}
return null;
},
};
const identifierProcessor: Processor<IdentifierToken> = {
start: (src) => {
const match = src.match(/^[a-zA-Z_][a-zA-Z0-9_]*/);
return match ? 0 : -1;
},
tokenizer: (src) => {
const match = src.match(/^([a-zA-Z_][a-zA-Z0-9_]*)/);
if (match) {
return { type: "identifier", raw: match[0] };
}
return null;
},
};
const numberProcessor: Processor<NumberToken> = {
start: (src) => {
const match = src.match(/^\d+/);
return match ? 0 : -1;
},
tokenizer: (src) => {
const match = src.match(/^(\d+)/);
if (match) {
return {
type: "number",
raw: match[0],
value: parseInt(match[0], 10),
};
}
return null;
},
};
// Parse some code
const tokens = parse("if count > 10", [
keywordProcessor,
identifierProcessor,
numberProcessor,
]);
console.log(tokens);
/*
[
{ type: 'keyword', raw: 'if' },
{ type: '__FALLBACK__', raw: ' ' },
{ type: 'identifier', raw: 'count' },
{ type: '__FALLBACK__', raw: ' > ' },
{ type: 'number', raw: '10', value: 10 }
]
*/
License
MIT © domeafavour
