htsvcf
v0.1.14
Published
VCF/BCF reader for Node/Bun (N-API addon)
Readme
htsvcf
A fast Node.js library for reading VCF/BCF files, powered by HTSlib.
Installation
npm install htsvcf
Quick Start
import { Reader, Writer } from "htsvcf";
const reader = new Reader("samples.vcf.gz");
// Print header info
console.log("Samples:", reader.header.samples());
const dpDef = reader.header.get("INFO", "DP");
console.log(`DP field: ${dpDef.type} (${dpDef.description})`);
// Add a custom INFO field to the header
reader.header.addInfo("HIGHQUAL", "0", "Flag", "Variant passed quality filter");
// Create a writer with the modified header
const writer = new Writer("filtered.vcf.gz", reader.header);
// Process variants
for await (const v of reader) {
// Translate variant to the writer's header (required after modifying header)
v.translate(writer.header);
// Filter by quality
if (v.qual !== null && v.qual < 30) continue;
// Set our custom flag
v.set_info("HIGHQUAL", true);
// Get per-sample data
for (const s of v.samples()) {
if (s.DP !== null && s.DP > 10) {
console.log(`${v.chrom}:${v.pos} ${s.sample_name} DP=${s.DP}`);
}
}
// Write the variant
writer.write(v);
}
writer.close();
reader.close();
API
Reader
Create a reader from a VCF/BCF file path:
import { Reader, openReader } from "htsvcf";
// Synchronous constructor
const reader = new Reader("path/to/file.vcf.gz");
// Async factory (useful if you need to await initialization)
const reader = await openReader("path/to/file.vcf.gz");
Iterating Records
There are two iteration modes:
- Fast synchronous iteration (recommended for max throughput): for...of
- Asynchronous iteration (doesn’t block the event loop): for await...of
Fast synchronous iteration (recommended)
This uses nextSync() under the hood and avoids per-record Promise/task overhead.
for (const variant of reader) {
console.log(`${variant.chrom}:${variant.pos} ${variant.ref}>${variant.alt.join(",")}`);
}
Asynchronous iteration
for await (const variant of reader) {
console.log(`${variant.chrom}:${variant.pos} ${variant.ref}>${variant.alt.join(",")}`);
}
Manual synchronous iteration
let result;
while (!(result = reader.nextSync()).done) {
const variant = result.value;
console.log(variant.chrom, variant.pos);
}
Querying Regions (requires index)
if (reader.hasIndex()) {
// String form (1-based, inclusive)
await reader.query("chr1:1000-2000");
// Numeric form (0-based)
await reader.query("chr1", 999, 2000);
for await (const variant of reader) {
// variants in region
}
}
Closing
Always close the reader when done:
reader.close();
Header
Access the VCF header via reader.header:
const header = reader.header;
// Get sample names
const samples = header.samples();
console.log("Samples:", samples); // ["S1", "S2", "S3"]
// Get INFO/FORMAT field definitions
const dpInfo = header.get("INFO", "DP");
if (dpInfo) {
console.log(dpInfo.id, dpInfo.type, dpInfo.number, dpInfo.description);
// "DP" "Integer" "1" "Depth"
}
// Get all header records
const records = header.records();
for (const rec of records) {
if (rec.section === "INFO") {
console.log(`INFO field: ${rec.id} (${rec.type})`);
}
}
// Add new INFO/FORMAT fields
header.addInfo("CUSTOM", "1", "Integer", "My custom field");
header.addFormat("GT", "1", "String", "Genotype");
// Get header as string
console.log(header.toString());
Writer
Create a writer for VCF/BCF output:
import { Reader, Writer } from "htsvcf";
const reader = new Reader("input.vcf.gz");
// If you plan to write new INFO/FORMAT tags, add them first
reader.header.addInfo("ZZ", "1", "Integer", "Zed");
const writer = new Writer("out.vcf", reader.header, {
// optional; inferred from path by default
format: "vcf",
});
for (const v of reader) {
// Translate the variant to the writer's header before setting new fields
v.translate(writer.header);
v.set_info("ZZ", 42);
// NOTE: write() consumes the Variant
writer.write(v);
}
writer.close();
reader.close();
Variant
Each variant record has the following properties and methods:
Basic Fields
const variant = reader.nextSync().value;
variant.chrom; // Chromosome (string)
variant.pos; // Position, 1-based (number)
variant.start; // Start position, 0-based (number)
variant.stop; // End position (number)
variant.id; // Variant ID (string), writable
variant.ref; // Reference allele (string)
variant.alt; // Alternate alleles (string[])
variant.qual; // Quality score (number | null), writable
variant.filter; // Filter status (string[]), writable
Modifying Fields
// Set variant ID
variant.id = "rs12345";
// Set quality
variant.qual = 99.5;
variant.qual = null; // Clear quality
// Set filters (filter IDs must exist in header)
variant.filter = ["PASS"];
variant.filter = ["LowQual", "LowDP"];
INFO Fields
// Read INFO fields (returns typed values based on header)
const dp = variant.info("DP"); // number
const af = variant.info("AF"); // number[] for Number=A/R/G/.
const somatic = variant.info("SOMATIC"); // boolean for Flag type
const missing = variant.info("NOPE"); // undefined if not present
// Modify INFO fields
variant.set_info("DP", 42);
variant.set_info("AF", [0.1, 0.2]);
variant.set_info("SOMATIC", true);
variant.set_info("DP", null); // Clear field
FORMAT/Sample Fields
// Get FORMAT field values for all samples (array per sample)
const dpValues = variant.format("DP"); // [10, 15, null]
const adValues = variant.format("AD"); // [[8, 2], [12, 3], [null, null]]
// Modify FORMAT fields (array with one value per sample)
variant.set_format("DP", [20, 25, 30]);
variant.set_format("AD", [[10, 5], [15, 10], [8, 2]]);
variant.set_format("DP", null); // Clear field
// Get all FORMAT data for a single sample
const s1 = variant.sample("S1");
if (s1) {
console.log(s1.sample_name); // "S1"
console.log(s1.DP); // 10
console.log(s1.AD); // [8, 2]
console.log(s1.GT); // "0/1"
console.log(s1.genotype); // { alleles: [0, 1], phase: [false] }
}
// Get all samples
const allSamples = variant.samples();
for (const sample of allSamples) {
console.log(`${sample.sample_name}: DP=${sample.DP}`);
}
// Get a subset of samples
const subset = variant.samples(["S1", "S3"]);
// Get parsed genotypes (alleles and phase info)
const gts = variant.genotypes();
// [{ alleles: [0, 1], phase: [false] }, { alleles: [1, 1], phase: [true] }, ...]
// Genotypes for a subset of samples
const gtSubset = variant.genotypes(["S1", "S2"]);
// Modify genotypes (one per sample, must match sample count)
variant.set_genotypes([
{ alleles: [0, 1], phase: [false] }, // 0/1
{ alleles: [1, 1], phase: [true] }, // 1|1
{ alleles: [null, 1], phase: [false] }, // ./1
]);
String Representation
// Get VCF line representation
console.log(variant.toString());
// chr1 1000 rs123 A C 99 PASS DP=42 GT:DP 0/1:10 0/0:15
Query Example
import { Reader } from "htsvcf";
const reader = new Reader("indexed.vcf.gz");
if (reader.hasIndex()) {
// Query a specific region
await reader.query("chr17:7570000-7580000");
for await (const v of reader) {
console.log(`${v.chrom}:${v.pos} ${v.ref}>${v.alt.join(",")}`);
}
}
reader.close();
License
MIT
