audio

v2.2.0

Published

a month ago

Audio loading, editing, and rendering for JavaScript

Downloads

471

0 High
0 Medium
0 Low

dfcreative

jamen

audiojs audio dsp pcm

audio

Audio in JavaScript

audio('raw.wav').trim(-30).normalize('podcast').fade(0.3, 0.5).save('clean.mp3')

Any Format — fast wasm codecs, no ffmpeg.
Streaming — playback during decode.
Immutable — safe edits, infinite undo/redo.
Page Cache — open 10GB+ files.
Analysis — loudness, spectrum, and more.
Modular — pluggable ops, tree-shakable.
CLI — playback, unix pipes, tab completion.
Isomorphic — node / browser.
Audio-first — dB, Hz, LUFS, not bytes and indices.

Quick Start Recipes API CLI FAQ Ecosystem Plugins Architecture

Quick Start

Node

npm i audio

import audio from 'audio'
let a = audio('voice.mp3')
a.trim().normalize('podcast').fade(0.3, 0.5)
await a.save('clean.mp3')

Browser

<script type="module">
  import audio from './dist/audio.min.js'
  let a = audio('./song.mp3')
  a.trim().normalize().fade(0.5, 2)
  a.clip({ at: 60, duration: 30 }).play()   // play the chorus
</script>

Codecs load on demand via import() — map them with an import map or your bundler.

<script type="importmap">
{
  "imports": {
    "@audio/decode-mp3": "https://esm.sh/@audio/decode-mp3",
    "@audio/decode-wav": "https://esm.sh/@audio/decode-wav",
    "@audio/decode-flac": "https://esm.sh/@audio/decode-flac",
    "@audio/decode-opus": "https://esm.sh/@audio/decode-opus",
    "@audio/decode-vorbis": "https://esm.sh/@audio/decode-vorbis",
    "@audio/decode-aac": "https://esm.sh/@audio/decode-aac",
    "@audio/decode-qoa": "https://esm.sh/@audio/decode-qoa",
    "@audio/decode-aiff": "https://esm.sh/@audio/decode-aiff",
    "@audio/decode-caf": "https://esm.sh/@audio/decode-caf",
    "@audio/decode-webm": "https://esm.sh/@audio/decode-webm",
    "@audio/decode-amr": "https://esm.sh/@audio/decode-amr",
    "@audio/decode-wma": "https://esm.sh/@audio/decode-wma",
    "@audio/encode-wav": "https://esm.sh/@audio/encode-wav",
    "@audio/encode-mp3": "https://esm.sh/@audio/encode-mp3",
    "@audio/encode-flac": "https://esm.sh/@audio/encode-flac",
    "@audio/encode-opus": "https://esm.sh/@audio/encode-opus",
    "@audio/encode-ogg": "https://esm.sh/@audio/encode-ogg",
    "@audio/encode-aiff": "https://esm.sh/@audio/encode-aiff"
  }
}
</script>

CLI

npm i -g audio
audio voice.wav trim normalize podcast fade 0.3s -0.5s -o clean.mp3

Recipes

Clean up a recording

let a = audio('raw-take.wav')
a.trim(-30).normalize('podcast').fade(0.3, 0.5)
await a.save('clean.wav')

Podcast montage

let intro = audio('intro.mp3')
let body  = audio('interview.wav')
let outro = audio('outro.mp3')

body.trim().normalize('podcast')
let ep = audio([intro, body, outro])
ep.fade(0.5, 2)
await ep.save('episode.mp3')

Render a waveform

let a = audio('track.mp3')
let [mins, peaks] = await a.stat(['min', 'max'], { bins: canvas.width })
for (let i = 0; i < peaks.length; i++)
  ctx.fillRect(i, h/2 - peaks[i] * h/2, 1, (peaks[i] - mins[i]) * h/2)

Render as it decodes

let a = audio('long.flac')
a.on('data', ({ delta }) => appendBars(delta.max[0], delta.min[0]))
await a

Voiceover on music

let music = audio('bg.mp3')
let voice = audio('narration.wav')
music.gain(-12).mix(voice, { at: 2 })
await music.save('mixed.wav')

Split a long file

let a = audio('audiobook.mp3')
let [ch1, ch2, ch3] = a.split(1800, 3600)
for (let [i, ch] of [ch1, ch2, ch3].entries())
  await ch.save(`chapter-${i + 1}.mp3`)

Record from mic

let a = audio()
a.record()
await new Promise(r => setTimeout(r, 5000))
a.stop()
a.trim().normalize()
await a.save('recording.wav')

Extract features for ML

let a = audio('speech.wav')
let mfcc = await a.stat('cepstrum', { bins: 13 })
let spec = await a.stat('spectrum', { bins: 128 })
let [loud, rms] = await a.stat(['loudness', 'rms'])

Generate a tone

let a = audio.from(t => Math.sin(440 * Math.PI * 2 * t), { duration: 2 })
await a.save('440hz.wav')

Custom op

audio.op('crush', (chs, ctx) => {
  let steps = 2 ** (ctx.args[0] ?? 8)
  return chs.map(ch => ch.map(s => Math.round(s * steps) / steps))
})

a.crush(4)

Serialize and restore

let json = JSON.stringify(a)             // { source, edits, ... }
let b = audio(JSON.parse(json))           // re-decode + replay edits

Remove a section

let a = audio('interview.wav')
a.remove({ at: 120, duration: 15 })     // cut 2:00–2:15
a.fade(0.1, { at: 120 })                // smooth the splice
await a.save('edited.wav')

Ringtone from any song

let a = audio('song.mp3')
a.crop({ at: 45, duration: 30 }).fade(0.5, 2).normalize()
await a.save('ringtone.mp3')

Detect clipping

let a = audio('master.wav')
let clips = await a.stat('clipping')
if (clips.length) console.warn(`${clips.length} clipped blocks`)

Stream to network

let a = audio('2hour-mix.flac')
a.highpass(40).normalize('broadcast')
for await (let chunk of a) socket.send(chunk[0].buffer)

Glitch: stutter + reverse

let a = audio('beat.wav')
let v = a.clip({ at: 1, duration: 0.25 })
let glitch = audio([v, v, v, v])
glitch.reverse({ at: 0.25, duration: 0.25 })
await glitch.save('glitch.wav')

Tremolo / sidechain

let a = audio('pad.wav')
a.gain(t => -12 * (0.5 + 0.5 * Math.cos(t * Math.PI * 4)))  // 2Hz tremolo in dB
await a.save('tremolo.wav')

Sonify data

let prices = [100, 102, 98, 105, 110, 95, 88, 92, 101, 107]
let a = audio.from(t => {
  let freq = 200 + (prices[Math.min(Math.floor(t / 0.2), prices.length - 1)] - 80) * 10
  return Math.sin(freq * Math.PI * 2 * t) * 0.5
}, { duration: prices.length * 0.2 })
await a.save('sonification.wav')

API

Create

audio(source, opts?) – decode from file, URL, or bytes. Returns instantly — decodes in background.
audio.from(source, opts?) – wrap existing PCM, AudioBuffer, silence, or function. Sync, no I/O.

let a = audio('voice.mp3')                // file path
let b = audio('https://cdn.ex/track.mp3') // URL
let c = audio(inputEl.files[0])           // Blob, File, Response, ArrayBuffer
let d = audio()                           // empty, ready for .push() or .record()
let e = audio([intro, body, outro])       // concat (virtual, no copy)
// opts: { sampleRate, channels, storage: 'memory' | 'persistent' | 'auto' }

await a    // wait for decode — needed for .duration, full stats, etc.

let a = audio.from([left, right])                 // Float32Array[] channels
let b = audio.from(3, { channels: 2 })           // 3s silence
let c = audio.from(t => Math.sin(440*TAU*t), { duration: 2 })  // generator
let d = audio.from(audioBuffer)                   // Web Audio AudioBuffer
let e = audio.from(int16arr, { format: 'int16' }) // typed array + format

Properties

// format
a.duration                // total seconds (reflects edits)
a.channels                // channel count
a.sampleRate              // sample rate
a.length                  // total samples per channel

// playback
a.currentTime             // position in seconds (smooth interpolation during playback)
a.playing                 // true during playback
a.paused                  // true when paused
a.volume = 0.5             // 0..1 linear (settable)
a.muted = true            // mute gate (independent of volume)
a.loop = true             // on/off (settable)
a.ended                   // true when playback ended naturally (not via stop)
a.seeking                 // true during a seek operation
a.played                  // promise, resolves when playback starts
a.recording               // true during mic recording

// state
a.ready                   // promise, resolves when fully decoded
a.source                  // original source reference
a.pages                   // Float32Array page store
a.stats                   // per-block stats (peak, rms, etc.)
a.edits                   // edit list (non-destructive ops)
a.version                 // increments on each edit

Structure

Non-destructive time/channel rearrangement. All support {at, duration, channel}.

.trim(threshold?) – strip leading/trailing silence (dB, default auto).
.crop({at, duration}) – keep range, discard rest.
.remove({at, duration}) – cut range, close gap.
.insert(source, {at}) – insert audio or silence (number of seconds) at position.
.clip({at, duration}) – zero-copy range reference.
.split(...offsets) – zero-copy split at timestamps.
.pad(before, after?) – silence at edges (seconds).
.repeat(n) – repeat n times.
.reverse({at?, duration?}) – reverse audio or range.
.speed(rate) – playback speed (affects pitch and duration).
.remix(channels) – channel count: number or array map ([1, 0] swaps L/R).

a.trim(-30)                               // strip silence below -30dB
a.remove({ at: '2m', duration: 15 })      // cut 2:00–2:15, close gap
a.insert(intro, { at: 0 })               // prepend; .insert(3) appends 3s silence
let [pt1, pt2] = a.split('30m')          // zero-copy views
let hook = a.clip({ at: 60, duration: 30 })  // zero-copy excerpt
a.remix([0, 0])                           // L→both; .remix(1) for mono

Process

Amplitude, mixing, normalization. All support {at, duration, channel} ranges.

.gain(dB, opts?) – volume. Number, range, or t => dB function. { unit: 'linear' } for multiplier.
.fade(in, out?, curve?) – fade in/out. Curves: 'linear' 'exp' 'log' 'cos'.
.normalize(target?) – remove DC offset, clamp, and normalize loudness.
- 'podcast' – -16 LUFS, -1 dBTP.
- 'streaming' – -14 LUFS.
- 'broadcast' – -23 LUFS.
- -3 – custom dB target (peak mode).
- no arg – peak 0dBFS.
- { mode: 'rms' } – RMS normalization. Also 'peak', 'lufs'.
- { ceiling: -1 } – true peak limiter in dB.
- { dc: false } – skip DC removal.
.mix(source, opts?) – overlay another audio (additive).
.pan(value, opts?) – stereo balance (−1 left, 0 center, 1 right). Accepts function.
.write(data, {at?}) – overwrite samples with raw PCM.
.transform(fn) – inline processor: (chs, ctx) => chs. Not serialized.

a.gain(-3)                                // reduce 3dB
a.gain(6, { at: 10, duration: 5 })       // boost range
a.gain(t => -12 * Math.cos(t * TAU))     // automate over time
a.fade(0.5, -2, 'exp')                    // 0.5s in, 2s exp fade-out
a.normalize('podcast')                    // -16 LUFS; also 'streaming', 'broadcast'
a.mix(voice, { at: 2 })                  // overlay at 2s
a.pan(-0.3, { at: 10, duration: 5 })      // pan left for range

Filter

Biquad filters, chainable. All support {at, duration} ranges.

.highpass(freq), .lowpass(freq) – high-pass / low-pass filters.
.bandpass(freq, Q?), .notch(freq, Q?) – band-pass / notch.
.lowshelf(freq, dB), .highshelf(freq, dB) – shelf EQ.
.eq(freq, gain, Q?) – parametric EQ.
.filter(type, ...params) – generic dispatch.

a.highpass(80).lowshelf(200, -3)          // rumble + mud
a.eq(3000, 2, 1.5).highshelf(8000, 3)    // presence + air
a.notch(50)                               // remove hum
a.filter(customFn, { cutoff: 2000 })     // custom filter function

I/O

Read PCM, encode, stream, push. Format inferred from extension.

await .read(opts?) – rendered PCM. { format, channel } to convert.
await .save(path, opts?) – encode + write. { at, duration } for sub-range.
await .encode(format?, opts?) – encode to Uint8Array.
for await (let block of a) – async-iterable over blocks.
.clone() – deep copy, independent edits, shared pages.
.push(data, format?) – feed PCM into pushable instance. .stop() to finalize.

let pcm = await a.read()                  // Float32Array[]
let raw = await a.read({ format: 'int16', channel: 0 })
await a.save('out.mp3')                   // format from extension
let bytes = await a.encode('flac')        // Uint8Array
for await (let block of a) send(block)    // stream blocks
let b = a.clone()                         // independent copy, shared pages

let src = audio()                         // pushable source
src.push(buf, 'int16')                    // feed PCM
src.stop()                                // finalize

Playback / Recording

Live playback with dB volume, seeking, looping. Mic recording via audio-mic.

.play(opts?) – start playback. { at, duration, volume, loop }. .played promise resolves when output starts.
.pause(), .resume(), .seek(t), .stop() – playback control.
.record(opts?) – mic recording. { deviceId, sampleRate, channels }.

a.play({ at: 30, duration: 10 })          // play 30s–40s
await a.played                             // wait for output to start
a.volume = 0.5; a.loop = true             // live adjustments
a.muted = true                             // mute without changing volume
a.pause(); a.seek(60); a.resume()         // jump to 1:00
a.stop()                                  // end playback or recording

let mic = audio()
mic.record({ sampleRate: 16000, channels: 1 })
mic.stop()

Analysis

await .stat(name, opts?) — without bins returns scalar, with bins returns Float32Array. Array of names returns array of results. Sub-ranges via {at, duration}, per-channel via {channel}.

'db' – peak amplitude in dBFS.
'rms' – RMS amplitude (linear).
'loudness' – integrated LUFS (ITU-R BS.1770).
'dc' – DC offset.
'clipping' – clipped samples (scalar: timestamps, binned: counts).
'silence' – silent ranges as {at, duration}.
'max', 'min' – peak envelope (use together for waveform rendering).
'spectrum' – mel-frequency spectrum in dB (A-weighted).
'cepstrum' – MFCCs.

let loud = await a.stat('loudness')                       // LUFS
let [db, clips] = await a.stat(['db', 'clipping'])        // multiple at once
let spec = await a.stat('spectrum', { bins: 128 })        // frequency bins
let peaks = await a.stat('max', { bins: 800 })            // waveform data
await a.stat('rms', { channel: 0 })                       // left only → number
await a.stat('rms', { channel: [0, 1] })                  // per-channel → [n, n]
let gaps = await a.stat('silence', { threshold: -40 })    // [{at, duration}, ...]

Utility

Events, lifecycle, undo/redo, serialization.

.on(event, fn) / .off(event?, fn?) – subscribe / unsubscribe.
- 'data' – pages decoded/pushed. Payload: { delta, offset, sampleRate, channels }.
- 'change' – any edit or undo.
- 'metadata' – stream header decoded. Payload: { sampleRate, channels }.
- 'timeupdate' – playback position. Payload: currentTime.
- 'play' – playback started or resumed.
- 'pause' – playback paused.
- 'volumechange' – volume or muted changed.
- 'ended' – playback finished (not on loop).
- 'progress' – during save/encode. Payload: { offset, total } in seconds.
.dispose() – release resources. Supports the `using` declaration for auto-dispose.
.undo(n?) – undo last edit(s). Returns edit for redo via .run().
.run(...edits) – apply edit objects { type, args, at?, duration? }. Batch or replay.

a.on('data', ({ delta }) => draw(delta))  // decode progress
a.on('timeupdate', t => ui.update(t))     // playback position

a.undo()                                  // undo last edit
b.run(...a.edits)                         // replay onto another file
JSON.stringify(a); audio(json)            // serialize / restore

Plugins

Extend with custom ops and stats. See Plugin Tutorial.

audio.op(name, fn) – register op. Shorthand for { process: fn }. Full descriptor: { process, plan, resolve, call }.
audio.op(name) – query descriptor. audio.op() – all ops.
audio.stat(name, descriptor) – register stat. Shorthand (chs, ctx) => [...] or { block, reduce, query }.

// op: process function receives (channels[], ctx) per 1024-sample block
audio.op('crush', (chs, ctx) => {
  let steps = 2 ** (ctx.args[0] ?? 8)
  return chs.map(ch => ch.map(s => Math.round(s * steps) / steps))
})

// stat: block function collects per-block, reduce enables scalar queries
audio.stat('peak', {
  block: (chs) => chs.map(ch => { let m = 0; for (let s of ch) m = Math.max(m, Math.abs(s)); return m }),
  reduce: (src, from, to) => { let m = 0; for (let i = from; i < to; i++) m = Math.max(m, src[i]); return m },
})

a.crush(4)                    // chainable like built-in ops
a.stat('peak')                // → scalar from reduce
a.stat('peak', { bins: 100 }) // → binned array

CLI

npm i -g audio

audio [file] [ops...] [-o output] [options]

# ops
eq          mix         pad         pan       crop
fade        gain        stat        trim      notch
remix       speed       split       insert    remove
repeat      bandpass    highpass    lowpass   reverse
lowshelf    highshelf   normalize

# options
-p play     -l loop     -o output   -f force  --format

Playback

␣ pause · ←/→ seek ±10s · ⇧←/⇧→ seek ±60s · ↑/↓ volume ±3dB · l loop · q quit

# Play fragment of the song
audio song.mp3 10s..15s -p

# Play clip (not full song)
audio song.mp3 clip 10s..20s -p -l

# Normalize before

Edit

# clean up
audio raw-take.wav trim -30db normalize podcast fade 0.3s -0.5s -o clean.wav

# ranges
audio in.wav gain -3db 1s..10s -o out.wav

# filter chain
audio in.mp3 highpass 80hz lowshelf 200hz -3db -o out.wav

# join
audio intro.mp3 + content.wav + outro.mp3 trim normalize fade 0.5s -2s -o ep.mp3

# voiceover
audio bg.mp3 gain -12db mix narration.wav 2s -o mixed.wav

# split
audio audiobook.mp3 split 30m 60m -o 'chapter-{i}.mp3'

Analysis

# all default stats (db, rms, loudness, clipping, dc)
audio speech.wav stat

# specific stats
audio speech.wav stat loudness rms

# spectrum / cepstrum with bin count
audio speech.wav stat spectrum 128
audio speech.wav stat cepstrum 13

# stat after transforms
audio speech.wav gain -3db stat db

Batch

audio '*.wav' trim normalize podcast -o '{name}.clean.{ext}'
audio '*.wav' gain -3db -o '{name}.out.{ext}'

Stdin/stdout

cat in.wav | audio gain -3db > out.wav
curl -s https://example.com/speech.mp3 | audio normalize -o clean.wav
ffmpeg -i video.mp4 -f wav - | audio trim normalize podcast > voice.wav

Tab completion

eval "$(audio --completions zsh)"       # add to ~/.zshrc
eval "$(audio --completions bash)"      # add to ~/.bashrc
audio --completions fish | source       # fish

FAQ

Ecosystem

audio-decode – codec decoding (13+ formats)
encode-audio – codec encoding
audio-filter – filters (weighting, EQ, auditory)
audio-speaker – audio output
audio-mic – audio input
audio-type – format detection
pcm-convert – PCM format conversion

Published

Vulnerabilities

Links

Maintainers

Keywords

Readme

audio

Quick Start Recipes API CLI FAQ Ecosystem Plugins Architecture

Quick Start

Node

Browser

CLI

Recipes

Clean up a recording

Podcast montage

Render a waveform

Render as it decodes

Voiceover on music

Split a long file

Record from mic

Extract features for ML

Generate a tone

Custom op

Serialize and restore

Remove a section

Ringtone from any song

Detect clipping

Stream to network

Glitch: stutter + reverse

Tremolo / sidechain

Sonify data

API

Create

Properties

Structure

Process

Filter

I/O

Playback / Recording

Analysis

Utility

Plugins

CLI

Playback

Edit

Analysis

Batch

Stdin/stdout

Tab completion

FAQ

Ecosystem