audio
v2.2.0
Published
Audio loading, editing, and rendering for JavaScript
Downloads
471
Readme
audio

Audio in JavaScript
audio('raw.wav').trim(-30).normalize('podcast').fade(0.3, 0.5).save('clean.mp3')
- Any Format — fast wasm codecs, no ffmpeg.
- Streaming — playback during decode.
- Immutable — safe edits, infinite undo/redo.
- Page Cache — open 10GB+ files.
- Analysis — loudness, spectrum, and more.
- Modular — pluggable ops, tree-shakable.
- CLI — playback, unix pipes, tab completion.
- Isomorphic — node / browser.
- Audio-first — dB, Hz, LUFS, not bytes and indices.
Quick Start · Recipes · API · CLI · FAQ · Ecosystem · Plugins · Architecture
Quick Start
Node
npm i audio
import audio from 'audio'
let a = audio('voice.mp3')
a.trim().normalize('podcast').fade(0.3, 0.5)
await a.save('clean.mp3')
Browser
<script type="module">
import audio from './dist/audio.min.js'
let a = audio('./song.mp3')
a.trim().normalize().fade(0.5, 2)
a.clip({ at: 60, duration: 30 }).play() // play the chorus
</script>
Codecs load on demand via import() — map them with an import map or your bundler.
<script type="importmap">
{
"imports": {
"@audio/decode-mp3": "https://esm.sh/@audio/decode-mp3",
"@audio/decode-wav": "https://esm.sh/@audio/decode-wav",
"@audio/decode-flac": "https://esm.sh/@audio/decode-flac",
"@audio/decode-opus": "https://esm.sh/@audio/decode-opus",
"@audio/decode-vorbis": "https://esm.sh/@audio/decode-vorbis",
"@audio/decode-aac": "https://esm.sh/@audio/decode-aac",
"@audio/decode-qoa": "https://esm.sh/@audio/decode-qoa",
"@audio/decode-aiff": "https://esm.sh/@audio/decode-aiff",
"@audio/decode-caf": "https://esm.sh/@audio/decode-caf",
"@audio/decode-webm": "https://esm.sh/@audio/decode-webm",
"@audio/decode-amr": "https://esm.sh/@audio/decode-amr",
"@audio/decode-wma": "https://esm.sh/@audio/decode-wma",
"@audio/encode-wav": "https://esm.sh/@audio/encode-wav",
"@audio/encode-mp3": "https://esm.sh/@audio/encode-mp3",
"@audio/encode-flac": "https://esm.sh/@audio/encode-flac",
"@audio/encode-opus": "https://esm.sh/@audio/encode-opus",
"@audio/encode-ogg": "https://esm.sh/@audio/encode-ogg",
"@audio/encode-aiff": "https://esm.sh/@audio/encode-aiff"
}
}
</script>
CLI
npm i -g audio
audio voice.wav trim normalize podcast fade 0.3s -0.5s -o clean.mp3
Recipes
Clean up a recording
let a = audio('raw-take.wav')
a.trim(-30).normalize('podcast').fade(0.3, 0.5)
await a.save('clean.wav')
Podcast montage
let intro = audio('intro.mp3')
let body = audio('interview.wav')
let outro = audio('outro.mp3')
body.trim().normalize('podcast')
let ep = audio([intro, body, outro])
ep.fade(0.5, 2)
await ep.save('episode.mp3')
Render a waveform
let a = audio('track.mp3')
let [mins, peaks] = await a.stat(['min', 'max'], { bins: canvas.width })
for (let i = 0; i < peaks.length; i++)
ctx.fillRect(i, h/2 - peaks[i] * h/2, 1, (peaks[i] - mins[i]) * h/2)
Render as it decodes
let a = audio('long.flac')
a.on('data', ({ delta }) => appendBars(delta.max[0], delta.min[0]))
await a
Voiceover on music
let music = audio('bg.mp3')
let voice = audio('narration.wav')
music.gain(-12).mix(voice, { at: 2 })
await music.save('mixed.wav')
Split a long file
let a = audio('audiobook.mp3')
let [ch1, ch2, ch3] = a.split(1800, 3600)
for (let [i, ch] of [ch1, ch2, ch3].entries())
await ch.save(`chapter-${i + 1}.mp3`)
Record from mic
let a = audio()
a.record()
await new Promise(r => setTimeout(r, 5000))
a.stop()
a.trim().normalize()
await a.save('recording.wav')
Extract features for ML
let a = audio('speech.wav')
let mfcc = await a.stat('cepstrum', { bins: 13 })
let spec = await a.stat('spectrum', { bins: 128 })
let [loud, rms] = await a.stat(['loudness', 'rms'])
Generate a tone
let a = audio.from(t => Math.sin(440 * Math.PI * 2 * t), { duration: 2 })
await a.save('440hz.wav')
Custom op
audio.op('crush', (chs, ctx) => {
let steps = 2 ** (ctx.args[0] ?? 8)
return chs.map(ch => ch.map(s => Math.round(s * steps) / steps))
})
a.crush(4)
Serialize and restore
let json = JSON.stringify(a) // { source, edits, ... }
let b = audio(JSON.parse(json)) // re-decode + replay edits
Remove a section
let a = audio('interview.wav')
a.remove({ at: 120, duration: 15 }) // cut 2:00–2:15
a.fade(0.1, { at: 120 }) // smooth the splice
await a.save('edited.wav')
Ringtone from any song
let a = audio('song.mp3')
a.crop({ at: 45, duration: 30 }).fade(0.5, 2).normalize()
await a.save('ringtone.mp3')
Detect clipping
let a = audio('master.wav')
let clips = await a.stat('clipping')
if (clips.length) console.warn(`${clips.length} clipped blocks`)
Stream to network
let a = audio('2hour-mix.flac')
a.highpass(40).normalize('broadcast')
for await (let chunk of a) socket.send(chunk[0].buffer)
Glitch: stutter + reverse
let a = audio('beat.wav')
let v = a.clip({ at: 1, duration: 0.25 })
let glitch = audio([v, v, v, v])
glitch.reverse({ at: 0.25, duration: 0.25 })
await glitch.save('glitch.wav')
Tremolo / sidechain
let a = audio('pad.wav')
a.gain(t => -12 * (0.5 + 0.5 * Math.cos(t * Math.PI * 4))) // 2Hz tremolo in dB
await a.save('tremolo.wav')
Sonify data
let prices = [100, 102, 98, 105, 110, 95, 88, 92, 101, 107]
let a = audio.from(t => {
let freq = 200 + (prices[Math.min(Math.floor(t / 0.2), prices.length - 1)] - 80) * 10
return Math.sin(freq * Math.PI * 2 * t) * 0.5
}, { duration: prices.length * 0.2 })
await a.save('sonification.wav')
API
Create
- audio(source, opts?) – decode from file, URL, or bytes. Returns instantly — decodes in background.
- audio.from(source, opts?) – wrap existing PCM, AudioBuffer, silence, or function. Sync, no I/O.
let a = audio('voice.mp3') // file path
let b = audio('https://cdn.ex/track.mp3') // URL
let c = audio(inputEl.files[0]) // Blob, File, Response, ArrayBuffer
let d = audio() // empty, ready for .push() or .record()
let e = audio([intro, body, outro]) // concat (virtual, no copy)
// opts: { sampleRate, channels, storage: 'memory' | 'persistent' | 'auto' }
await a // await for decode — if you need .duration, full stats etc
let a = audio.from([left, right]) // Float32Array[] channels
let b = audio.from(3, { channels: 2 }) // 3s silence
let c = audio.from(t => Math.sin(440*TAU*t), { duration: 2 }) // generator
let d = audio.from(audioBuffer) // Web Audio AudioBuffer
let e = audio.from(int16arr, { format: 'int16' }) // typed array + format
Properties
// format
a.duration // total seconds (reflects edits)
a.channels // channel count
a.sampleRate // sample rate
a.length // total samples per channel
// playback
a.currentTime // position in seconds (smooth interpolation during playback)
a.playing // true during playback
a.paused // true when paused
a.volume = 0.5 // 0..1 linear (settable)
a.muted = true // mute gate (independent of volume)
a.loop = true // on/off (settable)
a.ended // true when playback ended naturally (not via stop)
a.seeking // true during a seek operation
a.played // promise, resolves when playback starts
a.recording // true during mic recording
// state
a.ready // promise, resolves when fully decoded
a.source // original source reference
a.pages // Float32Array page store
a.stats // per-block stats (peak, rms, etc.)
a.edits // edit list (non-destructive ops)
a.version // increments on each edit
Structure
Non-destructive time/channel rearrangement. All support {at, duration, channel}.
- .trim(threshold?) – strip leading/trailing silence (dB, default auto).
- .crop({at, duration}) – keep range, discard rest.
- .remove({at, duration}) – cut range, close gap.
- .insert(source, {at}) – insert audio or silence (number of seconds) at position.
- .clip({at, duration}) – zero-copy range reference.
- .split(...offsets) – zero-copy split at timestamps.
- .pad(before, after?) – silence at edges (seconds).
- .repeat(n) – repeat n times.
- .reverse({at?, duration?}) – reverse audio or range.
- .speed(rate) – playback speed (affects pitch and duration).
- .remix(channels) – channel count: number or array map ([1, 0] swaps L/R).
a.trim(-30) // strip silence below -30dB
a.remove({ at: '2m', duration: 15 }) // cut 2:00–2:15, close gap
a.insert(intro, { at: 0 }) // prepend; .insert(3) appends 3s silence
let [pt1, pt2] = a.split('30m') // zero-copy views
let hook = a.clip({ at: 60, duration: 30 }) // zero-copy excerpt
a.remix([0, 0]) // L→both; .remix(1) for mono
Process
Amplitude, mixing, normalization. All support {at, duration, channel} ranges.
- .gain(dB, opts?) – volume. Number, range, or t => dB function. { unit: 'linear' } for multiplier.
- .fade(in, out?, curve?) – fade in/out. Curves: 'linear', 'exp', 'log', 'cos'.
- .normalize(target?) – remove DC offset, clamp, and normalize loudness.
  - 'podcast' – -16 LUFS, -1 dBTP.
  - 'streaming' – -14 LUFS.
  - 'broadcast' – -23 LUFS.
  - -3 – custom dB target (peak mode).
  - no arg – peak 0dBFS.
  - { mode: 'rms' } – RMS normalization. Also 'peak', 'lufs'.
  - { ceiling: -1 } – true peak limiter in dB.
  - { dc: false } – skip DC removal.
- .mix(source, opts?) – overlay another audio (additive).
- .pan(value, opts?) – stereo balance (−1 left, 0 center, 1 right). Accepts function.
- .write(data, {at?}) – overwrite samples with raw PCM.
- .transform(fn) – inline processor: (chs, ctx) => chs. Not serialized.
a.gain(-3) // reduce 3dB
a.gain(6, { at: 10, duration: 5 }) // boost range
a.gain(t => -12 * Math.cos(t * TAU)) // automate over time
a.fade(0.5, -2, 'exp') // 0.5s in, 2s exp fade-out
a.normalize('podcast') // -16 LUFS; also 'streaming', 'broadcast'
a.mix(voice, { at: 2 }) // overlay at 2s
a.pan(-0.3, { at: 10, duration: 5 }) // pan left for range
Filter
Biquad filters, chainable. All support {at, duration} ranges.
- .highpass(freq), .lowpass(freq) – high-pass / low-pass filter.
- .bandpass(freq, Q?), .notch(freq, Q?) – band-pass / notch.
- .lowshelf(freq, dB), .highshelf(freq, dB) – shelf EQ.
- .eq(freq, gain, Q?) – parametric EQ.
- .filter(type, ...params) – generic dispatch.
a.highpass(80).lowshelf(200, -3) // rumble + mud
a.eq(3000, 2, 1.5).highshelf(8000, 3) // presence + air
a.notch(50) // remove hum
a.filter(customFn, { cutoff: 2000 }) // custom filter function
I/O
Read PCM, encode, stream, push. Format inferred from extension.
- await .read(opts?) – rendered PCM. { format, channel } to convert.
- await .save(path, opts?) – encode + write. { at, duration } for sub-range.
- await .encode(format?, opts?) – encode to Uint8Array.
- for await (let block of a) – async-iterable over blocks.
- .clone() – deep copy, independent edits, shared pages.
- .push(data, format?) – feed PCM into pushable instance. .stop() to finalize.
let pcm = await a.read() // Float32Array[]
let raw = await a.read({ format: 'int16', channel: 0 })
await a.save('out.mp3') // format from extension
let bytes = await a.encode('flac') // Uint8Array
for await (let block of a) send(block) // stream blocks
let b = a.clone() // independent copy, shared pages
let src = audio() // pushable source
src.push(buf, 'int16') // feed PCM
src.stop() // finalize
Playback / Recording
Live playback with dB volume, seeking, looping. Mic recording via audio-mic.
- .play(opts?) – start playback. { at, duration, volume, loop }. The .played promise resolves when output starts.
- .pause(), .resume(), .seek(t), .stop() – playback control.
- .record(opts?) – mic recording. { deviceId, sampleRate, channels }.
a.play({ at: 30, duration: 10 }) // play 30s–40s
await a.played // wait for output to start
a.volume = 0.5; a.loop = true // live adjustments
a.muted = true // mute without changing volume
a.pause(); a.seek(60); a.resume() // jump to 1:00
a.stop() // end playback or recording
let mic = audio()
mic.record({ sampleRate: 16000, channels: 1 })
mic.stop()
Analysis
await .stat(name, opts?) — without bins returns scalar, with bins returns Float32Array. Array of names returns array of results. Sub-ranges via {at, duration}, per-channel via {channel}.
- 'db' – peak amplitude in dBFS.
- 'rms' – RMS amplitude (linear).
- 'loudness' – integrated LUFS (ITU-R BS.1770).
- 'dc' – DC offset.
- 'clipping' – clipped samples (scalar: timestamps, binned: counts).
- 'silence' – silent ranges as {at, duration}.
- 'max', 'min' – peak envelope (use together for waveform rendering).
- 'spectrum' – mel-frequency spectrum in dB (A-weighted).
- 'cepstrum' – MFCCs.
let loud = await a.stat('loudness') // LUFS
let [db, clips] = await a.stat(['db', 'clipping']) // multiple at once
let spec = await a.stat('spectrum', { bins: 128 }) // frequency bins
let peaks = await a.stat('max', { bins: 800 }) // waveform data
await a.stat('rms', { channel: 0 }) // left only → number
await a.stat('rms', { channel: [0, 1] }) // per-channel → [n, n]
let gaps = await a.stat('silence', { threshold: -40 }) // [{at, duration}, ...]
Utility
Events, lifecycle, undo/redo, serialization.
- .on(event, fn) / .off(event?, fn?) – subscribe / unsubscribe.
  - 'data' – pages decoded/pushed. Payload: { delta, offset, sampleRate, channels }.
  - 'change' – any edit or undo.
  - 'metadata' – stream header decoded. Payload: { sampleRate, channels }.
  - 'timeupdate' – playback position. Payload: currentTime.
  - 'play' – playback started or resumed.
  - 'pause' – playback paused.
  - 'volumechange' – volume or muted changed.
  - 'ended' – playback finished (not on loop).
  - 'progress' – during save/encode. Payload: { offset, total } in seconds.
- .dispose() – release resources. Supports `using` for auto-dispose.
- .undo(n?) – undo last edit(s). Returns edit for redo via .run().
- .run(...edits) – apply edit objects { type, args, at?, duration? }. Batch or replay.
a.on('data', ({ delta }) => draw(delta)) // decode progress
a.on('timeupdate', t => ui.update(t)) // playback position
a.undo() // undo last edit
b.run(...a.edits) // replay onto another file
JSON.stringify(a); audio(json) // serialize / restore
Plugins
Extend with custom ops and stats. See Plugin Tutorial.
- audio.op(name, fn) – register op. Shorthand for { process: fn }. Full descriptor: { process, plan, resolve, call }.
- audio.op(name) – query descriptor.
- audio.op() – all ops.
- audio.stat(name, descriptor) – register stat. Shorthand (chs, ctx) => [...] or { block, reduce, query }.
// op: process function receives (channels[], ctx) per 1024-sample block
audio.op('crush', (chs, ctx) => {
let steps = 2 ** (ctx.args[0] ?? 8)
return chs.map(ch => ch.map(s => Math.round(s * steps) / steps))
})
// stat: block function collects per-block, reduce enables scalar queries
audio.stat('peak', {
block: (chs) => chs.map(ch => { let m = 0; for (let s of ch) m = Math.max(m, Math.abs(s)); return m }),
reduce: (src, from, to) => { let m = 0; for (let i = from; i < to; i++) m = Math.max(m, src[i]); return m },
})
a.crush(4) // chainable like built-in ops
a.stat('peak') // → scalar from reduce
a.stat('peak', { bins: 100 }) // → binned array
CLI
npm i -g audio
audio [file] [ops...] [-o output] [options]
# ops
eq mix pad pan crop
fade gain stat trim notch
remix speed split insert remove
repeat bandpass highpass lowpass reverse
lowshelf highshelf normalize
# options
-p play -l loop -o output -f force --format
Playback
␣ pause · ←/→ seek ±10s · ⇧←/⇧→ seek ±60s · ↑/↓ volume ±3dB · l loop · q quit
# Play fragment of the song
audio song.mp3 10s..15s -p
# Play clip (not full song)
audio song.mp3 clip 10s..20s -p -l
# Normalize before
Edit
# clean up
audio raw-take.wav trim -30db normalize podcast fade 0.3s -0.5s -o clean.wav
# ranges
audio in.wav gain -3db 1s..10s -o out.wav
# filter chain
audio in.mp3 highpass 80hz lowshelf 200hz -3db -o out.wav
# join
audio intro.mp3 + content.wav + outro.mp3 trim normalize fade 0.5s -2s -o ep.mp3
# voiceover
audio bg.mp3 gain -12db mix narration.wav 2s -o mixed.wav
# split
audio audiobook.mp3 split 30m 60m -o 'chapter-{i}.mp3'
Analysis
# all default stats (db, rms, loudness, clipping, dc)
audio speech.wav stat
# specific stats
audio speech.wav stat loudness rms
# spectrum / cepstrum with bin count
audio speech.wav stat spectrum 128
audio speech.wav stat cepstrum 13
# stat after transforms
audio speech.wav gain -3db stat db
Batch
audio '*.wav' trim normalize podcast -o '{name}.clean.{ext}'
audio '*.wav' gain -3db -o '{name}.out.{ext}'
Stdin/stdout
cat in.wav | audio gain -3db > out.wav
curl -s https://example.com/speech.mp3 | audio normalize -o clean.wav
ffmpeg -i video.mp4 -f wav - | audio trim normalize podcast > voice.wav
Tab completion
eval "$(audio --completions zsh)" # add to ~/.zshrc
eval "$(audio --completions bash)" # add to ~/.bashrc
audio --completions fish | source # fish
FAQ
Ecosystem
- audio-decode – codec decoding (13+ formats)
- encode-audio – codec encoding
- audio-filter – filters (weighting, EQ, auditory)
- audio-speaker – audio output
- audio-mic – audio input
- audio-type – format detection
- pcm-convert – PCM format conversion
