@counterrealist/simhash-wasm
v0.1.0
Published
SimHash implementation for detecting near-duplicate text using the SipHash-2-4 function
Readme
SimHash (WASM)
SimHash implementation for detecting near-duplicate text using the SipHash-2-4 function
Installation
npm install @counterrealist/simhash-wasm
Usage
const { SimHash } = require("@counterrealist/simhash-wasm");
const simhash = new SimHash(3);
// Text to compare
const text1 = "khan academy";
const text2 = "khan academia";
// Compute BigInteger hashes
const bigIntHash1: BigInteger = simhash.compute(text1); // 182883240033146189889226648883436234289n
const bigIntHash2: BigInteger = simhash.compute(text2); // 188200070891594117632711953576407656125n
// Calculate similarity between BigInteger hashes
const bigIntSimilarity: number = simhash.similarity(bigIntHash1, bigIntHash2); // 0.8203125
// Compute hexadecimal hashes
const hexHash1: string = simhash.compute_hex(text1); // "899607e4844c4236a88584c4ca58a631"
const hexHash2: string = simhash.compute_hex(text2); // "8d9603e494480e34e8e7a5edfa58e6bd"
// Calculate similarity between hexadecimal hashes
const hexSimilarity: number = simhash.similarity_from_hex(hexHash1, hexHash2); // 0.8203125
// Free WebAssembly memory when done
simhash.free();
Note: The expected similarity for this example in another SimHash implementation is 0.890625.
