ngrams-search
v0.0.4
Published
Search by n-gram similarity. An imitation of the Python NGram module
Maintainers
Readme
ngrams
Node.js module for searching by n-gram similarity of characters. An imitation of the Python NGram module
basic usage
var NGrams = require('ngrams-search');
var n = new NGrams() //default N=3 (size of ngram) w=1 (warp, use greater than 1 to increase the similarity of shorter string pairs)
n.add("spam"); //add single items
n.add(["span", "eg"]); //or an array of items
console.log(n.search("spa")); // second argument is optional - threshold - return only items with similarity greater than threshold. default is 0
/*
will output an array of items with similarity greater than threshold ordered by similarity
//[{
item: "spam",
similarity: 0.375
}, {
item: "span",
similarity: 0.375
}]
*/
n.getMaxNgram("spam");
/*
returns the item with the maximum ngram similarity or undefined if none
{
item: "spam",
similarity: 1.0
}
*/
more usage examples
var n = new NGrams(2); //create ngrams of size 2
n.pad("word"); //returns " word " padding is of size N-1
n.split("ab");
/*
returns the ngrams of the item "ab" after padding
[
[' ', 'a'],
['a', 'b'],
['b', ' ']
]
*/
n.getSharedNgrams("abe", "abc");
/*
returns all the ngrams that both items share:
[
[' ', 'a'],
['a', 'b']
]
*/
n.getCountSharedNgrams("abe", "abc"); // returns 2
n.getStatsSharedNgrams("abe", "abc");
/*
returns
{
all: 8, //count of all ngrams in both items
same: 2, //ngrams shared by both items
distinct: 6, //count of distinct ngrams in total
diff: 4 //count of unique ngrams - which do not appear in both items
}
*/
n.compare("abe","abc"); //third argument is warp - optional, default is 1
/*
returns 0.3333333333333333
formula is: ((distinct ^ warp)-(diff ^ warp))/(distinct^warp)
*/
For more use cases, look at test.js

