nlcst-parse-english
v1.4.0
Published
Parse english with POS and output NLCST.
Readme
nlcst-parse-english
Parse english with Part-of-speech(POS) tagged and output NLCST.
Install
Install with npm:
npm install nlcst-parse-englishPart-of-speech(POS)
WordNode has { data: { pos: Annotation } }.
This library use parse-english + FinNLP/en-pos.
Annotation | Name | Example
--- | --- | ---
NN | Noun | dog man
NNS | Plural noun | dogs men
NNP | Proper noun | London Alex
NNPS | Plural proper noun | Smiths
VB | Base form verb | be
VBP | Present form verb | throw
VBZ | Present form (3rd person) | throws
VBG | Gerund form verb | throwing
VBD | Past tense verb | threw
VBN | Past participle verb | thrown
MD | Modal verb | can shall will may must ought
JJ | Adjective | big fast
JJR | Comparative adjective | bigger
JJS | Superlative adjective | biggest
RB | Adverb | not quickly closely
RBR | Comparative adverb | less-closely faster
RBS | Superlative adverb | fastest
DT | Determiner | the a some both
PDT | Predeterminer | all quite
PRP | Personal Pronoun | I you he she
PRP$ | Possessive Pronoun | I you he she
POS | Possessive ending | 's
IN | Preposition | of by in
PR | Particle | up off
TO | to | to
WDT | Wh-determiner | which that whatever whichever
WP | Wh-pronoun | who whoever whom what
WP$ | Wh-possessive | whose
WRB | Wh-adverb | how where
EX | Expletive there | there
CC | Coordinating conjugation | & and nor or
CD | Cardinal Numbers | 1 7 77 one
LS | List item marker | 1 B C One
UH | Interjection | ah oh oops
FW | Foreign Words | viva mon toujours
, | Comma | ,
: |Mid-sent punct | : ; ...
. | Sent-final punct. | . ! ?
( | Left parenthesis | ) } ]
) | Right parenthesis | ( { [
# | Pound sign | #
$ | Currency symbols | $ € £ ¥
SYM | Other symbols | + * / < >
EM | Emojis & emoticons | :) ❤
Usage
const parser = new EnglishParser();
const CST = parser.parse("Mr. Henry Brown: A hapless but friendly City of London worker.");
assert.deepEqual(CST, {
"type": "RootNode",
"children": [
{
"type": "ParagraphNode",
"children": [
{
"type": "SentenceNode",
"children": [
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Mr",
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 3,
"offset": 2
}
}
},
{
"type": "PunctuationNode",
"value": ".",
"position": {
"start": {
"line": 1,
"column": 3,
"offset": 2
},
"end": {
"line": 1,
"column": 4,
"offset": 3
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 4,
"offset": 3
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 4,
"offset": 3
},
"end": {
"line": 1,
"column": 5,
"offset": 4
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Henry",
"position": {
"start": {
"line": 1,
"column": 5,
"offset": 4
},
"end": {
"line": 1,
"column": 10,
"offset": 9
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 5,
"offset": 4
},
"end": {
"line": 1,
"column": 10,
"offset": 9
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 10,
"offset": 9
},
"end": {
"line": 1,
"column": 11,
"offset": 10
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Brown",
"position": {
"start": {
"line": 1,
"column": 11,
"offset": 10
},
"end": {
"line": 1,
"column": 16,
"offset": 15
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 11,
"offset": 10
},
"end": {
"line": 1,
"column": 16,
"offset": 15
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "PunctuationNode",
"value": ":",
"position": {
"start": {
"line": 1,
"column": 16,
"offset": 15
},
"end": {
"line": 1,
"column": 17,
"offset": 16
}
},
"data": {
"pos": ":"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 17,
"offset": 16
},
"end": {
"line": 1,
"column": 18,
"offset": 17
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "A",
"position": {
"start": {
"line": 1,
"column": 18,
"offset": 17
},
"end": {
"line": 1,
"column": 19,
"offset": 18
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 18,
"offset": 17
},
"end": {
"line": 1,
"column": 19,
"offset": 18
}
},
"data": {
"pos": "DT"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 19,
"offset": 18
},
"end": {
"line": 1,
"column": 20,
"offset": 19
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "hapless",
"position": {
"start": {
"line": 1,
"column": 20,
"offset": 19
},
"end": {
"line": 1,
"column": 27,
"offset": 26
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 20,
"offset": 19
},
"end": {
"line": 1,
"column": 27,
"offset": 26
}
},
"data": {
"pos": "JJ"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 27,
"offset": 26
},
"end": {
"line": 1,
"column": 28,
"offset": 27
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "but",
"position": {
"start": {
"line": 1,
"column": 28,
"offset": 27
},
"end": {
"line": 1,
"column": 31,
"offset": 30
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 28,
"offset": 27
},
"end": {
"line": 1,
"column": 31,
"offset": 30
}
},
"data": {
"pos": "CC"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 31,
"offset": 30
},
"end": {
"line": 1,
"column": 32,
"offset": 31
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "friendly",
"position": {
"start": {
"line": 1,
"column": 32,
"offset": 31
},
"end": {
"line": 1,
"column": 40,
"offset": 39
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 32,
"offset": 31
},
"end": {
"line": 1,
"column": 40,
"offset": 39
}
},
"data": {
"pos": "JJ"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 40,
"offset": 39
},
"end": {
"line": 1,
"column": 41,
"offset": 40
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "City",
"position": {
"start": {
"line": 1,
"column": 41,
"offset": 40
},
"end": {
"line": 1,
"column": 45,
"offset": 44
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 41,
"offset": 40
},
"end": {
"line": 1,
"column": 45,
"offset": 44
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 45,
"offset": 44
},
"end": {
"line": 1,
"column": 46,
"offset": 45
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "of",
"position": {
"start": {
"line": 1,
"column": 46,
"offset": 45
},
"end": {
"line": 1,
"column": 48,
"offset": 47
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 46,
"offset": 45
},
"end": {
"line": 1,
"column": 48,
"offset": 47
}
},
"data": {
"pos": "IN"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 48,
"offset": 47
},
"end": {
"line": 1,
"column": 49,
"offset": 48
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "London",
"position": {
"start": {
"line": 1,
"column": 49,
"offset": 48
},
"end": {
"line": 1,
"column": 55,
"offset": 54
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 49,
"offset": 48
},
"end": {
"line": 1,
"column": 55,
"offset": 54
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 55,
"offset": 54
},
"end": {
"line": 1,
"column": 56,
"offset": 55
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "worker",
"position": {
"start": {
"line": 1,
"column": 56,
"offset": 55
},
"end": {
"line": 1,
"column": 62,
"offset": 61
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 56,
"offset": 55
},
"end": {
"line": 1,
"column": 62,
"offset": 61
}
},
"data": {
"pos": "NN"
}
},
{
"type": "PunctuationNode",
"value": ".",
"position": {
"start": {
"line": 1,
"column": 62,
"offset": 61
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
},
"data": {
"pos": "."
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
});
Changelog
See Releases page.
Running tests
Install devDependencies and Run npm test:
npm i -d && npm testContributing
Pull requests and stars are always welcome.
For bugs and feature requests, please create an issue.
- Fork it!
- Create your feature branch:
git checkout -b my-new-feature - Commit your changes:
git commit -am 'Add some feature' - Push to the branch:
git push origin my-new-feature - Submit a pull request :D
Author
License
MIT © azu
