cheerio-mt
v1.0.0
Published
Multi-threaded Cheerio wrapper
Downloads
16
Readme
cheerio-mt
Multi-threaded Cheerio wrapper
Installation
npm install cheerio-mt
# or
pnpm install cheerio-mt
Usage
Process multiple HTML documents in parallel
import { map } from 'cheerio-mt';
const documents = [
'<html><body><h1>Page 1</h1></body></html>',
'<html><body><h1>Page 2</h1></body></html>',
'<html><body><h1>Page 3</h1></body></html>',
];
const results = await map(documents, ($) => {
return $('h1').text();
});
console.log(results);
Extract data from multiple pages
import { map } from 'cheerio-mt';
const documents = [
'<html><body><h1>Page 1</h1></body></html>',
'<html><body><h1>Page 2</h1></body></html>',
'<html><body><h1>Page 3</h1></body></html>',
];
const results = await map(documents, ($) => {
return {
title: $('h1').text(),
links: $('a').map((i, el) => $(el).attr('href')).get(),
paragraphs: $('p').length
};
});
Control concurrency
import { mapLimit } from 'cheerio-mt';
const documents = [
'<html><body><h1>Page 1</h1></body></html>',
'<html><body><h1>Page 2</h1></body></html>',
'<html><body><h1>Page 3</h1></body></html>',
];
const results = await mapLimit(documents, ($) => {
return $('h1').text();
}, {
concurrency: 4
});
Process a single document
import { process } from 'cheerio-mt';
const html = '<html><body><h1>Hello</h1></body></html>';
const result = await process(html, ($) => {
return {
title: $('h1').text(),
hasBody: $('body').length > 0
};
});
Custom instance
import { create } from 'cheerio-mt';
const cheerio = create({
maxWorkers: 4
});
const results = await cheerio.map(documents, ($) => {
return $('h1').text();
});