npm package discovery and stats viewer.

Discover Tips

  • General search

    [free text search, go nuts!]

  • Package details

    pkg:[package-name]

  • User packages

    @[username]

Sponsor

Optimize Toolset

I’ve always been into building performant and accessible sites, but lately I’ve been taking it extremely seriously. So much so that I’ve been building a tool to help me optimize and monitor the sites that I build to make sure that I’m making an attempt to offer the best experience to those who visit them. If you’re into performant, accessible and SEO friendly sites, you might like it too! You can check it out at Optimize Toolset.

About

Hi 👋, I’m Ryan Hefner, and I built this site for me, and you! The goal of this site was to provide an easy way for me to check the stats on my npm packages, both for prioritizing issues and updates, and to give me a little kick in the pants to keep up on stuff.

As I was building it, I realized that I was actually using the tool to build the tool, and figured I might as well put this out there and hopefully others will find it to be a fast and useful way to search and browse npm packages as I have.

If you’re interested in other things I’m working on, follow me on Twitter or check out the open source projects I’ve been publishing on GitHub.

I am also working on a Twitter bot for this site to tweet the most popular, newest, random packages from npm. Please follow that account now and it will start sending out packages soon–ish.

Open Software & Tools

This site wouldn’t be possible without the immense generosity and tireless efforts from the people who make contributions to the world and share their work via open source initiatives. Thank you 🙏

© 2026 – Pkg Stats / Ryan Hefner

gather-site

v0.4.5

Published

个人垂直采集器

Readme

myGather

npm version

gather(requestConfig, parseConfig, proxyConfig)

获取规则(requestConfig)

// 和 request 相同config, 但是不支持 pipe 等函数
// headers['User-Agent'] 会自动设置
// followRedirect 默认为false

// 额外参数:
{
  encodingCheck: '是否检测gbk并转换',
  retryDelay: '当proxyConfig!==false时, 存在重试间隔, 默认为0',
  retryStrategy: '当proxyConfig!==false时, 存在重试机制, 默认为retryStrategy.all,也可以自定义函数,参数为request的返回结果中的(err, response)',
  logRetryFun: '当proxyConfig!==false时, 存在重试机制,重试时输出上次失败的信息,参数为(err, nu, url)'
}

解析规则(parseConfig)

{
  encoding: {
    isNeed: 0,
    type: 'any',
    defaultValue: 'utf8',
    optionalValue: [undefined, null, 'utf8', 'gbk'],
    description: '采集后进行的编码转换, 默认会检测gbk并转换'
  },
  mode: {
    isNeed: 0,
    type: 'String',
    defaultValue: 'json',
    optionalValue: ['css', 'text', 'json'],
    description: '采集内容进行解析, 对css进行类JQ 转换, 对text直接返回文本, 对json则进行JSON.parse'
  },
  extract_rules: [{
    name: {
      isNeed: 1,
      type: 'String',
      optionalValue: null,
      description: '对执行 expression 函数后返回的结果封装成数组(即非数组会成为长度为1的数组)进行缓存, 以供下面的规则进行使用'
    },
    expression: {
      isNeed: 1,
      type: 'Function',
      optionalValue: 'function($, cache)',
      description: 'mode: css($ === $), RegExp($ === String), json($ === obj), cache(obj)'
    }
  }]
}

代理规则(proxyConfig)

proxyConfig === false // 不设置代理
proxyConfig === undefined || proxyConfig === null; // 默认失败后重试一次

// proxyConfig
{
  // urls表示从一个网站获取proxy列表
  urls: [
    'full url get a json proxy list'  // 一个url, 返回内容为 [{url: 'proxy_url_1'}, {url: 'proxy_url_2'}]
  ],
  beforeProxies: '数组,默认为[null], 表示在获取url的proxy列表之前添加一个不使用代理获取资源',
  afterProxies: '数组,表示在获取url的proxy列表之后添加',
  time: 5 * 60 * 1000,              // urls 轮询更新 间隔
  proxies: 'proxy_url数组, 一旦设置, 上面其它设置均无效',
  name: '这个代理别名,String,只有在proxies设置的时候必须设置,用于区分这个代理是否已经存在'
}

gather.getProxy(proxyConfig, noPromise)

var proxy = gather.getProxy(false) // 返回一个无代理的proxy

gather
  .getProxy(proxyConfig)
  .then(function(proxy){
    // 返回一个proxyConfig的proxy

    proxy.get(index); // 获取index位置的proxyUrl
  });

gather.clearProxyPool() 删除代理池中的所有代理

例子

var gather = require('gather-site');

// Request settings: the page to fetch.
var requestConfig = { url: 'http://www.xicidaili.com/nn' };

// Parse settings: 'css' mode hands the expression a jQuery-like `$`.
var parseConfig = {
  mode: 'css',
  extract_rules: [{
    name: 'ipList',
    // Builds one proxy entry per usable row of the #ip_list table.
    // A row is usable when its whitespace-separated text has at least two
    // fields and the second field contains a digit.
    expression: function($) {
      var proxies = [];
      var rows = $('#ip_list').find('tr');

      rows.each(function(rowIndex, row) {
        var rowText = $(row).text().replace(/^\s+|\s+$/g, '');
        var fields = rowText.split(/\s+/);
        var usable = fields.length >= 2 && /\d+/.test(fields[1]);

        if (usable) {
          proxies.push({
            url: 'http://' + fields[0] + ':' + fields[1],
            type: 'nn',
            data: fields
          });
        }
      });

      return proxies;
    }
  }]
};

// `false` disables proxying for this request.
var proxyConfig = false;

// Run the request with the configs above and print the extracted data.
gather(requestConfig, parseConfig, proxyConfig)
  .then(function(data) {
    console.log(data);
  })
  .catch(function(e) {
    // Report the failure instead of silently discarding it; an empty handler
    // hides request and parse errors from anyone copying this example.
    console.error(e);
  });

许可证

MIT