@ducksguse/scraper-sdk
v1.1.0
Published
TypeScript SDK for Scraper Microservice - Server-side only
Maintainers
Readme
🦆 Scraper SDK
TypeScript SDK for Scraper Microservice - Server-side only
🚀 Quick Start
Installation
npm install @ducksguse/scraper-sdk
# or
yarn add @ducksguse/scraper-sdk
Basic Usage (Next.js API Route)
// pages/api/scrape.ts or app/api/scrape/route.ts
import { ScraperClient } from '@ducksguse/scraper-sdk';
const scraper = new ScraperClient({
baseUrl: process.env.SCRAPER_BASE_URL!, // From .env.local
apiKey: process.env.SCRAPER_API_KEY!, // From .env.local
});
export default async function handler(req, res) {
try {
// Smart scraping
const result = await scraper.smartScrape(
'https://example.com',
'Extract title and price'
);
res.json({
success: true,
data: result.result,
tokens: result.token_usage,
executionTime: result.execution_time_seconds
});
} catch (error) {
res.status(500).json({ error: error.message });
}
}
🛡️ Security Features
- Server-side only - Automatically blocks browser usage
- Environment detection - Requires Node.js environment
- API key protection - Never exposed to client-side
📚 Available Methods
Basic Scrapers
// Extract specific data
await scraper.smartScrape(url, prompt);
// Universal page analysis
await scraper.omniScrape(url, prompt);
// Search the web
await scraper.search(query);
// Deep web research
await scraper.omniSearch(query);
// Crawl entire website (up to 10 pages)
await scraper.depthSearch(url, prompt, {
max_pages: 5,
max_depth: 2
});
Specialized Scrapers
// Extract tables/CSV data
await scraper.csvScrape(url, prompt);
// Extract JSON data
await scraper.jsonScrape(url, prompt);
// Take screenshot
await scraper.screenshot(url);
// Convert to Markdown
await scraper.markdownify(url);
Advanced Usage
// With custom options
const result = await scraper.smartScrape(url, prompt, {
priority: 9, // High priority
callback_url: 'https://your-app.com/webhook'
});
// Async task management
const task = await scraper.createTask({
url: 'https://example.com',
prompt: 'Extract data',
task_type: 'smart_scraper'
});
// Poll for result
const result = await scraper.waitForTask(task.task_id, {
timeout: 300, // 5 minutes
pollInterval: 3 // Check every 3 seconds
});
🌐 Next.js Examples
App Router (app/api)
// app/api/scrape/route.ts
import { ScraperClient } from '@ducksguse/scraper-sdk';
import { NextRequest, NextResponse } from 'next/server';
const scraper = new ScraperClient({
baseUrl: process.env.SCRAPER_BASE_URL!,
apiKey: process.env.SCRAPER_API_KEY!,
});
export async function POST(request: NextRequest) {
try {
const { url, prompt } = await request.json();
const result = await scraper.smartScrape(url, prompt);
return NextResponse.json({
success: true,
data: result.result,
metadata: {
tokens: result.token_usage,
executionTime: result.execution_time_seconds,
model: result.model_used
}
});
} catch (error) {
return NextResponse.json(
{ error: error.message },
{ status: 500 }
);
}
}
Pages Router (pages/api)
// pages/api/scrape/[type].ts
import type { NextApiRequest, NextApiResponse } from 'next';
import { ScraperClient } from '@ducksguse/scraper-sdk';
const scraper = new ScraperClient({
baseUrl: process.env.SCRAPER_BASE_URL!,
apiKey: process.env.SCRAPER_API_KEY!,
});
export default async function handler(
req: NextApiRequest,
res: NextApiResponse
) {
const { type } = req.query;
const { url, prompt } = req.body;
try {
let result;
switch (type) {
case 'smart':
result = await scraper.smartScrape(url, prompt);
break;
case 'search':
result = await scraper.search(prompt);
break;
case 'screenshot':
result = await scraper.screenshot(url);
break;
default:
return res.status(400).json({ error: 'Invalid scraper type' });
}
res.json({ success: true, data: result });
} catch (error) {
res.status(500).json({ error: error.message });
}
}
Server Actions (Next.js 13+)
// app/actions/scrape.ts
'use server';
import { ScraperClient } from '@ducksguse/scraper-sdk';
const scraper = new ScraperClient({
baseUrl: process.env.SCRAPER_BASE_URL!,
apiKey: process.env.SCRAPER_API_KEY!,
});
export async function scrapeAction(url: string, prompt: string) {
try {
const result = await scraper.smartScrape(url, prompt);
return { success: true, data: result };
} catch (error) {
return { success: false, error: error.message };
}
}
💰 Token Usage & Cost Tracking
const result = await scraper.smartScrape(url, prompt);
// Access token usage
console.log('Token usage:', result.token_usage);
// {
// prompt_tokens: 150,
// completion_tokens: 75,
// total_tokens: 225
// }
// Calculate cost (your logic)
const cost = calculateCost(result.token_usage, result.model_used);
⚙️ Environment Variables
# .env.local
SCRAPER_BASE_URL=https://your-scraper-service.example.com
SCRAPER_API_KEY=your-api-key-here
🔧 Configuration Options
const scraper = new ScraperClient({
baseUrl: 'https://your-scraper-service.example.com',
apiKey: 'your-api-key',
timeout: 30000, // Request timeout (ms)
retries: 3, // Retry failed requests
defaultPriority: 5 // Default task priority (1-9)
});
📊 Error Handling
import { ScraperError, ScraperTimeoutError } from '@ducksguse/scraper-sdk';
try {
const result = await scraper.smartScrape(url, prompt);
} catch (error) {
if (error instanceof ScraperTimeoutError) {
console.log('Task timed out:', error.message);
} else if (error instanceof ScraperError) {
console.log('Scraper error:', error.message, error.statusCode);
} else {
console.log('Unknown error:', error.message);
}
}
🚫 What NOT to do
// ❌ DON'T use in browser/client components
'use client'; // This will throw an error!
import { ScraperClient } from '@ducksguse/scraper-sdk';
export default function ClientComponent() {
const scraper = new ScraperClient({ ... }); // ERROR!
}
📞 Support
For questions and support, contact your development team.
