Firecrawl Integration¶
Official Firecrawl API Documentation
Firecrawl provides web scraping and crawling capabilities. Use KeyPool to call Firecrawl with your team token and a stable base URL.
KeyPool Endpoint for Firecrawl¶
Due to how firecrawl-py and @mendable/firecrawl-js SDKs handle base URLs with subpaths, configure the SDKs to point directly to your KeyPool base URL and include the x-keypool-service: firecrawl header.
{YOUR_KEYPOOL_BASE_URL}
Use the x-keypool-service: firecrawl header with this base URL.
Authentication¶
Use your KeyPool team token as a bearer token when making requests to the KeyPool Firecrawl endpoint.
Do not send a personal Firecrawl API key when using KeyPool.
Key Features and Usage¶
KeyPool supports the main Firecrawl SDK workflows, including scraping web pages, mapping URLs, and managing crawl jobs.
Special Note on SDK Configuration¶
The firecrawl-py and @mendable/firecrawl-js v2+ SDKs use absolute paths for their API endpoints (e.g., /v2/scrape). When these absolute paths are joined with a base URL that includes a subpath (like {YOUR_KEYPOOL_BASE_URL}/v1/firecrawl), the subpath is stripped by the SDK's internal URL handling.
To work around this, configure your SDK with {YOUR_KEYPOOL_BASE_URL} as the api_url (Python) or apiUrl (TypeScript), and send x-keypool-service: firecrawl. The Python client wrapper in the SDK examples below adds this header automatically; for the TypeScript examples, make sure your HTTP layer sends the header, as the SDK constructor does not attach it for you.
1. Scrape Web Pages¶
Extract content from a specific URL.
Python Example:
import os

from firecrawl import Firecrawl


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*, failing fast if unset.

    Raises:
        RuntimeError: if the variable is missing or empty, so configuration
            problems surface immediately instead of as an obscure SDK error.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Environment variable {name} must be set")
    return value


def _make_keypool_client(keypool_base_url: str, keypool_token: str) -> Firecrawl:
    """Build a Firecrawl client that routes all requests through KeyPool.

    The SDK strips URL subpaths, so the client points at the bare KeyPool
    base URL and injects the ``x-keypool-service: firecrawl`` header on
    every request instead.
    """
    client = Firecrawl(
        api_key=keypool_token,
        api_url=keypool_base_url,
    )
    # Add the service header required for this SDK base URL shape.
    # NOTE: this patches a private SDK attribute and may break on
    # firecrawl-py upgrades — re-verify after bumping the dependency.
    original_prepare = client._v2_client.http_client._prepare_headers

    def _patched_prepare(idempotency_key=None):
        headers = original_prepare(idempotency_key)
        headers['x-keypool-service'] = 'firecrawl'
        return headers

    client._v2_client.http_client._prepare_headers = _patched_prepare
    return client


# Fail fast with a clear message if either setting is missing.
KEYPOOL_BASE_URL = _require_env("KEYPOOL_BASE_URL")
KEYPOOL_TOKEN = _require_env("KEYPOOL_TOKEN")
client = _make_keypool_client(KEYPOOL_BASE_URL, KEYPOOL_TOKEN)
result_markdown = client.scrape("https://www.google.com", formats=["markdown"])
print(f"Scraped Markdown (first 500 chars):\n{result_markdown.markdown[:500]}...")
result_html = client.scrape("https://www.google.com", formats=["html"])
print(f"Scraped HTML (first 500 chars):\n{result_html.html[:500]}...")
TypeScript Example:
import Firecrawl from "@mendable/firecrawl-js";
// Build a Firecrawl client pointed directly at the KeyPool base URL
// (the SDK's URL handling strips subpaths, so no /v1/firecrawl suffix).
// NOTE(review): unlike the Python example, this wrapper never attaches the
// x-keypool-service: firecrawl header — confirm that your HTTP layer or
// proxy supplies it, otherwise KeyPool cannot route the request.
function makeKeypoolFirecrawlClient(baseUrl: string, token: string): Firecrawl {
return new Firecrawl({
apiKey: token,
apiUrl: baseUrl, // SDK URL handling strips subpaths, so include x-keypool-service.
});
}
// Replace with your KeyPool base URL and Team Token
// NOTE(review): process.env values may be undefined; the SDK will fail later
// with an auth error rather than a clear configuration message.
const KEYPOOL_BASE_URL = process.env.KEYPOOL_BASE_URL;
const KEYPOOL_TOKEN = process.env.KEYPOOL_TOKEN;
const client = makeKeypoolFirecrawlClient(KEYPOOL_BASE_URL, KEYPOOL_TOKEN);
// Scrape the same page twice to show both output formats.
async function scrapeWebPage() {
const resultMarkdown = await client.scrape("https://www.google.com", {
formats: ["markdown"],
});
console.log(`Scraped Markdown (first 500 chars):\n${resultMarkdown.markdown?.substring(0, 500)}...`);
const resultHtml = await client.scrape("https://www.google.com", {
formats: ["html"],
});
console.log(`Scraped HTML (first 500 chars):\n${resultHtml.html?.substring(0, 500)}...`);
}
scrapeWebPage();
2. Map URLs¶
Get a sitemap-like overview of links on a given URL.
Python Example:
import os

from firecrawl import Firecrawl


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*, failing fast if unset.

    Raises:
        RuntimeError: if the variable is missing or empty, so configuration
            problems surface immediately instead of as an obscure SDK error.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Environment variable {name} must be set")
    return value


# Re-use the _make_keypool_client function from above
def _make_keypool_client(keypool_base_url: str, keypool_token: str) -> Firecrawl:
    """Build a Firecrawl client that routes all requests through KeyPool."""
    client = Firecrawl(
        api_key=keypool_token,
        api_url=keypool_base_url,
    )
    # Add the service header required for this SDK base URL shape.
    # NOTE: this patches a private SDK attribute and may break on
    # firecrawl-py upgrades — re-verify after bumping the dependency.
    original_prepare = client._v2_client.http_client._prepare_headers

    def _patched_prepare(idempotency_key=None):
        headers = original_prepare(idempotency_key)
        headers['x-keypool-service'] = 'firecrawl'
        return headers

    client._v2_client.http_client._prepare_headers = _patched_prepare
    return client


# Fail fast with a clear message if either setting is missing.
KEYPOOL_BASE_URL = _require_env("KEYPOOL_BASE_URL")
KEYPOOL_TOKEN = _require_env("KEYPOOL_TOKEN")
client = _make_keypool_client(KEYPOOL_BASE_URL, KEYPOOL_TOKEN)
result = client.map("https://docs.google.com/document/d/1yD6jX3sO2u6_k9K1gN-L7E8c5jW4B5_f3f9F5-F5W4")
if result.links:
    print(f"Mapped {len(result.links)} links. First 5:")
    for link in result.links[:5]:
        print(f"- {link.url}")
TypeScript Example:
import Firecrawl from "@mendable/firecrawl-js";
// Re-use the makeKeypoolFirecrawlClient function from above
// NOTE(review): as in the scrape example, this wrapper never attaches the
// x-keypool-service: firecrawl header — confirm your HTTP layer supplies it.
function makeKeypoolFirecrawlClient(baseUrl: string, token: string): Firecrawl {
return new Firecrawl({
apiKey: token,
apiUrl: baseUrl,
});
}
const KEYPOOL_BASE_URL = process.env.KEYPOOL_BASE_URL;
const KEYPOOL_TOKEN = process.env.KEYPOOL_TOKEN;
const client = makeKeypoolFirecrawlClient(KEYPOOL_BASE_URL, KEYPOOL_TOKEN);
// Map a URL and print a small sample of the discovered links.
async function mapUrls() {
const result = await client.map("https://docs.google.com/document/d/1yD6jX3sO2u6_k9K1gN-L7E8c5jW4B5_f3f9F5-F5W4");
if (result.links) {
console.log(`Mapped ${result.links.length} links. First 5:`);
result.links.slice(0, 5).forEach((link) => {
console.log(`- ${link.url}`);
});
}
}
mapUrls();
3. Crawl Session Affinity¶
For long-running operations like web crawls (startCrawl, getCrawlStatus), keep the returned crawl job ID and continue status checks through the same KeyPool base URL and team token.
Python Example:
import os
import time  # useful for a real polling loop; see note below

from firecrawl import Firecrawl


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*, failing fast if unset.

    Raises:
        RuntimeError: if the variable is missing or empty, so configuration
            problems surface immediately instead of as an obscure SDK error.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Environment variable {name} must be set")
    return value


# Re-use the _make_keypool_client function from above
def _make_keypool_client(keypool_base_url: str, keypool_token: str) -> Firecrawl:
    """Build a Firecrawl client that routes all requests through KeyPool."""
    client = Firecrawl(
        api_key=keypool_token,
        api_url=keypool_base_url,
    )
    # Add the service header required for this SDK base URL shape.
    # NOTE: this patches a private SDK attribute and may break on
    # firecrawl-py upgrades — re-verify after bumping the dependency.
    original_prepare = client._v2_client.http_client._prepare_headers

    def _patched_prepare(idempotency_key=None):
        headers = original_prepare(idempotency_key)
        headers['x-keypool-service'] = 'firecrawl'
        return headers

    client._v2_client.http_client._prepare_headers = _patched_prepare
    return client


# Fail fast with a clear message if either setting is missing.
KEYPOOL_BASE_URL = _require_env("KEYPOOL_BASE_URL")
KEYPOOL_TOKEN = _require_env("KEYPOOL_TOKEN")
client = _make_keypool_client(KEYPOOL_BASE_URL, KEYPOOL_TOKEN)
# 1. Start a crawl job
job = client.start_crawl("https://www.google.com", limit=1)
print(f"Started crawl job with ID: {job.id}")
# 2. Check the status of the crawl job (KeyPool maintains session affinity)
# In a real application, you would poll this until completion, e.g. with
# time.sleep() between get_crawl_status() calls.
status = client.get_crawl_status(job.id)
print(f"Crawl job status: {status.status}")
TypeScript Example:
import Firecrawl from "@mendable/firecrawl-js";
// Re-use the makeKeypoolFirecrawlClient function from above
// NOTE(review): as in the scrape example, this wrapper never attaches the
// x-keypool-service: firecrawl header — confirm your HTTP layer supplies it.
function makeKeypoolFirecrawlClient(baseUrl: string, token: string): Firecrawl {
return new Firecrawl({
apiKey: token,
apiUrl: baseUrl,
});
}
const KEYPOOL_BASE_URL = process.env.KEYPOOL_BASE_URL;
const KEYPOOL_TOKEN = process.env.KEYPOOL_TOKEN;
const client = makeKeypoolFirecrawlClient(KEYPOOL_BASE_URL, KEYPOOL_TOKEN);
// Start a crawl and check its status through the same KeyPool endpoint,
// so the job ID remains valid across requests (session affinity).
async function crawlSessionAffinity() {
// 1. Start a crawl job
const job = await client.startCrawl("https://www.google.com", { limit: 1 });
console.log(`Started crawl job with ID: ${job.id}`);
// 2. Check the status of the crawl job (KeyPool maintains session affinity)
// In a real application, you would poll this until completion.
const status = await client.getCrawlStatus(job.id);
console.log(`Crawl job status: ${status.status}`);
}
crawlSessionAffinity();
Interactive API Reference¶
For all Firecrawl endpoints and in-browser testing, see API Reference → Firecrawl.