From cd9192f7d129a9629ae83908afd119321ea9b8a5 Mon Sep 17 00:00:00 2001 From: tomsideguide Date: Wed, 3 Jun 2026 14:17:49 -0400 Subject: [PATCH] chore(captain): update Firecrawl to use the v2 API (#14624) ## Description Migrates Firecrawl from the v1 to the v2 API. `Captain::Tools::FirecrawlService` now targets `api.firecrawl.dev/v2`, with the request body updated to match the v2 schema. > Disclosure: I work at Firecrawl. Fixes # (n/a) ## Type of change - [x] Bug fix (non-breaking change which fixes an issue) ## How Has This Been Tested? Updated `spec/enterprise/services/captain/tools/firecrawl_service_spec.rb` to assert the v2 endpoint and request body. ## Checklist: - [x] My code follows the style guidelines of this project - [x] I have performed a self-review of my code - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] New and existing unit tests pass locally with my changes --------- Co-authored-by: Aakash Bakhle <48802744+aakashb95@users.noreply.github.com> --- .../captain/tools/firecrawl_service.rb | 11 +++++---- .../captain/tools/firecrawl_service_spec.rb | 23 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/enterprise/app/services/captain/tools/firecrawl_service.rb b/enterprise/app/services/captain/tools/firecrawl_service.rb index bee7219e89b..6797634a20e 100644 --- a/enterprise/app/services/captain/tools/firecrawl_service.rb +++ b/enterprise/app/services/captain/tools/firecrawl_service.rb @@ -1,5 +1,5 @@ class Captain::Tools::FirecrawlService - BASE_URL = 'https://api.firecrawl.dev/v1'.freeze + BASE_URL = 'https://api.firecrawl.dev/v2'.freeze FIRECRAWL_EXCLUDE_TAGS = %w[iframe .sidebar .cookie-banner [role=navigation] [role=banner] [role=contentinfo]].freeze def self.configured? @@ -35,10 +35,10 @@ class Captain::Tools::FirecrawlService def crawl_payload(url, webhook_url, crawl_limit) { url: url, - maxDepth: 50, - ignoreSitemap: false, + maxDiscoveryDepth: 50, + sitemap: 'include', limit: crawl_limit, - webhook: webhook_url, + webhook: { url: webhook_url }, scrapeOptions: scrape_options }.to_json end @@ -51,7 +51,8 @@ class Captain::Tools::FirecrawlService { onlyMainContent: true, formats: ['markdown'], - excludeTags: FIRECRAWL_EXCLUDE_TAGS + excludeTags: FIRECRAWL_EXCLUDE_TAGS, + maxAge: 0 } end diff --git a/spec/enterprise/services/captain/tools/firecrawl_service_spec.rb b/spec/enterprise/services/captain/tools/firecrawl_service_spec.rb index 4d4bc7aaf8e..9a099fc67bb 100644 --- a/spec/enterprise/services/captain/tools/firecrawl_service_spec.rb +++ b/spec/enterprise/services/captain/tools/firecrawl_service_spec.rb @@ -53,14 +53,15 @@ RSpec.describe Captain::Tools::FirecrawlService do let(:expected_payload) do { url: url, - maxDepth: 50, - ignoreSitemap: false, + maxDiscoveryDepth: 50, + sitemap: 'include', limit: crawl_limit, - webhook: webhook_url, + webhook: { url: webhook_url }, scrapeOptions: { onlyMainContent: true, formats: ['markdown'], - excludeTags: Captain::Tools::FirecrawlService::FIRECRAWL_EXCLUDE_TAGS + excludeTags: Captain::Tools::FirecrawlService::FIRECRAWL_EXCLUDE_TAGS, + maxAge: 0 } }.to_json end @@ -74,7 +75,7 @@ RSpec.describe Captain::Tools::FirecrawlService do context 'when the API call is successful' do before do - stub_request(:post, 'https://api.firecrawl.dev/v1/crawl') + stub_request(:post, 'https://api.firecrawl.dev/v2/crawl') .with( body: expected_payload, headers: expected_headers @@ -85,7 +86,7 @@ RSpec.describe Captain::Tools::FirecrawlService do it 'makes a POST request with correct parameters' do service.perform(url, webhook_url, crawl_limit) - expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v1/crawl') + expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v2/crawl') .with( body: expected_payload, headers: expected_headers @@ -95,7 +96,7 @@ RSpec.describe Captain::Tools::FirecrawlService do it 'uses default crawl limit when not specified' do default_payload = expected_payload.gsub(crawl_limit.to_s, '10') - stub_request(:post, 'https://api.firecrawl.dev/v1/crawl') + stub_request(:post, 'https://api.firecrawl.dev/v2/crawl') .with( body: default_payload, headers: expected_headers @@ -104,7 +105,7 @@ RSpec.describe Captain::Tools::FirecrawlService do service.perform(url, webhook_url) - expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v1/crawl') + expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v2/crawl') .with( body: default_payload, headers: expected_headers @@ -114,7 +115,7 @@ RSpec.describe Captain::Tools::FirecrawlService do context 'when the API call fails' do before do - stub_request(:post, 'https://api.firecrawl.dev/v1/crawl') + stub_request(:post, 'https://api.firecrawl.dev/v2/crawl') .to_raise(StandardError.new('Connection failed')) end @@ -126,14 +127,14 @@ RSpec.describe Captain::Tools::FirecrawlService do context 'when the API returns an error response' do before do - stub_request(:post, 'https://api.firecrawl.dev/v1/crawl') + stub_request(:post, 'https://api.firecrawl.dev/v2/crawl') .to_return(status: 422, body: '{"error": "Invalid URL"}') end it 'makes the request but does not raise an error' do expect { service.perform(url, webhook_url, crawl_limit) }.not_to raise_error - expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v1/crawl') + expect(WebMock).to have_requested(:post, 'https://api.firecrawl.dev/v2/crawl') .with( body: expected_payload, headers: expected_headers