diff --git a/db/migrate/20260525093000_change_captain_document_external_link_to_text.rb b/db/migrate/20260525093000_change_captain_document_external_link_to_text.rb new file mode 100644 index 00000000000..50cee2b0df9 --- /dev/null +++ b/db/migrate/20260525093000_change_captain_document_external_link_to_text.rb @@ -0,0 +1,16 @@ +class ChangeCaptainDocumentExternalLinkToText < ActiveRecord::Migration[7.0] + OLD_INDEX_NAME = 'index_captain_documents_on_assistant_id_and_external_link'.freeze + NEW_INDEX_NAME = 'idx_captain_documents_on_assistant_id_and_external_link_md5'.freeze + + def up + remove_index :captain_documents, name: OLD_INDEX_NAME, if_exists: true + change_column :captain_documents, :external_link, :text, null: false + add_index :captain_documents, 'assistant_id, md5(external_link)', unique: true, name: NEW_INDEX_NAME, if_not_exists: true + end + + def down + remove_index :captain_documents, name: NEW_INDEX_NAME, if_exists: true + change_column :captain_documents, :external_link, :string, null: false + add_index :captain_documents, [:assistant_id, :external_link], unique: true, name: OLD_INDEX_NAME, if_not_exists: true + end +end diff --git a/db/schema.rb b/db/schema.rb index 9d9fe3cbc95..f2c47957137 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2026_05_15_000000) do +ActiveRecord::Schema[7.1].define(version: 2026_05_25_093000) do # These extensions should be enabled to support this database enable_extension "pg_stat_statements" enable_extension "pg_trgm" @@ -370,7 +370,7 @@ ActiveRecord::Schema[7.1].define(version: 2026_05_15_000000) do create_table "captain_documents", force: :cascade do |t| t.string "name" - t.string "external_link", null: false + t.text "external_link", null: false t.text "content" t.bigint "assistant_id", null: false t.bigint "account_id", null: false @@ -381,10 +381,10 @@ ActiveRecord::Schema[7.1].define(version: 2026_05_15_000000) do t.integer "sync_status" t.datetime "last_synced_at" t.datetime "last_sync_attempted_at" + t.index "assistant_id, md5(external_link)", name: "idx_captain_documents_on_assistant_id_and_external_link_md5", unique: true t.index ["account_id", "assistant_id", "sync_status", "last_synced_at"], name: "idx_captain_documents_on_account_assistant_sync_stats" t.index ["account_id", "sync_status"], name: "index_captain_documents_on_account_id_and_sync_status" t.index ["account_id"], name: "index_captain_documents_on_account_id" - t.index ["assistant_id", "external_link"], name: "index_captain_documents_on_assistant_id_and_external_link", unique: true t.index ["assistant_id"], name: "index_captain_documents_on_assistant_id" t.index ["status"], name: "index_captain_documents_on_status" end diff --git a/enterprise/app/models/captain/document.rb b/enterprise/app/models/captain/document.rb index 0fe14813bd4..6eda0eb5ae2 100644 --- a/enterprise/app/models/captain/document.rb +++ b/enterprise/app/models/captain/document.rb @@ -5,7 +5,7 @@ # id :bigint not null, primary key # content :text # content_fingerprint :string -# external_link :string not null +# external_link :text not null # last_sync_attempted_at :datetime # last_sync_error_code :string # last_synced_at :datetime @@ -20,12 +20,12 @@ # # Indexes # -# idx_captain_documents_on_account_assistant_sync_stats (account_id,assistant_id,sync_status,last_synced_at) -# index_captain_documents_on_account_id (account_id) -# index_captain_documents_on_account_id_and_sync_status (account_id,sync_status) -# index_captain_documents_on_assistant_id (assistant_id) -# index_captain_documents_on_assistant_id_and_external_link (assistant_id,external_link) UNIQUE -# index_captain_documents_on_status (status) +# idx_captain_documents_on_account_assistant_sync_stats (account_id,assistant_id,sync_status,last_synced_at) +# idx_captain_documents_on_assistant_id_and_external_link_md5 (assistant_id, md5(external_link)) UNIQUE +# index_captain_documents_on_account_id (account_id) +# index_captain_documents_on_account_id_and_sync_status (account_id,sync_status) +# index_captain_documents_on_assistant_id (assistant_id) +# index_captain_documents_on_status (status) # class Captain::Document < ApplicationRecord class LimitExceededError < StandardError; end diff --git a/spec/enterprise/jobs/captain/tools/firecrawl_parser_job_spec.rb b/spec/enterprise/jobs/captain/tools/firecrawl_parser_job_spec.rb index e12efc54bd0..6aed603855d 100644 --- a/spec/enterprise/jobs/captain/tools/firecrawl_parser_job_spec.rb +++ b/spec/enterprise/jobs/captain/tools/firecrawl_parser_job_spec.rb @@ -61,6 +61,16 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do end end + it 'stores external links longer than 255 characters' do + long_url = "https://example.com/#{'arabic-product-slug-' * 300}" + payload[:metadata]['url'] = long_url + + described_class.perform_now(assistant_id: assistant.id, payload: payload) + + expect(assistant.documents.last.external_link).to eq(long_url) + expect(assistant.documents.last.external_link.length).to be > 255 + end + context 'when an error occurs' do it 'raises an error with a descriptive message' do allow(Captain::Assistant).to receive(:find).and_raise(ActiveRecord::RecordNotFound)