Ingestion of remote collections (#38144)

This commit is contained in:
David Roetzel 2026-03-11 15:29:00 +01:00 committed by GitHub
parent 4a08ab64d1
commit 3091e2e525
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 203 additions and 5 deletions

View File

@ -69,3 +69,5 @@ The following table summarizes those limits.
| Account aliases (actor `alsoKnownAs`) | 256 | List will be truncated |
| Custom emoji shortcode (`Emoji` `name`) | 2048 | Emoji will be rejected |
| Media and avatar/header descriptions (`name`/`summary`) | 1500 | Description will be truncated |
| Collection name (`FeaturedCollection` `name`) | 256 | Name will be truncated |
| Collection description (`FeaturedCollection` `summary`) | 2048 | Description will be truncated |

View File

@ -22,6 +22,8 @@
#
class Collection < ApplicationRecord
MAX_ITEMS = 25
NAME_LENGTH_HARD_LIMIT = 256
DESCRIPTION_LENGTH_HARD_LIMIT = 2048
belongs_to :account
belongs_to :tag, optional: true
@ -31,10 +33,16 @@ class Collection < ApplicationRecord
has_many :collection_reports, dependent: :delete_all
validates :name, presence: true
validates :description, presence: true,
if: :local?
validates :description_html, presence: true,
if: :remote?
validates :name, length: { maximum: 40 }, if: :local?
validates :name, length: { maximum: NAME_LENGTH_HARD_LIMIT }, if: :remote?
validates :description,
presence: true,
length: { maximum: 100 },
if: :local?
validates :description_html,
presence: true,
length: { maximum: DESCRIPTION_LENGTH_HARD_LIMIT },
if: :remote?
validates :local, inclusion: [true, false]
validates :sensitive, inclusion: [true, false]
validates :discoverable, inclusion: [true, false]

View File

@ -14,7 +14,9 @@ class REST::CollectionSerializer < ActiveModel::Serializer
end
def description
object.local? ? object.description : object.description_html
return object.description if object.local?
Sanitize.fragment(object.description_html, Sanitize::Config::MASTODON_STRICT)
end
def items

View File

@ -0,0 +1,52 @@
# frozen_string_literal: true
class ActivityPub::ProcessFeaturedCollectionService
include JsonLdHelper
include Lockable
include Redisable
ITEMS_LIMIT = 150
def call(account, json)
@account = account
@json = json
return if non_matching_uri_hosts?(@account.uri, @json['id'])
with_redis_lock("collection:#{@json['id']}") do
return if @account.collections.exists?(uri: @json['id'])
@collection = @account.collections.create!(
local: false,
uri: @json['id'],
name: (@json['name'] || '')[0, Collection::NAME_LENGTH_HARD_LIMIT],
description_html: truncated_summary,
language:,
sensitive: @json['sensitive'],
discoverable: @json['discoverable'],
original_number_of_items: @json['totalItems'] || 0,
tag_name: @json.dig('topic', 'name')
)
process_items!
@collection
end
end
private
def truncated_summary
text = @json['summaryMap']&.values&.first || @json['summary'] || ''
text[0, Collection::DESCRIPTION_LENGTH_HARD_LIMIT]
end
def language
@json['summaryMap']&.keys&.first
end
def process_items!
@json['orderedItems'].take(ITEMS_LIMIT).each do |item_json|
ActivityPub::ProcessFeaturedItemWorker.perform_async(@collection.id, item_json)
end
end
end

View File

@ -0,0 +1,16 @@
# frozen_string_literal: true
class ActivityPub::ProcessFeaturedItemWorker
include Sidekiq::Worker
include ExponentialBackoff
sidekiq_options queue: 'pull', retry: 3
def perform(collection_id, id_or_json)
collection = Collection.find(collection_id)
ActivityPub::ProcessFeaturedItemService.new.call(collection, id_or_json)
rescue ActiveRecord::RecordNotFound
true
end
end

View File

@ -8,8 +8,12 @@ RSpec.describe Collection do
it { is_expected.to validate_presence_of(:name) }
it { is_expected.to validate_length_of(:name).is_at_most(40) }
it { is_expected.to validate_presence_of(:description) }
it { is_expected.to validate_length_of(:description).is_at_most(100) }
it { is_expected.to_not allow_value(nil).for(:local) }
it { is_expected.to_not allow_value(nil).for(:sensitive) }
@ -23,10 +27,14 @@ RSpec.describe Collection do
context 'when collection is remote' do
subject { Fabricate.build :collection, local: false }
it { is_expected.to validate_length_of(:name).is_at_most(Collection::NAME_LENGTH_HARD_LIMIT) }
it { is_expected.to_not validate_presence_of(:description) }
it { is_expected.to validate_presence_of(:description_html) }
it { is_expected.to validate_length_of(:description_html).is_at_most(Collection::DESCRIPTION_LENGTH_HARD_LIMIT) }
it { is_expected.to validate_presence_of(:uri) }
it { is_expected.to validate_presence_of(:original_number_of_items) }

View File

@ -51,5 +51,14 @@ RSpec.describe REST::CollectionSerializer do
expect(subject)
.to include('description' => '<p>remote</p>')
end
context 'when the description contains unwanted HTML' do
let(:description_html) { '<script>alert("hi!");</script><p>Nice people</p>' }
let(:collection) { Fabricate(:remote_collection, description_html:) }
it 'scrubs the HTML' do
expect(subject).to include('description' => '<p>Nice people</p>')
end
end
end
end

View File

@ -0,0 +1,76 @@
# frozen_string_literal: true
require 'rails_helper'
RSpec.describe ActivityPub::ProcessFeaturedCollectionService do
subject { described_class.new }
let(:account) { Fabricate(:remote_account) }
let(:summary) { '<p>A list of remote actors you should follow.</p>' }
let(:base_json) do
{
'@context' => 'https://www.w3.org/ns/activitystreams',
'id' => 'https://example.com/featured_collections/1',
'type' => 'FeaturedCollection',
'attributedTo' => account.uri,
'name' => 'Good people from other servers',
'sensitive' => false,
'discoverable' => true,
'topic' => {
'type' => 'Hashtag',
'name' => '#people',
},
'published' => '2026-03-09T15:19:25Z',
'totalItems' => 2,
'orderedItems' => [
'https://example.com/featured_items/1',
'https://example.com/featured_items/2',
],
}
end
let(:featured_collection_json) { base_json.merge('summary' => summary) }
context "when the collection's URI does not match the account's" do
let(:non_matching_account) { Fabricate(:remote_account, domain: 'other.example.com') }
it 'does not create a collection and returns `nil`' do
expect do
expect(subject.call(non_matching_account, featured_collection_json)).to be_nil
end.to_not change(Collection, :count)
end
end
context 'when URIs match up' do
it 'creates a collection and queues jobs to handle its items' do
expect { subject.call(account, featured_collection_json) }.to change(account.collections, :count).by(1)
new_collection = account.collections.last
expect(new_collection.uri).to eq 'https://example.com/featured_collections/1'
expect(new_collection.name).to eq 'Good people from other servers'
expect(new_collection.description_html).to eq '<p>A list of remote actors you should follow.</p>'
expect(new_collection.sensitive).to be false
expect(new_collection.discoverable).to be true
expect(new_collection.tag.formatted_name).to eq '#people'
expect(ActivityPub::ProcessFeaturedItemWorker).to have_enqueued_sidekiq_job.exactly(2).times
end
end
context 'when the json includes a summary map' do
let(:featured_collection_json) do
base_json.merge({
'summaryMap' => {
'en' => summary,
},
})
end
it 'sets language and summary correctly' do
expect { subject.call(account, featured_collection_json) }.to change(account.collections, :count).by(1)
new_collection = account.collections.last
expect(new_collection.language).to eq 'en'
expect(new_collection.description_html).to eq '<p>A list of remote actors you should follow.</p>'
end
end
end

View File

@ -0,0 +1,25 @@
# frozen_string_literal: true
require 'rails_helper'
RSpec.describe ActivityPub::ProcessFeaturedItemWorker do
subject { described_class.new }
let(:collection) { Fabricate(:remote_collection) }
let(:object) { 'https://example.com/featured_items/1' }
let(:stubbed_service) do
instance_double(ActivityPub::ProcessFeaturedItemService, call: true)
end
before do
allow(ActivityPub::ProcessFeaturedItemService).to receive(:new).and_return(stubbed_service)
end
describe 'perform' do
it 'calls the service to process the item' do
subject.perform(collection.id, object)
expect(stubbed_service).to have_received(:call).with(collection, object)
end
end
end