Batched remove status service (#3735)
* Make Pubsubhubbub::DistributionWorker handle both single stream entry arguments, as well as arrays of stream entries
* Add BatchedRemoveStatusService, make SuspendAccountService use it
* Improve method names
* Add test
* Add more tests
* Use PuSH payloads of 100 to have a clear mapping of 1000 input statuses -> 10 PuSH payloads

It was nice while it lasted
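
A minimal sketch (not part of the commit) of the arithmetic behind the last bullet, using plain integer IDs to stand in for stream entries:

    # Illustration only: slicing 1000 deleted statuses into PuSH payloads of 100
    # yields exactly 10 batches, each of which becomes one DistributionWorker job.
    stream_entry_ids = (1..1000).to_a
    batches          = stream_entry_ids.each_slice(100).to_a
    batches.size       # => 10
    batches.first.size # => 100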

app/services/batched_remove_status_service.rb (new file, 114 lines)
@@ -0,0 +1,114 @@
# frozen_string_literal: true

class BatchedRemoveStatusService < BaseService
  include StreamEntryRenderer

  # Delete given statuses and reblogs of them
  # Dispatch PuSH updates of the deleted statuses, but only local ones
  # Dispatch Salmon deletes, unique per domain, of the deleted statuses, but only local ones
  # Remove statuses from home feeds
  # Push delete events to streaming API for home feeds and public feeds
  # @param [Status] statuses A preferably batched array of statuses
  def call(statuses)
    statuses = Status.where(id: statuses.map(&:id)).includes(:account, :stream_entry).flat_map { |status| [status] + status.reblogs.includes(:account, :stream_entry).to_a }

    @mentions = statuses.map { |s| [s.id, s.mentions.includes(:account).to_a] }.to_h
    @tags     = statuses.map { |s| [s.id, s.tags.pluck(:name)] }.to_h

    @stream_entry_batches = []
    @salmon_batches       = []
    @json_payloads        = statuses.map { |s| [s.id, Oj.dump(event: :delete, payload: s.id)] }.to_h

    # Ensure that rendered XML reflects destroyed state
    Status.where(id: statuses.map(&:id)).in_batches.destroy_all

    # Batch by source account
    statuses.group_by(&:account_id).each do |_, account_statuses|
      account = account_statuses.first.account

      unpush_from_home_timelines(account_statuses)
      batch_stream_entries(account_statuses) if account.local?
    end

    # Cannot be batched
    statuses.each do |status|
      unpush_from_public_timelines(status)
      batch_salmon_slaps(status) if status.local?
    end

    Pubsubhubbub::DistributionWorker.push_bulk(@stream_entry_batches) { |batch| batch }
    NotificationWorker.push_bulk(@salmon_batches) { |batch| batch }
  end

  private

  def batch_stream_entries(statuses)
    stream_entry_ids = statuses.map { |s| s.stream_entry.id }

    stream_entry_ids.each_slice(100) do |batch_of_stream_entry_ids|
      @stream_entry_batches << [batch_of_stream_entry_ids]
    end
  end

  def unpush_from_home_timelines(statuses)
    account    = statuses.first.account
    recipients = account.followers.local.pluck(:id)

    recipients << account.id if account.local?

    recipients.each do |follower_id|
      unpush(follower_id, statuses)
    end
  end

  def unpush_from_public_timelines(status)
    payload = @json_payloads[status.id]

    redis.pipelined do
      redis.publish('timeline:public', payload)
      redis.publish('timeline:public:local', payload) if status.local?

      @tags[status.id].each do |hashtag|
        redis.publish("timeline:hashtag:#{hashtag}", payload)
        redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
      end
    end
  end

  def batch_salmon_slaps(status)
    return if @mentions[status.id].empty?

    payload    = stream_entry_to_xml(status.stream_entry.reload)
    recipients = @mentions[status.id].map(&:account).reject(&:local?).uniq(&:domain).map(&:id)

    recipients.each do |recipient_id|
      @salmon_batches << [payload, status.account_id, recipient_id]
    end
  end

  def unpush(follower_id, statuses)
    key = FeedManager.instance.key(:home, follower_id)

    originals = statuses.reject(&:reblog?)
    reblogs   = statuses.reject { |s| !s.reblog? }

    # Quickly remove all originals
    redis.pipelined do
      originals.each do |status|
        redis.zremrangebyscore(key, status.id, status.id)
        redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
      end
    end

    # For reblogs, re-add original status to feed, unless the reblog
    # was not in the feed in the first place
    reblogs.each do |status|
      redis.zadd(key, status.reblog_of_id, status.reblog_of_id) unless redis.zscore(key, status.reblog_of_id).nil?
      redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
    end
  end

  def redis
    Redis.current
  end
end
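
For context, a hedged sketch of what the two push_bulk calls at the bottom of `call` amount to. This assumes a sidekiq-bulk style helper (not shown in this commit) that maps each collected item to one job's argument list and enqueues all jobs in a single Redis round trip:

    # Rough illustration, not code from the commit. Each element of
    # @stream_entry_batches is [array_of_up_to_100_stream_entry_ids], so every
    # DistributionWorker job receives one array of IDs; conceptually this is
    # equivalent to the loops below, minus the per-job Redis round trips.
    @stream_entry_batches.each do |args|
      Pubsubhubbub::DistributionWorker.perform_async(*args)
    end

    @salmon_batches.each do |args|
      NotificationWorker.perform_async(*args)
    end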

app/services/suspend_account_service.rb
@@ -17,9 +17,8 @@ class SuspendAccountService < BaseService
   end
 
   def purge_content
-    @account.statuses.reorder(nil).find_each do |status|
-      # This federates out deletes to previous followers
-      RemoveStatusService.new.call(status)
+    @account.statuses.reorder(nil).find_in_batches do |statuses|
+      BatchedRemoveStatusService.new.call(statuses)
     end
 
     [
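
This change is where the "1000 input statuses -> 10 PuSH payloads" mapping from the commit message comes from: find_in_batches yields groups of up to 1000 statuses by default, and the service slices their stream entries into payloads of 100. A sketch under that assumption, where `account` stands in for the account being purged:

    # Assumes Rails' default find_in_batches batch_size of 1000.
    # 1000 statuses per batch / 100 stream entry IDs per PuSH payload
    # = up to 10 Pubsubhubbub::DistributionWorker jobs per batch.
    account.statuses.reorder(nil).find_in_batches(batch_size: 1000) do |statuses|
      BatchedRemoveStatusService.new.call(statuses)
    end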

app/workers/pubsubhubbub/distribution_worker.rb
@@ -5,25 +5,45 @@ class Pubsubhubbub::DistributionWorker
 
   sidekiq_options queue: 'push'
 
-  def perform(stream_entry_id)
-    stream_entry = StreamEntry.find(stream_entry_id)
+  def perform(stream_entry_ids)
+    stream_entries = StreamEntry.where(id: stream_entry_ids).includes(:status).reject { |e| e.status&.direct_visibility? }
 
-    return if stream_entry.status&.direct_visibility?
+    return if stream_entries.empty?
 
-    @account = stream_entry.account
-    @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, [stream_entry]))
-    @domains = @account.followers_domains
+    @account       = stream_entries.first.account
+    @subscriptions = active_subscriptions.to_a
 
-    Subscription.where(account: @account).active.select('id, callback_url').find_each do |subscription|
-      next if stream_entry.hidden? && !allowed_to_receive?(subscription.callback_url)
-      Pubsubhubbub::DeliveryWorker.perform_async(subscription.id, @payload)
-    end
-  rescue ActiveRecord::RecordNotFound
-    true
+    distribute_public!(stream_entries.reject(&:hidden?))
+    distribute_hidden!(stream_entries.reject { |s| !s.hidden? })
   end
 
   private
 
+  def distribute_public!(stream_entries)
+    return if stream_entries.empty?
+
+    @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
+
+    Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions) do |subscription|
+      [subscription.id, @payload]
+    end
+  end
+
+  def distribute_hidden!(stream_entries)
+    return if stream_entries.empty?
+
+    @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
+    @domains = @account.followers_domains
+
+    Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions.reject { |s| !allowed_to_receive?(s.callback_url) }) do |subscription|
+      [subscription.id, @payload]
+    end
+  end
+
+  def active_subscriptions
+    Subscription.where(account: @account).active.select('id, callback_url')
+  end
+
   def allowed_to_receive?(callback_url)
     @domains.include?(Addressable::URI.parse(callback_url).host)
   end
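
The commit message also notes the worker now handles both a single stream entry argument and an array of stream entries; that falls out of StreamEntry.where(id: stream_entry_ids), since where(id: ...) accepts a scalar or an array. A hedged sketch of both call styles, where `stream_entry`, `entry_a` and `entry_b` are assumed records:

    # Existing single-entry callers keep working, and batched callers pass an array.
    Pubsubhubbub::DistributionWorker.perform_async(stream_entry.id)
    Pubsubhubbub::DistributionWorker.perform_async([entry_a.id, entry_b.id])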
							
								
								
									
										61
									
								
								spec/services/batched_remove_status_service_spec.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								spec/services/batched_remove_status_service_spec.rb
									
									
									
									
									
										Normal file
									
								
@@ -0,0 +1,61 @@
require 'rails_helper'

RSpec.describe BatchedRemoveStatusService do
  subject { BatchedRemoveStatusService.new }

  let!(:alice)  { Fabricate(:account) }
  let!(:bob)    { Fabricate(:account, username: 'bob', domain: 'example.com', salmon_url: 'http://example.com/salmon') }
  let!(:jeff)   { Fabricate(:account) }

  let(:status1) { PostStatusService.new.call(alice, 'Hello @bob@example.com') }
  let(:status2) { PostStatusService.new.call(alice, 'Another status') }

  before do
    allow(Redis.current).to receive_messages(publish: nil)

    stub_request(:post, 'http://example.com/push').to_return(status: 200, body: '', headers: {})
    stub_request(:post, 'http://example.com/salmon').to_return(status: 200, body: '', headers: {})

    Fabricate(:subscription, account: alice, callback_url: 'http://example.com/push', confirmed: true, expires_at: 30.days.from_now)
    jeff.follow!(alice)

    status1
    status2

    subject.call([status1, status2])
  end

  it 'removes statuses from author\'s home feed' do
    expect(Feed.new(:home, alice).get(10)).to_not include([status1.id, status2.id])
  end

  it 'removes statuses from local follower\'s home feed' do
    expect(Feed.new(:home, jeff).get(10)).to_not include([status1.id, status2.id])
  end

  it 'notifies streaming API of followers' do
    expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once)
  end

  it 'notifies streaming API of author' do
    expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once)
  end

  it 'notifies streaming API of public timeline' do
    expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once)
  end

  it 'sends PuSH update to PuSH subscribers with two payloads united' do
    expect(a_request(:post, 'http://example.com/push').with { |req|
      matches = req.body.scan(TagManager::VERBS[:delete])
      matches.size == 2
    }).to have_been_made
  end

  it 'sends Salmon slap to previously mentioned users' do
    expect(a_request(:post, "http://example.com/salmon").with { |req|
      xml = OStatus2::Salmon.new.unpack(req.body)
      xml.match(TagManager::VERBS[:delete])
    }).to have_been_made.once
  end
end

spec/workers/pubsubhubbub/distribution_worker_spec.rb
@@ -16,10 +16,9 @@ describe Pubsubhubbub::DistributionWorker do
     let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :public) }
 
     it 'delivers payload to all subscriptions' do
-      allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+      allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
       subject.perform(status.stream_entry.id)
-      expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/)
-      expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(anonymous_subscription.id, /.*/)
+      expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([anonymous_subscription, subscription_with_follower])
     end
   end
 
@@ -27,10 +26,10 @@ describe Pubsubhubbub::DistributionWorker do
     let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :private) }
 
     it 'delivers payload only to subscriptions with followers' do
-      allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+      allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
       subject.perform(status.stream_entry.id)
-      expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/)
-      expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async).with(anonymous_subscription.id, /.*/)
+      expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([subscription_with_follower])
+      expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk).with([anonymous_subscription])
     end
   end
 
@@ -38,9 +37,9 @@ describe Pubsubhubbub::DistributionWorker do
     let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :direct) }
 
     it 'does not deliver payload' do
-      allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+      allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
       subject.perform(status.stream_entry.id)
-      expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async)
+      expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk)
     end
   end
 end