Refactor streaming's filtering logic & improve documentation (#26213)
This commit is contained in:
		| @@ -622,29 +622,39 @@ const startServer = async () => { | |||||||
|  |  | ||||||
|     log.verbose(req.requestId, `Starting stream from ${ids.join(', ')} for ${accountId}`); |     log.verbose(req.requestId, `Starting stream from ${ids.join(', ')} for ${accountId}`); | ||||||
|  |  | ||||||
|     // Currently message is of type string, soon it'll be Record<string, any> |     const transmit = (event, payload) => { | ||||||
|     const listener = message => { |       // TODO: Replace "string"-based delete payloads with object payloads: | ||||||
|       const { event, payload, queued_at } = message; |  | ||||||
|  |  | ||||||
|       const transmit = (payload) => { |  | ||||||
|         const now = new Date().getTime(); |  | ||||||
|         const delta = now - queued_at; |  | ||||||
|       const encodedPayload = typeof payload === 'object' ? JSON.stringify(payload) : payload; |       const encodedPayload = typeof payload === 'object' ? JSON.stringify(payload) : payload; | ||||||
|  |  | ||||||
|         log.silly(req.requestId, `Transmitting for ${accountId}: ${event} ${encodedPayload} Delay: ${delta}ms`); |       log.silly(req.requestId, `Transmitting for ${accountId}: ${event} ${encodedPayload}`); | ||||||
|       output(event, encodedPayload); |       output(event, encodedPayload); | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|       // Only messages that may require filtering are statuses, since notifications |     // The listener used to process each message off the redis subscription, | ||||||
|       // are already personalized and deletes do not matter |     // message here is an object with an `event` and `payload` property. Some | ||||||
|       if (!needsFiltering || event !== 'update') { |     // events also include a queued_at value, but this is being removed shortly. | ||||||
|         transmit(payload); |     const listener = message => { | ||||||
|  |       const { event, payload } = message; | ||||||
|  |  | ||||||
|  |       // Streaming only needs to apply filtering to some channels and only to | ||||||
|  |       // some events. This is because the majority of the filtering happens on the | ||||||
|  |       // Ruby on Rails side when producing the event for streaming. | ||||||
|  |       // | ||||||
|  |       // The only events that require filtering from the streaming server are | ||||||
|  |       // `update` and `status.update`, all other events are transmitted to the | ||||||
|  |       // client as soon as they're received (pass-through). | ||||||
|  |       // | ||||||
|  |       // The channels that need filtering are determined in the function | ||||||
|  |       // `channelNameToIds` defined below: | ||||||
|  |       if (!needsFiltering || (event !== 'update' && event !== 'status.update')) { | ||||||
|  |         transmit(event, payload); | ||||||
|         return; |         return; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       const targetAccountIds = [payload.account.id].concat(payload.mentions.map(item => item.id)); |       // The rest of the logic from here on in this function is to handle | ||||||
|       const accountDomain = payload.account.acct.split('@')[1]; |       // filtering of statuses: | ||||||
|  |  | ||||||
|  |       // Filter based on language: | ||||||
|       if (Array.isArray(req.chosenLanguages) && payload.language !== null && req.chosenLanguages.indexOf(payload.language) === -1) { |       if (Array.isArray(req.chosenLanguages) && payload.language !== null && req.chosenLanguages.indexOf(payload.language) === -1) { | ||||||
|         log.silly(req.requestId, `Message ${payload.id} filtered by language (${payload.language})`); |         log.silly(req.requestId, `Message ${payload.id} filtered by language (${payload.language})`); | ||||||
|         return; |         return; | ||||||
| @@ -652,11 +662,16 @@ const startServer = async () => { | |||||||
|  |  | ||||||
|       // When the account is not logged in, it is not necessary to confirm the block or mute |       // When the account is not logged in, it is not necessary to confirm the block or mute | ||||||
|       if (!req.accountId) { |       if (!req.accountId) { | ||||||
|         transmit(payload); |         transmit(event, payload); | ||||||
|         return; |         return; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       pgPool.connect((err, client, done) => { |       // Filter based on domain blocks, blocks, mutes, or custom filters: | ||||||
|  |       const targetAccountIds = [payload.account.id].concat(payload.mentions.map(item => item.id)); | ||||||
|  |       const accountDomain = payload.account.acct.split('@')[1]; | ||||||
|  |  | ||||||
|  |       // TODO: Move this logic out of the message handling loop | ||||||
|  |       pgPool.connect((err, client, releasePgConnection) => { | ||||||
|         if (err) { |         if (err) { | ||||||
|           log.error(err); |           log.error(err); | ||||||
|           return; |           return; | ||||||
| @@ -683,28 +698,45 @@ const startServer = async () => { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         Promise.all(queries).then(values => { |         Promise.all(queries).then(values => { | ||||||
|           done(); |           releasePgConnection(); | ||||||
|  |  | ||||||
|  |           // Handling blocks & mutes and domain blocks: If one of those applies, | ||||||
|  |           // then we don't transmit the payload of the event to the client | ||||||
|           if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) { |           if (values[0].rows.length > 0 || (accountDomain && values[1].rows.length > 0)) { | ||||||
|             return; |             return; | ||||||
|           } |           } | ||||||
|  |  | ||||||
|           if (!payload.filtered && !req.cachedFilters) { |           // If the payload already contains the `filtered` property, it means | ||||||
|  |           // that filtering has been applied on the Ruby on Rails side, as | ||||||
|  |           // such, we don't need to construct or apply the filters in streaming: | ||||||
|  |           if (Object.prototype.hasOwnProperty.call(payload, "filtered")) { | ||||||
|  |             transmit(event, payload); | ||||||
|  |             return; | ||||||
|  |           } | ||||||
|  |  | ||||||
|  |           // Handling for constructing the custom filters and caching them on the request | ||||||
|  |           // TODO: Move this logic out of the message handling lifecycle | ||||||
|  |           if (!req.cachedFilters) { | ||||||
|             const filterRows = values[accountDomain ? 2 : 1].rows; |             const filterRows = values[accountDomain ? 2 : 1].rows; | ||||||
|  |  | ||||||
|             req.cachedFilters = filterRows.reduce((cache, row) => { |             req.cachedFilters = filterRows.reduce((cache, filter) => { | ||||||
|               if (cache[row.id]) { |               if (cache[filter.id]) { | ||||||
|                 cache[row.id].keywords.push([row.keyword, row.whole_word]); |                 cache[filter.id].keywords.push([filter.keyword, filter.whole_word]); | ||||||
|               } else { |               } else { | ||||||
|                 cache[row.id] = { |                 cache[filter.id] = { | ||||||
|                   keywords: [[row.keyword, row.whole_word]], |                   keywords: [[filter.keyword, filter.whole_word]], | ||||||
|                   expires_at: row.expires_at, |                   expires_at: filter.expires_at, | ||||||
|                   repr: { |                   filter: { | ||||||
|                     id: row.id, |                     id: filter.id, | ||||||
|                     title: row.title, |                     title: filter.title, | ||||||
|                     context: row.context, |                     context: filter.context, | ||||||
|                     expires_at: row.expires_at, |                     expires_at: filter.expires_at, | ||||||
|                     filter_action: ['warn', 'hide'][row.filter_action], |                     // filter.filter_action is the value from the | ||||||
|  |                     // custom_filters.action database column, it is an integer | ||||||
|  |                     // representing a value in an enum defined by Ruby on Rails: | ||||||
|  |                     // | ||||||
|  |                     // enum { warn: 0, hide: 1 } | ||||||
|  |                     filter_action: ['warn', 'hide'][filter.filter_action], | ||||||
|                   }, |                   }, | ||||||
|                 }; |                 }; | ||||||
|               } |               } | ||||||
| @@ -712,6 +744,10 @@ const startServer = async () => { | |||||||
|               return cache; |               return cache; | ||||||
|             }, {}); |             }, {}); | ||||||
|  |  | ||||||
|  |             // Construct the regular expressions for the custom filters: This | ||||||
|  |             // needs to be done in a separate loop as the database returns one | ||||||
|  |             // filterRow per keyword, so we need all the keywords before | ||||||
|  |             // constructing the regular expression | ||||||
|             Object.keys(req.cachedFilters).forEach((key) => { |             Object.keys(req.cachedFilters).forEach((key) => { | ||||||
|               req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => { |               req.cachedFilters[key].regexp = new RegExp(req.cachedFilters[key].keywords.map(([keyword, whole_word]) => { | ||||||
|                 let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); |                 let expr = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); | ||||||
| @@ -731,34 +767,56 @@ const startServer = async () => { | |||||||
|             }); |             }); | ||||||
|           } |           } | ||||||
|  |  | ||||||
|           // Check filters |           // Apply cachedFilters against the payload, constructing a | ||||||
|           if (req.cachedFilters && !payload.filtered) { |           // `filter_results` array of FilterResult entities | ||||||
|             const mutatedPayload = { ...payload }; |           if (req.cachedFilters) { | ||||||
|             const status = payload; |             const status = payload; | ||||||
|             const searchContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n'); |             // TODO: Calculate searchableContent in Ruby on Rails: | ||||||
|             const searchIndex = JSDOM.fragment(searchContent).textContent; |             const searchableContent = ([status.spoiler_text || '', status.content].concat((status.poll && status.poll.options) ? status.poll.options.map(option => option.title) : [])).concat(status.media_attachments.map(att => att.description)).join('\n\n').replace(/<br\s*\/?>/g, '\n').replace(/<\/p><p>/g, '\n\n'); | ||||||
|  |             const searchableTextContent = JSDOM.fragment(searchableContent).textContent; | ||||||
|  |  | ||||||
|             const now = new Date(); |             const now = new Date(); | ||||||
|             mutatedPayload.filtered = []; |             const filter_results = Object.values(req.cachedFilters).reduce((results, cachedFilter) => { | ||||||
|             Object.values(req.cachedFilters).forEach((cachedFilter) => { |               // Check the filter hasn't expired before applying: | ||||||
|               if ((cachedFilter.expires_at === null || cachedFilter.expires_at > now)) { |               if (cachedFilter.expires_at !== null && cachedFilter.expires_at < now) { | ||||||
|                 const keyword_matches = searchIndex.match(cachedFilter.regexp); |                 return; | ||||||
|                 if (keyword_matches) { |  | ||||||
|                   mutatedPayload.filtered.push({ |  | ||||||
|                     filter: cachedFilter.repr, |  | ||||||
|                     keyword_matches, |  | ||||||
|                   }); |  | ||||||
|               } |               } | ||||||
|               } |  | ||||||
|             }); |  | ||||||
|  |  | ||||||
|             transmit(mutatedPayload); |               // Just in-case JSDOM fails to find textContent in searchableContent | ||||||
|  |               if (!searchableTextContent) { | ||||||
|  |                 return; | ||||||
|  |               } | ||||||
|  |  | ||||||
|  |               const keyword_matches = searchableTextContent.match(cachedFilter.regexp); | ||||||
|  |               if (keyword_matches) { | ||||||
|  |                 // results is an Array of FilterResult; status_matches is always | ||||||
|  |                 // null as we only are only applying the keyword-based custom | ||||||
|  |                 // filters, not the status-based custom filters. | ||||||
|  |                 // https://docs.joinmastodon.org/entities/FilterResult/ | ||||||
|  |                 results.push({ | ||||||
|  |                   filter: cachedFilter.filter, | ||||||
|  |                   keyword_matches, | ||||||
|  |                   status_matches: null | ||||||
|  |                 }); | ||||||
|  |               } | ||||||
|  |             }, []); | ||||||
|  |  | ||||||
|  |             // Send the payload + the FilterResults as the `filtered` property | ||||||
|  |             // to the streaming connection. To reach this code, the `event` must | ||||||
|  |             // have been either `update` or `status.update`, meaning the | ||||||
|  |             // `payload` is a Status entity, which has a `filtered` property: | ||||||
|  |             // | ||||||
|  |             // filtered: https://docs.joinmastodon.org/entities/Status/#filtered | ||||||
|  |             transmit(event, { | ||||||
|  |               ...payload, | ||||||
|  |               filtered: filter_results | ||||||
|  |             }); | ||||||
|           } else { |           } else { | ||||||
|             transmit(payload); |             transmit(event, payload); | ||||||
|           } |           } | ||||||
|         }).catch(err => { |         }).catch(err => { | ||||||
|  |           releasePgConnection(); | ||||||
|           log.error(err); |           log.error(err); | ||||||
|           done(); |  | ||||||
|         }); |         }); | ||||||
|       }); |       }); | ||||||
|     }; |     }; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user