perf: slightly more efficient word filter format (#1991)

This commit is contained in:
Nolan Lawson 2021-03-14 09:24:00 -07:00 committed by GitHub
parent 4adc8ff748
commit 5e61a8582b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 47 additions and 42 deletions

View file

@ -1,4 +1,5 @@
import { createRegexFromFilter } from '../../_utils/createRegexFromFilter'
import { createRegexFromFilters } from '../../_utils/createRegexFromFilters'
import { WORD_FILTER_CONTEXTS } from '../../_static/wordFilters'
export function wordFilterComputations (store) {
// unexpiredInstanceFilters is calculated based on `now` and `instanceFilters`,
@ -9,13 +10,17 @@ export function wordFilterComputations (store) {
(unexpiredInstanceFilters, currentInstance) => unexpiredInstanceFilters[currentInstance] || []
)
store.compute('unexpiredInstanceFiltersWithRegexes', ['unexpiredInstanceFilters'], unexpiredInstanceFilters => {
store.compute('unexpiredInstanceFilterRegexes', ['unexpiredInstanceFilters'], unexpiredInstanceFilters => {
return Object.fromEntries(Object.entries(unexpiredInstanceFilters).map(([instanceName, filters]) => {
const filtersWithRegexes = filters.map(filter => ({
...filter,
regex: createRegexFromFilter(filter)
}))
return [instanceName, filtersWithRegexes]
const contextsToRegex = Object.fromEntries(WORD_FILTER_CONTEXTS.map(context => {
const filtersForThisContext = filters.filter(_ => _.context.includes(context))
if (!filtersForThisContext.length) {
return undefined // don't bother even adding it to the map
}
const regex = createRegexFromFilters(filtersForThisContext)
return [context, regex]
}).filter(Boolean))
return [instanceName, contextsToRegex]
}))
})
}

View file

@ -40,8 +40,8 @@ export function wordFilterObservers () {
updateUnexpiredInstanceFiltersIfUnchanged(now, instanceFilters)
})
store.observe('unexpiredInstanceFiltersWithRegexes', async unexpiredInstanceFiltersWithRegexes => {
console.log('unexpiredInstanceFiltersWithRegexes changed, recomputing filterContexts')
store.observe('unexpiredInstanceFilterRegexes', async unexpiredInstanceFilterRegexes => {
console.log('unexpiredInstanceFilterRegexes changed, recomputing filterContexts')
mark('update timeline item summary filter contexts')
// Whenever the filters change, we need to re-compute the filterContexts on the TimelineSummaries.
// This is a bit of an odd design, but we do it for perf. See timelineItemToSummary.js for details.
@ -55,7 +55,7 @@ export function wordFilterObservers () {
let somethingChanged = false
await Promise.all(Object.entries(unexpiredInstanceFiltersWithRegexes).map(async ([instanceName, filtersWithRegexes]) => {
await Promise.all(Object.entries(unexpiredInstanceFilterRegexes).map(async ([instanceName, contextsToRegex]) => {
const timelinesToSummaries = timelineItemSummaries[instanceName] || {}
const timelinesToSummariesToAdd = timelineItemSummariesToAdd[instanceName] || {}
const summariesToUpdate = [
@ -70,7 +70,7 @@ export function wordFilterObservers () {
? database.getNotification(instanceName, summary.id)
: database.getStatus(instanceName, summary.id)
)
const newFilterContexts = computeFilterContextsForStatusOrNotification(item, filtersWithRegexes)
const newFilterContexts = computeFilterContextsForStatusOrNotification(item, contextsToRegex)
if (!isEqual(summary.filterContexts, newFilterContexts)) {
somethingChanged = true
summary.filterContexts = newFilterContexts

View file

@ -1,18 +1,16 @@
import { createSearchIndexFromStatusOrNotification } from './createSearchIndexFromStatusOrNotification'
import { uniq } from 'lodash-es'
export function computeFilterContextsForStatusOrNotification (statusOrNotification, filtersWithRegexes) {
if (!filtersWithRegexes || !filtersWithRegexes.length) {
export function computeFilterContextsForStatusOrNotification (statusOrNotification, contextsToRegex) {
if (!contextsToRegex || !Object.keys(contextsToRegex).length) {
// avoid computing the search index, just bail out
return undefined
}
// the searchIndex is really just a string of text
const searchIndex = createSearchIndexFromStatusOrNotification(statusOrNotification)
const res = filtersWithRegexes && uniq(filtersWithRegexes
.filter(({ regex }) => regex.test(searchIndex))
.map(_ => _.context)
.flat())
const res = Object.entries(contextsToRegex)
.filter(([context, regex]) => regex.test(searchIndex))
.map(([context]) => context)
// return undefined instead of a new array to reduce memory usage of TimelineSummary
return (res && res.length) ? res : undefined
return res.length ? res : undefined
}

View file

@ -1,20 +0,0 @@
// copy-pasta'd from mastodon
// https://github.com/tootsuite/mastodon/blob/2ff01f7/app/javascript/mastodon/selectors/index.js#L40-L63
// Backslash-escape every regex metacharacter so the input is matched literally.
function escapeRegExp (string) {
  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') // $& means the whole matched string
}

/**
 * Build a case-insensitive RegExp matching a single filter's phrase.
 *
 * @param {{ phrase: string, whole_word: * }} filter - filter whose `phrase` is matched
 *   literally; a truthy `whole_word` anchors the phrase to word boundaries.
 * @returns {RegExp} regex with the `i` flag
 */
export function createRegexFromFilter (filter) {
  const { phrase, whole_word: wholeWord } = filter
  const escaped = escapeRegExp(phrase)
  // \b is only added on a side that begins/ends with a word character;
  // on any other side no boundary anchor is emitted
  const leading = (wholeWord && /^[\w]/.test(escaped)) ? '\\b' : ''
  const trailing = (wholeWord && /[\w]$/.test(escaped)) ? '\\b' : ''
  return new RegExp(`${leading}${escaped}${trailing}`, 'i')
}

View file

@ -0,0 +1,22 @@
// copy-pasta'd from mastodon
// https://github.com/tootsuite/mastodon/blob/2ff01f7/app/javascript/mastodon/selectors/index.js#L40-L63
// Backslash-escape every regex metacharacter so the input is matched literally.
function escapeRegExp (string) {
  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') // $& means the whole matched string
}

// Turn one filter into a regex source fragment (one alternative of the combined regex).
const filterToPattern = ({ phrase, whole_word: wholeWord }) => {
  const escaped = escapeRegExp(phrase)
  // \b is only added on a side that begins/ends with a word character;
  // on any other side no boundary anchor is emitted
  const leading = (wholeWord && /^[\w]/.test(escaped)) ? '\\b' : ''
  const trailing = (wholeWord && /[\w]$/.test(escaped)) ? '\\b' : ''
  return `${leading}${escaped}${trailing}`
}

/**
 * Build one case-insensitive RegExp that matches if ANY of the given filters matches.
 * Each filter contributes one alternative, joined with `|`.
 *
 * @param {Array<{ phrase: string, whole_word: * }>} filters - filters to combine
 * @returns {RegExp} combined regex with the `i` flag
 */
export const createRegexFromFilters = filters => {
  const alternatives = filters.map(filter => filterToPattern(filter))
  return new RegExp(alternatives.join('|'), 'i')
}

View file

@ -13,9 +13,9 @@ class TimelineSummary {
// 1. Avoid computing html-to-text (expensive) for users who don't have any filters (probably most users)
// 2. Avoid keeping the entire html-to-text in memory at all times for all summaries
// 3. Filters probably change infrequently. When they do, we can just update the summaries
const { unexpiredInstanceFiltersWithRegexes } = store.get()
const filtersWithRegexes = unexpiredInstanceFiltersWithRegexes[instanceName]
this.filterContexts = computeFilterContextsForStatusOrNotification(item, filtersWithRegexes)
const { unexpiredInstanceFilterRegexes } = store.get()
const contextsToRegex = unexpiredInstanceFilterRegexes[instanceName]
this.filterContexts = computeFilterContextsForStatusOrNotification(item, contextsToRegex)
}
}