From e2c137b2ef6f521bc4115239311672a51df153d7 Mon Sep 17 00:00:00 2001 From: Nolan Lawson Date: Sun, 25 Aug 2019 21:48:59 -0700 Subject: [PATCH] fix: prefer local file URLs for OCR (#1436) --- src/routes/_actions/media.js | 2 ++ .../dialog/components/MediaAltEditor.html | 19 ++++++++++++++++--- src/routes/_utils/mediaUploadFileCache.js | 6 ++++++ src/routes/_utils/runTesseract.js | 4 ++-- 4 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 src/routes/_utils/mediaUploadFileCache.js diff --git a/src/routes/_actions/media.js b/src/routes/_actions/media.js index 6719929c..25d5ae23 100644 --- a/src/routes/_actions/media.js +++ b/src/routes/_actions/media.js @@ -2,6 +2,7 @@ import { store } from '../_store/store' import { uploadMedia } from '../_api/media' import { toast } from '../_components/toast/toast' import { scheduleIdleTask } from '../_utils/scheduleIdleTask' +import { mediaUploadFileCache } from '../_utils/mediaUploadFileCache' export async function doMediaUpload (realm, file) { const { currentInstance, accessToken } = store.get() @@ -12,6 +13,7 @@ export async function doMediaUpload (realm, file) { if (composeMedia.length === 4) { throw new Error('Only 4 media max are allowed') } + mediaUploadFileCache.set(response.id, file) composeMedia.push({ data: response, file: { name: file.name }, diff --git a/src/routes/_components/dialog/components/MediaAltEditor.html b/src/routes/_components/dialog/components/MediaAltEditor.html index 8ab5cea9..a2dd00a1 100644 --- a/src/routes/_components/dialog/components/MediaAltEditor.html +++ b/src/routes/_components/dialog/components/MediaAltEditor.html @@ -98,6 +98,7 @@ import { runTesseract } from '../../../_utils/runTesseract' import SvgIcon from '../../SvgIcon.html' import { toast } from '../../toast/toast' + import { mediaUploadFileCache } from '../../../_utils/mediaUploadFileCache' const updateRawTextInStore = throttleTimer(requestPostAnimationFrame) @@ -119,7 +120,8 @@ computed: { length: ({ rawText }) => length(rawText || ''), overLimit: ({ mediaAltCharLimit, length }) => length > mediaAltCharLimit, - url: ({ media, index }) => get(media, [index, 'data', 'url']) + url: ({ media, index }) => get(media, [index, 'data', 'url']), + mediaId: ({ media, index }) => get(media, [index, 'data', 'id']) }, methods: { observe, @@ -165,8 +167,19 @@ async onClick () { this.set({ extracting: true }) try { - const { url } = this.get() - const text = await runTesseract(url) + const { url, mediaId } = this.get() + const file = mediaUploadFileCache.get(mediaId) + let text + if (file) { // Avoid downloading from the network a file that the user *just* uploaded + const fileUrl = URL.createObjectURL(file) + try { + text = await runTesseract(fileUrl) + } finally { + URL.revokeObjectURL(fileUrl) + } + } else { + text = await runTesseract(url) + } const { media, index, realm } = this.get() if (media[index].description !== text) { media[index].description = text diff --git a/src/routes/_utils/mediaUploadFileCache.js b/src/routes/_utils/mediaUploadFileCache.js new file mode 100644 index 00000000..cdada303 --- /dev/null +++ b/src/routes/_utils/mediaUploadFileCache.js @@ -0,0 +1,6 @@ +// keep a cache of files for the most recent uploads to avoid +// re-downloading them for OCR + +import { QuickLRU } from '../_thirdparty/quick-lru/quick-lru' + +export const mediaUploadFileCache = new QuickLRU({ maxSize: 4 }) diff --git a/src/routes/_utils/runTesseract.js b/src/routes/_utils/runTesseract.js index 7217d0ac..ce7454fd 100644 --- a/src/routes/_utils/runTesseract.js +++ b/src/routes/_utils/runTesseract.js @@ -1,6 +1,6 @@ import { importTesseractWorker } from '../_utils/asyncModules' -export async function runTesseract (image) { +export async function runTesseract (url) { const worker = await importTesseractWorker() // TODO: have to trick tesseract into not creating a blob URL because that would break our CSP @@ -9,7 +9,7 @@ export async function runTesseract (image) { const OldBlob = window.Blob window.Blob = null try { - promise = worker.recognize(image) + promise = worker.recognize(url) } finally { window.Blob = OldBlob }