fix: prefer local file URLs for OCR (#1436)

2019-08-25 21:48:59 -07:00 · 2019-08-25 21:48:59 -07:00 · e2c137b2ef
parent cb12e05584
commit e2c137b2ef
4 changed files with 26 additions and 5 deletions
--- a/src/routes/_actions/media.js
+++ b/src/routes/_actions/media.js
@ -2,6 +2,7 @@ import { store } from '../_store/store'
 import { uploadMedia } from '../_api/media'
 import { toast } from '../_components/toast/toast'
 import { scheduleIdleTask } from '../_utils/scheduleIdleTask'
+import { mediaUploadFileCache } from '../_utils/mediaUploadFileCache'

 export async function doMediaUpload (realm, file) {
  const { currentInstance, accessToken } = store.get()
@ -12,6 +13,7 @@ export async function doMediaUpload (realm, file) {
    if (composeMedia.length === 4) {
      throw new Error('Only 4 media max are allowed')
    }
+    mediaUploadFileCache.set(response.id, file)
    composeMedia.push({
      data: response,
      file: { name: file.name },
--- a/src/routes/_components/dialog/components/MediaAltEditor.html
+++ b/src/routes/_components/dialog/components/MediaAltEditor.html
@ -98,6 +98,7 @@
  import { runTesseract } from '../../../_utils/runTesseract'
  import SvgIcon from '../../SvgIcon.html'
  import { toast } from '../../toast/toast'
+  import { mediaUploadFileCache } from '../../../_utils/mediaUploadFileCache'

  const updateRawTextInStore = throttleTimer(requestPostAnimationFrame)

@ -119,7 +120,8 @@
    computed: {
      length: ({ rawText }) => length(rawText || ''),
      overLimit: ({ mediaAltCharLimit, length }) => length > mediaAltCharLimit,
-      url: ({ media, index }) => get(media, [index, 'data', 'url'])
+      url: ({ media, index }) => get(media, [index, 'data', 'url']),
+      mediaId: ({ media, index }) => get(media, [index, 'data', 'id'])
    },
    methods: {
      observe,
@ -165,8 +167,19 @@
      async onClick () {
        this.set({ extracting: true })
        try {
-          const { url } = this.get()
-          const text = await runTesseract(url)
+          const { url, mediaId } = this.get()
+          const file = mediaUploadFileCache.get(mediaId)
+          let text
+          if (file) { // Avoid downloading from the network a file that the user *just* uploaded
+            const fileUrl = URL.createObjectURL(file)
+            try {
+              text = await runTesseract(fileUrl)
+            } finally {
+              URL.revokeObjectURL(fileUrl)
+            }
+          } else {
+            text = await runTesseract(url)
+          }
          const { media, index, realm } = this.get()
          if (media[index].description !== text) {
            media[index].description = text
--- a/src/routes/_utils/mediaUploadFileCache.js
+++ b/src/routes/_utils/mediaUploadFileCache.js
@ -0,0 +1,6 @@
+// keep an LRU cache of the local files for the most recent uploads, keyed by
+// media id, so that OCR can read them locally instead of re-downloading them
+
+import { QuickLRU } from '../_thirdparty/quick-lru/quick-lru'
+
+export const mediaUploadFileCache = new QuickLRU({ maxSize: 4 })
--- a/src/routes/_utils/runTesseract.js
+++ b/src/routes/_utils/runTesseract.js
@ -1,6 +1,6 @@
 import { importTesseractWorker } from '../_utils/asyncModules'

-export async function runTesseract (image) {
+export async function runTesseract (url) {
  const worker = await importTesseractWorker()

  // TODO: have to trick tesseract into not creating a blob URL because that would break our CSP
@ -9,7 +9,7 @@ export async function runTesseract (image) {
  const OldBlob = window.Blob
  window.Blob = null
  try {
-    promise = worker.recognize(image)
+    promise = worker.recognize(url)
  } finally {
    window.Blob = OldBlob
  }