fix: prefer local file URLs for OCR (#1436)

This commit is contained in:
Nolan Lawson 2019-08-25 21:48:59 -07:00 committed by GitHub
parent cb12e05584
commit e2c137b2ef
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 26 additions and 5 deletions

View file

@ -2,6 +2,7 @@ import { store } from '../_store/store'
import { uploadMedia } from '../_api/media'
import { toast } from '../_components/toast/toast'
import { scheduleIdleTask } from '../_utils/scheduleIdleTask'
import { mediaUploadFileCache } from '../_utils/mediaUploadFileCache'
export async function doMediaUpload (realm, file) {
const { currentInstance, accessToken } = store.get()
@ -12,6 +13,7 @@ export async function doMediaUpload (realm, file) {
if (composeMedia.length === 4) {
throw new Error('Only 4 media max are allowed')
}
mediaUploadFileCache.set(response.id, file)
composeMedia.push({
data: response,
file: { name: file.name },

View file

@ -98,6 +98,7 @@
import { runTesseract } from '../../../_utils/runTesseract'
import SvgIcon from '../../SvgIcon.html'
import { toast } from '../../toast/toast'
import { mediaUploadFileCache } from '../../../_utils/mediaUploadFileCache'
const updateRawTextInStore = throttleTimer(requestPostAnimationFrame)
@ -119,7 +120,8 @@
computed: {
length: ({ rawText }) => length(rawText || ''),
overLimit: ({ mediaAltCharLimit, length }) => length > mediaAltCharLimit,
url: ({ media, index }) => get(media, [index, 'data', 'url'])
url: ({ media, index }) => get(media, [index, 'data', 'url']),
mediaId: ({ media, index }) => get(media, [index, 'data', 'id'])
},
methods: {
observe,
@ -165,8 +167,19 @@
async onClick () {
this.set({ extracting: true })
try {
const { url } = this.get()
const text = await runTesseract(url)
const { url, mediaId } = this.get()
const file = mediaUploadFileCache.get(mediaId)
let text
if (file) { // Avoid downloading from the network a file that the user *just* uploaded
const fileUrl = URL.createObjectURL(file)
try {
text = await runTesseract(fileUrl)
} finally {
URL.revokeObjectURL(fileUrl)
}
} else {
text = await runTesseract(url)
}
const { media, index, realm } = this.get()
if (media[index].description !== text) {
media[index].description = text

View file

@ -0,0 +1,6 @@
// Remembers the File objects of the most recent media uploads so that
// OCR can read them locally rather than fetching them from the network
// a second time. Callers key entries by the uploaded media's id.
import { QuickLRU } from '../_thirdparty/quick-lru/quick-lru'

// Upper bound handed to the LRU as its `maxSize` option.
const MAX_CACHED_UPLOADS = 4

export const mediaUploadFileCache = new QuickLRU({ maxSize: MAX_CACHED_UPLOADS })

View file

@ -1,6 +1,6 @@
import { importTesseractWorker } from '../_utils/asyncModules'
export async function runTesseract (image) {
export async function runTesseract (url) {
const worker = await importTesseractWorker()
// TODO: have to trick tesseract into not creating a blob URL because that would break our CSP
@ -9,7 +9,7 @@ export async function runTesseract (image) {
const OldBlob = window.Blob
window.Blob = null
try {
promise = worker.recognize(image)
promise = worker.recognize(url)
} finally {
window.Blob = OldBlob
}