fix: add progress bar for OCR (#1444)

This commit is contained in:
Nolan Lawson 2019-08-27 23:23:35 -07:00 committed by GitHub
parent c822f19975
commit c8738f17b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 5 deletions

View file

@ -35,6 +35,11 @@
{/if} {/if}
</span> </span>
</button> </button>
<LengthGauge
length={extractionProgress}
overLimit={false}
max={100}
/>
</div> </div>
<style> <style>
.media-alt-editor { .media-alt-editor {
@ -124,7 +129,8 @@
rawText: '', rawText: '',
mediaAltCharLimit: MEDIA_ALT_CHAR_LIMIT, mediaAltCharLimit: MEDIA_ALT_CHAR_LIMIT,
extracting: false, extracting: false,
className: '' className: '',
extractionProgress: 0
}), }),
computed: { computed: {
length: ({ rawText }) => length(rawText || ''), length: ({ rawText }) => length(rawText || ''),
@ -176,17 +182,22 @@
this.set({ extracting: true }) this.set({ extracting: true })
try { try {
const { url } = this.get() const { url } = this.get()
const onProgress = progress => {
requestAnimationFrame(() => {
this.set({ extractionProgress: progress * 100 })
})
}
const file = mediaUploadFileCache.get(url) const file = mediaUploadFileCache.get(url)
let text let text
if (file) { // Avoid downloading from the network a file that the user *just* uploaded if (file) { // Avoid downloading from the network a file that the user *just* uploaded
const fileUrl = URL.createObjectURL(file) const fileUrl = URL.createObjectURL(file)
try { try {
text = await runTesseract(fileUrl) text = await runTesseract(fileUrl, onProgress)
} finally { } finally {
URL.revokeObjectURL(fileUrl) URL.revokeObjectURL(fileUrl)
} }
} else { } else {
text = await runTesseract(url) text = await runTesseract(url, onProgress)
} }
const { media, index, realm } = this.get() const { media, index, realm } = this.get()
if (media[index].description !== text) { if (media[index].description !== text) {
@ -201,6 +212,11 @@
) )
} finally { } finally {
this.set({ extracting: false }) this.set({ extracting: false })
setTimeout(() => {
requestAnimationFrame(() => {
this.set({ extractionProgress: 0 })
})
}, 400)
} }
} }
}, },

View file

@ -1,6 +1,25 @@
import { importTesseractWorker } from '../_utils/asyncModules' import { importTesseractWorker } from '../_utils/asyncModules'
export async function runTesseract (url) { // TODO: it's flaky to try to estimate tesseract's total progress this way
// Tesseract progress statuses we know how to report, each paired with the
// share of the overall progress estimate it is assumed to take (the shares
// add up to 1). Order matters: entries listed before the current status are
// counted as already finished when the overall progress is computed.
const steps = [
  ['loading tesseract core', 0.05],
  ['initializing tesseract', 0.05],
  ['loading language traineddata', 0.1],
  ['initializing api', 0.2],
  ['recognizing text', 0.6]
].map(([status, proportion]) => ({ status, proportion }))
/**
 * Turn a single tesseract progress event into an estimate of overall progress.
 *
 * Every entry in `steps` owns a fixed share (`proportion`) of the total. The
 * estimate is the sum of the shares of all steps listed before the current
 * one, plus the current step's share scaled by `progressInfo.progress`.
 *
 * @param {{status: string, progress: number}} progressInfo - progress event;
 *   `status` is matched against `steps[].status`, and `progress` is the
 *   completed fraction of that step (presumably in the range 0..1 — confirm
 *   against the tesseract version in use).
 * @returns {number} estimated overall progress, or 0 when `status` is not one
 *   of the known steps.
 */
function getTotalProgress (progressInfo) {
  const idx = steps.findIndex(({ status }) => progressInfo.status === status)
  if (idx === -1) {
    // Unknown status: steps[-1] is undefined, so report "no progress" instead
    // of throwing a TypeError on the property access below.
    return 0
  }
  // Shares of every step that precedes the current one are counted in full.
  const completed = steps
    .slice(0, idx)
    .reduce((sum, { proportion }) => sum + proportion, 0)
  return completed + steps[idx].proportion * progressInfo.progress
}
export async function runTesseract (url, onProgress) {
const worker = await importTesseractWorker() const worker = await importTesseractWorker()
// TODO: have to trick tesseract into not creating a blob URL because that would break our CSP // TODO: have to trick tesseract into not creating a blob URL because that would break our CSP
@ -13,7 +32,12 @@ export async function runTesseract (url) {
} finally { } finally {
window.Blob = OldBlob window.Blob = OldBlob
} }
promise.progress(_ => console.log('progress', _)) promise.progress(progressInfo => {
console.log('progress', progressInfo)
if (onProgress && steps.find(({ status }) => status === progressInfo.status)) {
onProgress(getTotalProgress(progressInfo))
}
})
const res = await promise const res = await promise
return res.text return res.text
} }