parent
b01191037e
commit
56f266cb93
|
@ -1,5 +1,7 @@
|
||||||
import { importTesseractWorker } from '../_utils/asyncModules'
|
import { importTesseractWorker } from '../_utils/asyncModules'
|
||||||
|
|
||||||
|
// how long (in milliseconds) an idle worker is kept before it is destroyed
const DESTROY_WORKER_DELAY = 5 * 60 * 1000 // 5 minutes
|
||||||
|
|
||||||
// TODO: it's flaky to try to estimate tesseract's total progress this way
|
// TODO: it's flaky to try to estimate tesseract's total progress this way
|
||||||
const steps = [
|
const steps = [
|
||||||
{ status: 'loading tesseract core', proportion: 0.05 },
|
{ status: 'loading tesseract core', proportion: 0.05 },
|
||||||
|
@ -9,6 +11,36 @@ const steps = [
|
||||||
{ status: 'recognizing text', proportion: 0.6 }
|
{ status: 'recognizing text', proportion: 0.6 }
|
||||||
]
|
]
|
||||||
|
|
||||||
|
// shared tesseract worker: created on demand by initWorker() and reset to
// null by destroyWorker() after it has been terminated
let worker
|
||||||
|
// setTimeout handle for the pending destroyWorker() call; falsy when no
// destruction is scheduled
let destroyWorkerHandle
|
||||||
|
|
||||||
|
/**
 * Lazily creates the shared tesseract worker and stores it in `worker`.
 * Does nothing when a worker already exists.
 *
 * The worker factory is loaded asynchronously, so several callers can pass
 * the initial `worker` check before any of them resumes. The check is
 * repeated after the await so that only one worker is ever created;
 * otherwise the earlier worker would be overwritten and never terminated.
 *
 * @returns {Promise<void>} resolves once `worker` is set
 */
async function initWorker () {
  if (worker) {
    return
  }
  const createWorker = await importTesseractWorker()
  // another call may have created the worker while we were awaiting
  if (!worker) {
    worker = createWorker()
  }
}
|
||||||
|
|
||||||
|
/**
 * Terminates the shared tesseract worker, if one exists, and clears the
 * `worker` reference so that a later initWorker() call creates a new one.
 */
function destroyWorker () {
  console.log('destroying tesseract worker')
  if (!worker) {
    return // nothing to tear down
  }
  worker.terminate()
  worker = null
}
|
||||||
|
|
||||||
|
/**
 * Destroys the worker after a delay to reduce memory usage.
 *
 * Any previously scheduled destruction is cancelled first, so at most one
 * timer is pending and the delay restarts from now.
 */
function scheduleDestroyWorker () {
  cancelDestroyWorker()
  const handle = setTimeout(destroyWorker, DESTROY_WORKER_DELAY)
  destroyWorkerHandle = handle
}
|
||||||
|
|
||||||
|
/**
 * Cancels the pending worker destruction, if one is scheduled, and clears
 * the stored timer handle.
 */
function cancelDestroyWorker () {
  if (!destroyWorkerHandle) {
    return // no timer pending
  }
  clearTimeout(destroyWorkerHandle)
  destroyWorkerHandle = null
}
|
||||||
|
|
||||||
function getTotalProgress (progressInfo) {
|
function getTotalProgress (progressInfo) {
|
||||||
const idx = steps.findIndex(({ status }) => progressInfo.status === status)
|
const idx = steps.findIndex(({ status }) => progressInfo.status === status)
|
||||||
let total = 0
|
let total = 0
|
||||||
|
@ -19,9 +51,7 @@ function getTotalProgress (progressInfo) {
|
||||||
return total
|
return total
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function runTesseract (url, onProgress) {
|
function recognize (url, onProgress) {
|
||||||
const worker = await importTesseractWorker()
|
|
||||||
|
|
||||||
// TODO: have to trick tesseract into not creating a blob URL because that would break our CSP
|
// TODO: have to trick tesseract into not creating a blob URL because that would break our CSP
|
||||||
// see https://github.com/naptha/tesseract.js/pull/322
|
// see https://github.com/naptha/tesseract.js/pull/322
|
||||||
let promise
|
let promise
|
||||||
|
@ -38,6 +68,16 @@ export async function runTesseract (url, onProgress) {
|
||||||
onProgress(getTotalProgress(progressInfo))
|
onProgress(getTotalProgress(progressInfo))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
const res = await promise
|
return promise
|
||||||
return res.text
|
}
|
||||||
|
|
||||||
|
/**
 * Runs text recognition on the given URL using the shared tesseract worker.
 *
 * Any pending worker destruction is cancelled before the run, and a new one
 * is scheduled once recognition settles, whether it succeeded or failed.
 *
 * @param url - forwarded to recognize()
 * @param onProgress - progress callback, forwarded to recognize()
 * @returns the `text` property of the recognition result
 */
export async function runTesseract (url, onProgress) {
  cancelDestroyWorker()
  await initWorker()
  try {
    const result = await recognize(url, onProgress)
    return result.text
  } finally {
    // keep the worker around for a while in case it is needed again
    scheduleDestroyWorker()
  }
}
|
||||||
|
|
|
@ -12,10 +12,9 @@ import { TesseractWorker } from 'tesseract.js'
|
||||||
// which seems excessive. So we just live with the bug for now.
|
// which seems excessive. So we just live with the bug for now.
|
||||||
// https://github.com/naptha/tesseract.js/issues/325
|
// https://github.com/naptha/tesseract.js/issues/325
|
||||||
const { origin } = location
|
const { origin } = location
|
||||||
const tesseractWorker = new TesseractWorker({
|
|
||||||
|
/**
 * Factory for a new TesseractWorker whose worker script, language data and
 * core are all resolved against the current page origin.
 */
export default () => {
  const options = {
    workerPath: `${origin}/${workerPath}`,
    langPath: `${origin}/`,
    corePath: `${origin}/${corePath}`
  }
  return new TesseractWorker(options)
}
|
||||||
|
|
||||||
export default tesseractWorker
|
|
||||||
|
|
Loading…
Reference in a new issue