优化OCR接口 移除无用代码

This commit is contained in:
TonyJiangWJ 2022-02-08 21:19:41 +08:00
parent bd466224b2
commit 5fd0402cd5
16 changed files with 371 additions and 350 deletions

View File

@ -1,19 +1,18 @@
//目录路径(必须是完整路径)
var dirPath = "/sdcard/脚本/";
if(!files.isDir(dirPath)) files.create(dirPath);
//压缩文件路径(必须是完整路径)
var filePath = "/sdcard/脚本.7z";
//压缩类型
//支持的压缩类型包括:zip 7z bz2 bzip2 tbz2 tbz gz gzip tgz tar wim swm xz txz。
// 目录路径(必须是完整路径)
var dirPath = files.cwd();
// 压缩文件路径(必须是完整路径)
var filePath = org.autojs.autojs.Pref.getScriptDirPath() + '/测试.7z';
// 压缩类型
// 支持的压缩类型包括:zip 7z bz2 bzip2 tbz2 tbz gz gzip tgz tar wim swm xz txz。
var type = "7z";
//压缩密码
// 压缩密码
var password = "password"
//7z加密压缩(若文件已存在则跳过)
//zips.A(type, filePath, dirPath, password)
// 7z加密压缩(若文件已存在则跳过)
// $zips.addFiles(type, filePath, dirPath, password)
//压缩
switch (zips.A(type, filePath, dirPath)) {
// 压缩
switch ($zips.addFiles(type, filePath, dirPath)) {
case 0:
toastLog("压缩成功!文件已保存为: " + filePath)
break;

View File

@ -1,16 +1,16 @@
//压缩文件路径(必须是完整路径)
// 压缩文件路径(必须是完整路径)
var filePath = files.path("./bonus.rar");
//目录路径(必须是完整路径)
var dirPath = "/sdcard/脚本";
//压缩密码
// 目录路径(必须是完整路径)
var dirPath = org.autojs.autojs.Pref.getScriptDirPath();
// 压缩密码
var password = "password"
//支持的解压缩类型包括zip、7z、bz2、bzip2、tbz2、tbz、gz、gzip、tgz、tar、wim、swm、xz、txz以及rar、chm、iso、msi等众多格式。
//解压无加密的压缩包(若文件已存在则跳过)
//zips.X(filePath, dirPath)
// 支持的解压缩类型包括zip、7z、bz2、bzip2、tbz2、tbz、gz、gzip、tgz、tar、wim、swm、xz、txz以及rar、chm、iso、msi等众多格式。
// 解压无加密的压缩包(若文件已存在则跳过)
// $zips.extraFiles(filePath, dirPath)
//解压加密的压缩包(若文件已存在则跳过)
switch (zips.X(filePath, dirPath, password)) {
// 解压加密的压缩包(若文件已存在则跳过)
switch ($zips.extraFiles(filePath, dirPath, password)) {
case 0:
toastLog("解压缩成功!请到 " + dirPath + " 目录下查看。")
break;

View File

@ -1,19 +1,29 @@
const img = images.read("./test.png");
const cpuThreadNum = 4;
const img = images.read("./test.png")
console.show()
let cpuThreadNum = 4
// PaddleOCR 移动端提供了两种模型ocr_v2_for_cpu与ocr_v2_for_cpu(slim),此选项用于选择加载的模型,默认true使用v2的slim版(速度更快)false使用v2的普通版(准确率更高)
var useSlim = true;
let useSlim = true
let start = new Date()
// 识别图片中的文字返回完整识别信息兼容百度OCR格式
const result = ocr.R(img, cpuThreadNum, useSlim);
// 可以使用简化的调用命令默认参数cpuThreadNum = 4, useSlim = true;
// const result = ocr.R(img);
toastLog("完整识别信息兼容百度OCR格式: " + JSON.stringify(result));
// 识别图片中的文字,只返回文本识别信息(字符串列表)。
const stringList = ocr.T(img, cpuThreadNum, useSlim);
// 可以使用简化的调用命令默认参数cpuThreadNum = 4, useSlim = true;
// const stringList = ocr.T(img);
toastLog("文本识别信息(字符串列表): " + JSON.stringify(stringList));
let result = $ocr.detect(img, cpuThreadNum, useSlim)
log('slim识别耗时' + (new Date() - start) + 'ms')
// 可以使用简化的调用命令默认参数cpuThreadNum = 4, useSlim = true
// const result = $ocr.detect(img)
toastLog("完整识别信息: " + JSON.stringify(result))
start = new Date()
// 识别图片中的文字,只返回文本识别信息(字符串列表)。当前版本可能存在文字顺序错乱的问题 建议先使用detect后自行排序
const stringList = $ocr.recognizeText(img, cpuThreadNum, useSlim)
log('slim纯文本识别耗时' + (new Date() - start) + 'ms')
// 可以使用简化的调用命令默认参数cpuThreadNum = 4, useSlim = true
// const stringList = $ocr.recognizeText(img)
toastLog("文本识别信息(字符串列表): " + JSON.stringify(stringList))
// 增加线程数
cpuThreadNum = 8
start = new Date()
result = $ocr.detect(img, cpuThreadNum, useSlim)
log('8线程识别耗时' + (new Date() - start) + 'ms')
toastLog("完整识别信息兼容百度OCR格式: " + JSON.stringify(result))
// 释放模型 用于释放native内存
ocr.release()
$ocr.release()
// 回收图片
img.recycle()

View File

@ -2,15 +2,21 @@ importClass(com.baidu.paddle.lite.ocr.Predictor)
console.show()
let path = 'test.jpg'
// 指定是否用精简版模型 速度较快
let useSlim = false
// 创建检测器
let predictor = new Predictor()
// predictor.cpuThreadNum = 4 //可以自定义使用CPU的线程数
// 初始化模型 首次运行时会比较耗时
let loadSuccess = predictor.init(context)
// 内置的模型只有一个models/ocr_v2_for_cpu初始化自定义模型请写绝对路径否则无法获取到
// 使用自定义模型时det rec cls三个模型文件名称需要修改为后续版本开放自定义文件名
// ch_ppocr_mobile_v2.0_det_opt.nb
// ch_ppocr_mobile_v2.0_rec_opt.nb
// ch_ppocr_mobile_v2.0_cls_opt.nb
let loadSuccess = predictor.init(context, useSlim) // predictor.init(context) 为默认不使用精简版
// 内置默认 modelPath 为 models/ocr_v2_for_cpu初始化自定义模型请写绝对路径否则无法获取到
// 内置默认 labelPath 为 labels/ppocr_keys_v1.txt
// let modelPath = files.path('./models/customize') // 指定自定义模型路径
// let labelPath = files.path('./models/customize') // 指定自定义label路径
// 使用自定义模型时det rec cls三个模型文件名称需要手动指定
// predictor.detModelFilename = 'ch_ppocr_mobile_v2.0_det_opt.nb'
// predictor.recModelFilename = 'ch_ppocr_mobile_v2.0_rec_opt.nb'
// predictor.clsModelFilename = 'ch_ppocr_mobile_v2.0_cls_opt.nb'
// predictor.init(context, modelPath, labelPath)
toastLog('加载模型结果:' + loadSuccess)
let start = new Date()

View File

@ -0,0 +1,188 @@
// Script setup: stop duplicate instances of this script, obtain the screen-capture
// permission, load the PaddleOCR predictor and declare the state shared by the functions
// and listeners further down.
let currentEngine = engines.myEngine()
let runningEngines = engines.all()
// Appending '' turns the source object into a string so sources can be compared with ===
let currentSource = currentEngine.getSource() + ''
if (runningEngines.length > 1) {
runningEngines.forEach(compareEngine => {
let compareSource = compareEngine.getSource() + ''
if (currentEngine.id !== compareEngine.id && compareSource === currentSource) {
// Force-stop the other engine that is running the same script
compareEngine.forceStop()
}
})
}
// Without the screen-capture permission nothing below can work, so quit right away
if (!requestScreenCapture()) {
toastLog('请求截图权限失败')
exit()
}
// NOTE(review): presumably gives the capture permission time to take effect before the first screenshot - confirm
sleep(1000)
importClass(com.baidu.paddle.lite.ocr.Predictor)
// Whether to use the slim model, which is faster
let useSlim = false
// Create the predictor
let predictor = new Predictor()
// predictor.cpuThreadNum = 4 // the number of CPU threads can be customized
// Initialize the model; the first run takes noticeably longer
let loadSuccess = predictor.init(context, useSlim) // predictor.init(context) does not use the slim model by default
// The built-in default modelPath is models/ocr_v2_for_cpu; a custom model must be given as an absolute path, otherwise it cannot be found
// The built-in default labelPath is labels/ppocr_keys_v1.txt
// let modelPath = files.path('./models/customize') // path of the custom model
// let labelPath = files.path('./models/customize') // path of the custom label file
// When using a custom model, the det/rec/cls model file names have to be set manually
// predictor.detModelFilename = 'ch_ppocr_mobile_v2.0_det_opt.nb'
// predictor.recModelFilename = 'ch_ppocr_mobile_v2.0_rec_opt.nb'
// predictor.clsModelFilename = 'ch_ppocr_mobile_v2.0_cls_opt.nb'
// predictor.init(context, modelPath, labelPath)
if (!loadSuccess) {
toastLog('初始化ocr失败')
exit()
}
// Latest OCR results and the screenshot they were computed from
let result = []
let img = null
// Cleared by the exit handler; the draw listener stops painting once this is false
let running = true
// True while a capture/OCR pass is in progress; the draw listener skips frames meanwhile
let capturing = true
/**
 * Capture the screen and run OCR on the screenshot.
 *
 * Works on the script-level state: the previous `img` is recycled and replaced by the
 * new screenshot, and `result` receives whatever `predictor.runOcr` returns for it.
 * `capturing` is true while the function runs and is always reset to false afterwards,
 * even when the capture fails or OCR throws, so the draw listener is never left
 * permanently paused.
 *
 * When no screenshot could be taken, `result` is set to an empty list and the function
 * returns without calling the predictor.
 */
function captureAndOcr() {
  capturing = true
  try {
    img && img.recycle()
    img = captureScreen()
    if (!img) {
      toastLog('截图失败')
      // No image to recognize: leave an empty result instead of dereferencing null
      result = []
      return
    }
    let start = new Date()
    result = predictor.runOcr(img.getBitmap())
    toastLog('耗时' + (new Date() - start) + 'ms')
  } finally {
    capturing = false
  }
}
// UI wiring: run a first recognition pass, create a full-screen canvas overlay that paints
// the OCR results, add a small button window to re-run recognition or quit, and register
// cleanup for script exit.
captureAndOcr()
// Status bar height, negated so the overlay window is shifted up by that amount
let offset = -getStatusBarHeightCompat()
// Overlay window used to draw the recognition results
let window = floaty.rawWindow(
<canvas id="canvas" layout_weight="1" />
);
// Position and size the overlay; touch is disabled on it
ui.post(() => {
window.setPosition(0, offset)
window.setSize(device.width, device.height)
window.setTouchable(false)
})
// Window holding the action buttons
let clickButtonWindow = floaty.rawWindow(
<vertical>
<button id="captureAndOcr" text="截图识别" />
<button id="closeBtn" text="退出" />
</vertical>
);
// Center the button window horizontally at 65% of the screen height
ui.run(function () {
clickButtonWindow.setPosition(device.width / 2 - ~~(clickButtonWindow.getWidth() / 2), device.height * 0.65)
})
// "Capture and recognize" button
clickButtonWindow.captureAndOcr.click(function () {
// Drop the previous results before the new pass
result = []
// NOTE(review): presumably moves the buttons off-screen so they do not appear in the screenshot - confirm
ui.run(function () {
clickButtonWindow.setPosition(device.width, device.height)
})
// Run the capture 500ms later, then put the button window back in place
setTimeout(() => {
captureAndOcr()
ui.run(function () {
clickButtonWindow.setPosition(device.width / 2 - ~~(clickButtonWindow.getWidth() / 2), device.height * 0.65)
})
}, 500)
})
// "Exit" button
clickButtonWindow.closeBtn.click(function () {
exit()
})
// Shared paint used for all overlay drawing
let Typeface = android.graphics.Typeface
let paint = new Paint()
paint.setStrokeWidth(1)
paint.setTypeface(Typeface.DEFAULT_BOLD)
paint.setTextAlign(Paint.Align.LEFT)
paint.setAntiAlias(true)
paint.setStrokeJoin(Paint.Join.ROUND)
paint.setDither(true)
// Draw listener: paints one rectangle and label per OCR result
window.canvas.on('draw', function (canvas) {
// Skip the frame once the script is stopping or while a capture/OCR pass is running
if (!running || capturing) {
return
}
// Clear whatever was drawn before
canvas.drawColor(0xFFFFFF, android.graphics.PorterDuff.Mode.CLEAR)
if (result && result.length > 0) {
for (let i = 0; i < result.length; i++) {
let ocrResult = result[i]
drawRectAndText(ocrResult.label, ocrResult.bounds, '#00ff00', canvas, paint)
}
}
})
// NOTE(review): empty timer, presumably only there to keep the script alive - confirm
setInterval(() => { }, 10000)
events.on('exit', () => {
// Mark as stopped to avoid a crash caused by the canvas
running = false
// Recycle the screenshot
img && img.recycle()
// Remove the canvas listeners
window.canvas.removeAllListeners()
// Release the model
predictor.releaseModel()
})
/**
 * Draw a rectangle outline and a text label anchored at its top-left corner.
 *
 * The outline uses the inverse of the requested color and the label uses the color
 * itself. When the function returns, the paint is left as opaque black with
 * text size 10 and stroke width 1.
 *
 * @param {*} desc text drawn at (rect.left, rect.top)
 * @param {*} rect rectangle to outline; its left/top fields position the text
 * @param {*} colorStr color string handed to colors.parseColor
 * @param {*} canvas canvas receiving the drawRect/drawText calls
 * @param {*} paint paint object reused (and mutated) for both draw calls
 */
function drawRectAndText (desc, rect, colorStr, canvas, paint) {
  const argb = colors.parseColor(colorStr)
  // Split the packed color into its 8-bit channels once
  const red = (argb >> 16) & 0xff
  const green = (argb >> 8) & 0xff
  const blue = argb & 0xff

  // Outline: 1px stroke in the inverted color
  paint.setStrokeWidth(1)
  paint.setStyle(Paint.Style.STROKE)
  paint.setARGB(255, 255 - red, 255 - green, 255 - blue)
  canvas.drawRect(rect, paint)

  // Label: filled text in the requested color
  paint.setARGB(255, red, green, blue)
  paint.setStrokeWidth(1)
  paint.setTextSize(20)
  paint.setStyle(Paint.Style.FILL)
  canvas.drawText(desc, rect.left, rect.top, paint)

  // Leave the shared paint as small black text
  paint.setTextSize(10)
  paint.setStrokeWidth(1)
  paint.setARGB(255, 0, 0, 0)
}
/**
 * Get the height of the status bar in pixels.
 *
 * Looks up the platform dimension "status_bar_height" (type "dimen", package "android").
 * When that resource is missing or resolves to a non-positive value, falls back to
 * 25dp converted to pixels with the display density.
 *
 * @returns {number} status bar height in pixels
 */
function getStatusBarHeightCompat () {
  let resources = context.getResources()
  let height = 0
  let resId = resources.getIdentifier("status_bar_height", "dimen", "android")
  if (resId > 0) {
    height = resources.getDimensionPixelOffset(resId)
  }
  if (height <= 0) {
    // No `R` resource class is declared in this script, so 25dp is converted by hand;
    // Math.floor truncates the fractional pixel part
    height = Math.floor(25 * resources.getDisplayMetrics().density)
  }
  return height
}

View File

@ -15,7 +15,7 @@ import java.util.Set;
public class ExplorerFileItem implements ExplorerItem {
private static final Set<String> sEditableFileExts = new HashSet<>(Arrays.asList(
"js", "java", "xml", "json", "txt", "log", "ts", "html"
"js", "java", "xml", "json", "txt", "log", "ts", "html", "css", "vue"
));
private PFile mFile;

View File

@ -48,6 +48,12 @@ str_to_cpu_mode(const std::string &cpu_mode) {
::toupper);
auto index = cpu_mode_map.find(upper_key);
if (index == cpu_mode_map.end()) {
// 可能因为大小写转换后找不到 直接通过入参查找
index = cpu_mode_map.find(cpu_mode);
if (index != cpu_mode_map.end()) {
LOGI("find cpu_mode by &cpu_mode %s", cpu_mode.c_str());
return index->second;
}
LOGE("cpu_mode not found %s", upper_key.c_str());
return paddle::lite_api::LITE_POWER_HIGH;
} else {

View File

@ -33,7 +33,7 @@ public class OCRPredictorNative {
public OCRPredictorNative(Config config) {
this.config = config;
loadLibrary();
nativePointer = init(config.detModelFilename, config.recModelFilename,config.clsModelFilename,
nativePointer = init(config.detModelFilename, config.recModelFilename, config.clsModelFilename,
config.cpuThreadNum, config.cpuPower);
Log.i("OCRPredictorNative", "load success " + nativePointer);
@ -44,8 +44,7 @@ public class OCRPredictorNative {
Log.i("OCRPredictorNative", "begin to run image " + inputData.length + " " + width + " " + height);
float[] dims = new float[]{1, channels, height, width};
float[] rawResults = forward(nativePointer, inputData, dims, originalImage);
ArrayList<OcrResultModel> results = postprocess(rawResults);
return results;
return postprocess(rawResults);
}
public static class Config {
@ -57,15 +56,14 @@ public class OCRPredictorNative {
}
// 原代码中该方法名为destory()此处应该是拼写错误其它类中调用此方法的名称也已修正
public void destroy(){
public void destroy() {
if (nativePointer != 0) {
release(nativePointer);
nativePointer = 0;
}
}
protected native long init(String detModelPath, String recModelPath,String clsModelPath, int threadNum, String cpuMode);
protected native long init(String detModelPath, String recModelPath, String clsModelPath, int threadNum, String cpuMode);
protected native float[] forward(long pointer, float[] buf, float[] ddims, Bitmap originalImage);

View File

@ -2,16 +2,11 @@ package com.baidu.paddle.lite.ocr;
import android.graphics.Point;
import android.graphics.Rect;
import android.os.Parcelable;
public class OcrResult {
public float confidence;
public float preprocessTime;
public float inferenceTime;
public String words;
public Rect bounds;
public RectLocation location;
private String label;
private float confidence;
private Rect bounds;
public OcrResult() {
}
@ -63,21 +58,39 @@ public class OcrResult {
return bounds;
}
public RectLocation getLocation() {
return new RectLocation(bounds);
}
public void setBounds(Rect bounds) {
this.bounds = bounds;
}
public String getWords() {
return label.trim().replace("\r", "");
}
public static class RectLocation {
public int left;
public int top;
public int width;
public int height;
public RectLocation() {
}
public RectLocation(int left, int top, int width, int height) {
this.left = left;
this.top = top;
this.width = width;
this.height = height;
}
public RectLocation(Rect rect) {
left = rect.left;
top = rect.top;
width = rect.right - rect.left;
height = rect.bottom - rect.top;
}
}
}

View File

@ -32,25 +32,51 @@ public class Predictor {
protected OCRPredictorNative paddlePredictor = null;
protected float inferenceTime = 0;
// Only for object detection
protected Vector<String> wordLabels = new Vector<String>();
protected Vector<String> wordLabels = new Vector<>();
protected String inputColorFormat = "BGR";
protected long[] inputShape = new long[]{1, 3, 960};
protected float[] inputMean = new float[]{0.485f, 0.456f, 0.406f};
protected float[] inputStd = new float[]{0.229f, 0.224f, 0.225f};
protected float scoreThreshold = 0.1f;
protected Bitmap inputImage = null;
protected Bitmap outputImage = null;
protected volatile String outputResult = "";
protected float preprocessTime = 0;
protected float postprocessTime = 0;
protected boolean useSlim = true;;
protected boolean useSlim = true;
/**
* 检测模型
*/
public String detModelFilename = "ch_ppocr_mobile_v2.0_det_opt.nb";
/**
* 识别模型
*/
public String recModelFilename = "ch_ppocr_mobile_v2.0_rec_opt.nb";
/**
* 文本方向检测模型
*/
public String clsModelFilename = "ch_ppocr_mobile_v2.0_cls_opt.nb";
private final String defaultLabelPath = "labels/ppocr_keys_v1.txt";
private final String defaultModelPath = "models/ocr_v2_for_cpu";
private final String defaultModelPathSlim = "models/ocr_v2_for_cpu(slim)";
public Predictor() {
}
public boolean init(Context appCtx) {
return this.init(appCtx, "models/ocr_v2_for_cpu", "labels/ppocr_keys_v1.txt");
return this.init(appCtx, defaultModelPath, defaultLabelPath);
}
/**
 * Initializes the predictor with one of the built-in default models.
 * <p>
 * Returns {@code true} right away when a model is already loaded and it was loaded with the
 * same {@code useSlim} choice. Otherwise the choice is stored in {@code this.useSlim} and the
 * call is delegated to {@code init(appCtx, modelPath, labelPath)} with
 * {@code defaultModelPathSlim} or {@code defaultModelPath} and {@code defaultLabelPath}.
 *
 * @param appCtx  context forwarded to the path-based {@code init}
 * @param useSlim {@code true} to use the slim model path, {@code false} for the regular one
 * @return {@code true} if already loaded with the same setting, otherwise the result of the
 *         delegated {@code init}
 */
public boolean init(Context appCtx, boolean useSlim) {
if (this.isLoaded && this.useSlim == useSlim) {
return true;
}
// Stored before the delegated init runs, so it keeps the new value even if that init returns false
this.useSlim = useSlim;
if (useSlim) {
return this.init(appCtx, defaultModelPathSlim, defaultLabelPath);
} else {
return this.init(appCtx, defaultModelPath, defaultLabelPath);
}
}
public boolean init(Context appCtx, String modelPath, String labelPath) {
@ -64,7 +90,6 @@ public class Predictor {
public boolean init(Context appCtx, String modelPath, String labelPath, int cpuThreadNum, String cpuPowerMode,
String inputColorFormat,
long[] inputShape, float[] inputMean,
float[] inputStd, float scoreThreshold) {
if (inputShape.length != 3) {
@ -89,15 +114,10 @@ public class Predictor {
"channel size in your Apps!");
return false;
}
if (!inputColorFormat.equalsIgnoreCase("BGR")) {
Log.e(TAG, "Only BGR color format is supported.");
return false;
}
boolean isLoaded = init(appCtx, modelPath, labelPath);
if (!isLoaded) {
return false;
}
this.inputColorFormat = inputColorFormat;
this.inputShape = inputShape;
this.inputMean = inputMean;
this.inputStd = inputStd;
@ -114,22 +134,19 @@ public class Predictor {
return false;
}
String realPath = modelPath;
if (!modelPath.substring(0, 1).equals("/")) {
if (modelPath.charAt(0) != '/') {
// Read model files from custom path if the first character of mode path is '/'
// otherwise copy model to cache from assets
realPath = appCtx.getCacheDir() + "/" + modelPath;
Utils.copyDirectoryFromAssets(appCtx, modelPath, realPath);
}
if (realPath.isEmpty()) {
return false;
}
OCRPredictorNative.Config config = new OCRPredictorNative.Config();
config.cpuThreadNum = cpuThreadNum;
config.detModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_det_opt.nb";
config.recModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_rec_opt.nb";
config.clsModelFilename = realPath + File.separator + "ch_ppocr_mobile_v2.0_cls_opt.nb";
Log.e("Predictor", "model path" + config.detModelFilename + " ; " + config.recModelFilename + ";" + config.clsModelFilename);
config.detModelFilename = realPath + File.separator + detModelFilename;
config.recModelFilename = realPath + File.separator + recModelFilename;
config.clsModelFilename = realPath + File.separator + clsModelFilename;
Log.i("Predictor", "model path" + config.detModelFilename + " ; " + config.recModelFilename + ";" + config.clsModelFilename);
config.cpuPower = cpuPowerMode;
paddlePredictor = new OCRPredictorNative(config);
@ -146,8 +163,6 @@ public class Predictor {
paddlePredictor = null;
}
isLoaded = false;
cpuThreadNum = 1;
cpuPowerMode = "LITE_POWER_HIGH";
modelPath = "";
modelName = "";
}
@ -176,81 +191,6 @@ public class Predictor {
}
public boolean runModel() {
if (inputImage == null || !isLoaded()) {
return false;
}
// Pre-process image, and feed input tensor with pre-processed data
Bitmap scaleImage = Utils.resizeWithStep(inputImage, Long.valueOf(inputShape[2]).intValue(), 32);
Date start = new Date();
int channels = (int) inputShape[1];
int width = scaleImage.getWidth();
int height = scaleImage.getHeight();
float[] inputData = new float[channels * width * height];
if (channels == 3) {
int[] channelIdx = null;
if (inputColorFormat.equalsIgnoreCase("RGB")) {
channelIdx = new int[]{0, 1, 2};
} else if (inputColorFormat.equalsIgnoreCase("BGR")) {
channelIdx = new int[]{2, 1, 0};
} else {
Log.i(TAG, "Unknown color format " + inputColorFormat + ", only RGB and BGR color format is " +
"supported!");
return false;
}
int[] channelStride = new int[]{width * height, width * height * 2};
int[] pixels = new int[width * height];
scaleImage.getPixels(pixels, 0, scaleImage.getWidth(), 0, 0, scaleImage.getWidth(), scaleImage.getHeight());
for (int i = 0; i < pixels.length; i++) {
int color = pixels[i];
float[] rgb = new float[]{(float) red(color) / 255.0f, (float) green(color) / 255.0f,
(float) blue(color) / 255.0f};
inputData[i] = (rgb[channelIdx[0]] - inputMean[0]) / inputStd[0];
inputData[i + channelStride[0]] = (rgb[channelIdx[1]] - inputMean[1]) / inputStd[1];
inputData[i + channelStride[1]] = (rgb[channelIdx[2]] - inputMean[2]) / inputStd[2];
}
} else if (channels == 1) {
int[] pixels = new int[width * height];
scaleImage.getPixels(pixels, 0, scaleImage.getWidth(), 0, 0, scaleImage.getWidth(), scaleImage.getHeight());
for (int i = 0; i < pixels.length; i++) {
int color = pixels[i];
float gray = (float) (red(color) + green(color) + blue(color)) / 3.0f / 255.0f;
inputData[i] = (gray - inputMean[0]) / inputStd[0];
}
} else {
Log.i(TAG, "Unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " +
"supported!");
return false;
}
float[] pixels = inputData;
Log.i(TAG, "pixels " + pixels[0] + " " + pixels[1] + " " + pixels[2] + " " + pixels[3]
+ " " + pixels[pixels.length / 2] + " " + pixels[pixels.length / 2 + 1] + " " + pixels[pixels.length - 2] + " " + pixels[pixels.length - 1]);
Date end = new Date();
preprocessTime = (float) (end.getTime() - start.getTime());
// Warm up
for (int i = 0; i < warmupIterNum; i++) {
paddlePredictor.runImage(inputData, width, height, channels, inputImage);
}
warmupIterNum = 0; // do not need warm
// Run inference
start = new Date();
ArrayList<OcrResultModel> results = paddlePredictor.runImage(inputData, width, height, channels, inputImage);
end = new Date();
inferenceTime = (end.getTime() - start.getTime()) / (float) inferIterNum;
results = postprocess(results);
Log.i(TAG, "[stat] Preprocess Time: " + preprocessTime
+ " ; Inference Time: " + inferenceTime + " ;Box Size " + results.size());
drawResults(results);
return true;
}
public List<OcrResult> runOcr(Bitmap inputImage) {
if (inputImage == null || !isLoaded()) {
return Collections.emptyList();
@ -322,7 +262,7 @@ public class Predictor {
Log.i(TAG, "[stat] Preprocess Time: " + preprocessTime
+ " ; Inference Time: " + inferenceTime + " ;Box Size " + results.size());
List<OcrResult> ocrResults = new ArrayList<>();
for (OcrResultModel resultModel:results) {
for (OcrResultModel resultModel : results) {
ocrResults.add(new OcrResult(resultModel));
}
return ocrResults;
@ -357,14 +297,6 @@ public class Predictor {
return inputImage;
}
public Bitmap outputImage() {
return outputImage;
}
public String outputResult() {
return outputResult;
}
public float preprocessTime() {
return preprocessTime;
}
@ -373,6 +305,22 @@ public class Predictor {
return postprocessTime;
}
public String getDefaultLabelPath() {
return defaultLabelPath;
}
public String getDefaultModelPath() {
return defaultModelPath;
}
public String getDefaultModelPathSlim() {
return defaultModelPathSlim;
}
public boolean isUseSlim() {
return useSlim;
}
public void setInputImage(Bitmap image) {
if (image == null) {
return;
@ -382,7 +330,7 @@ public class Predictor {
private ArrayList<OcrResultModel> postprocess(ArrayList<OcrResultModel> results) {
for (OcrResultModel r : results) {
StringBuffer word = new StringBuffer();
StringBuilder word = new StringBuilder();
for (int index : r.getWordIndex()) {
if (index >= 0 && index < wordLabels.size()) {
word.append(wordLabels.get(index));
@ -396,166 +344,5 @@ public class Predictor {
return results;
}
private void drawResults(ArrayList<OcrResultModel> results) {
StringBuffer outputResultSb = new StringBuffer("");
for (int i = 0; i < results.size(); i++) {
OcrResultModel result = results.get(i);
StringBuilder sb = new StringBuilder("");
sb.append(result.getLabel());
sb.append(" ").append(result.getConfidence());
sb.append("; Points: ");
for (Point p : result.getPoints()) {
sb.append("(").append(p.x).append(",").append(p.y).append(") ");
}
Log.i(TAG, sb.toString()); // show LOG in Logcat panel
outputResultSb.append(i + 1).append(": ").append(result.getLabel()).append("\n");
}
outputResult = outputResultSb.toString();
outputImage = inputImage;
Canvas canvas = new Canvas(outputImage);
Paint paintFillAlpha = new Paint();
paintFillAlpha.setStyle(Paint.Style.FILL);
paintFillAlpha.setColor(Color.parseColor("#3B85F5"));
paintFillAlpha.setAlpha(50);
Paint paint = new Paint();
paint.setColor(Color.parseColor("#3B85F5"));
paint.setStrokeWidth(5);
paint.setStyle(Paint.Style.STROKE);
for (OcrResultModel result : results) {
Path path = new Path();
List<Point> points = result.getPoints();
path.moveTo(points.get(0).x, points.get(0).y);
for (int i = points.size() - 1; i >= 0; i--) {
Point p = points.get(i);
path.lineTo(p.x, p.y);
}
canvas.drawPath(path, paint);
canvas.drawPath(path, paintFillAlpha);
}
}
public boolean init(Context appCtx, boolean useSlim) {
if (!this.isLoaded || (this.useSlim != useSlim)) {
loadLabel(appCtx, "labels/ppocr_keys_v1.txt");
if (useSlim) {
loadModel(appCtx, "models/ocr_v2_for_cpu(slim)", 4, "LITE_POWER_HIGH");
} else {
loadModel(appCtx, "models/ocr_v2_for_cpu", 4, "LITE_POWER_HIGH");
}
}
this.isLoaded = true;
this.useSlim = useSlim;
Log.i(TAG, "isLoaded: " + this.isLoaded);
return this.isLoaded;
}
public List<OcrResult> transformData(List<OcrResultModel> OcrResultModelList) {
if (OcrResultModelList == null) {
return Collections.emptyList();
}
List<OcrResult> words_result = new ArrayList<>();
for (OcrResultModel model : OcrResultModelList) {
List<Point> pointList = model.getPoints();
if (pointList.isEmpty()) {
continue;
}
Point firstPoint = pointList.get(0);
int left = firstPoint.x;
int top = firstPoint.y;
int right = firstPoint.x;
int bottom = firstPoint.y;
for (Point p : pointList) {
if (p.x < left) {
left = p.x;
}
if (p.x > right) {
right = p.x;
}
if (p.y < top) {
top = p.y;
}
if (p.y > bottom) {
bottom = p.y;
}
}
OcrResult ocrResult = new OcrResult();
ocrResult.preprocessTime = preprocessTime;
ocrResult.inferenceTime = inferenceTime;
ocrResult.confidence = model.getConfidence();
ocrResult.words = model.getLabel().trim().replace("\r", "");
ocrResult.location = new OcrResult.RectLocation(left, top, Math.abs(right - left), Math.abs(bottom - top));
ocrResult.bounds = new Rect(left, top, right, bottom);
words_result.add(ocrResult);
}
return words_result;
}
public List<OcrResult> ocr(Bitmap inputImage, int cpuThreadNum) {
this.cpuThreadNum = cpuThreadNum;
if (inputImage == null) {
return Collections.emptyList();
}
// Pre-process image, and feed input tensor with pre-processed data
Bitmap scaleImage = Utils.resizeWithStep(inputImage, Long.valueOf(inputShape[2]).intValue(), 32);
Date start = new Date();
int channels = (int) inputShape[1];
int width = scaleImage.getWidth();
int height = scaleImage.getHeight();
float[] inputData = new float[channels * width * height];
if (channels == 3) {
int[] channelIdx = null;
if (inputColorFormat.equalsIgnoreCase("RGB")) {
channelIdx = new int[]{0, 1, 2};
} else if (inputColorFormat.equalsIgnoreCase("BGR")) {
channelIdx = new int[]{2, 1, 0};
} else {
Log.i(TAG, "Unknown color format " + inputColorFormat + ", only RGB and BGR color format is " +
"supported!");
return Collections.emptyList();
}
int[] channelStride = new int[]{width * height, width * height * 2};
int[] pixels = new int[width * height];
scaleImage.getPixels(pixels, 0, scaleImage.getWidth(), 0, 0, scaleImage.getWidth(), scaleImage.getHeight());
for (int i = 0; i < pixels.length; i++) {
int color = pixels[i];
float[] rgb = new float[]{(float) red(color) / 255.0f, (float) green(color) / 255.0f,
(float) blue(color) / 255.0f};
inputData[i] = (rgb[channelIdx[0]] - inputMean[0]) / inputStd[0];
inputData[i + channelStride[0]] = (rgb[channelIdx[1]] - inputMean[1]) / inputStd[1];
inputData[i + channelStride[1]] = (rgb[channelIdx[2]] - inputMean[2]) / inputStd[2];
}
} else if (channels == 1) {
int[] pixels = new int[width * height];
scaleImage.getPixels(pixels, 0, scaleImage.getWidth(), 0, 0, scaleImage.getWidth(), scaleImage.getHeight());
for (int i = 0; i < pixels.length; i++) {
int color = pixels[i];
float gray = (float) (red(color) + green(color) + blue(color)) / 3.0f / 255.0f;
inputData[i] = (gray - inputMean[0]) / inputStd[0];
}
} else {
Log.i(TAG, "Unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " +
"supported!");
return Collections.emptyList();
}
float[] pixels = inputData;
Log.i(TAG, "pixels " + pixels[0] + " " + pixels[1] + " " + pixels[2] + " " + pixels[3]
+ " " + pixels[pixels.length / 2] + " " + pixels[pixels.length / 2 + 1] + " " + pixels[pixels.length - 2] + " " + pixels[pixels.length - 1]);
Date end = new Date();
preprocessTime = (float) (end.getTime() - start.getTime());
// Warm up
for (int i = 0; i < warmupIterNum; i++) {
paddlePredictor.runImage(inputData, width, height, channels, inputImage);
}
warmupIterNum = 0; // do not need warm
// Run inference
start = new Date();
ArrayList<OcrResultModel> results = paddlePredictor.runImage(inputData, width, height, channels, inputImage);
end = new Date();
inferenceTime = (float) (end.getTime() - start.getTime());
results = postprocess(results);
return transformData(results);
}
}

View File

@ -141,7 +141,7 @@ module.exports = function(runtime, global){
return buildTypes.release;
}
global.zips = Object.create(runtime.zips);
global.ocr = Object.create(runtime.ocr);
global.$zips = Object.create(runtime.zips);
global.$ocr = Object.create(runtime.ocr);
}

View File

@ -68,6 +68,12 @@ public class ImageWrapper {
return new ImageWrapper(mat);
}
/**
 * Creates an {@code ImageWrapper} from an OpenCV matrix.
 * <p>
 * The input is cloned, and a new {@code Mat} is constructed from the clone's native object
 * address and handed to the {@code ImageWrapper} constructor.
 *
 * @param mat source matrix; may be {@code null}
 * @return the new wrapper, or {@code null} when {@code mat} is {@code null}
 */
public static ImageWrapper ofMat(org.opencv.core.Mat mat) {
if (mat == null) {
return null;
}
// NOTE(review): the cloned org.opencv.core.Mat object itself is not retained anywhere; confirm that
// it cannot release the native buffer while the new Mat built from its address is still in use
return new ImageWrapper(new Mat(mat.clone().getNativeObjAddr()));
}
public static ImageWrapper ofBitmap(Bitmap bitmap) {
if (bitmap == null) {

View File

@ -13,7 +13,7 @@ public class DeleteOnFinalizeFile {
public DeleteOnFinalizeFile(File file) {
fileObject = file;
}
/*
@Override
protected void finalize() throws Throwable {
super.finalize();
@ -23,5 +23,4 @@ public class DeleteOnFinalizeFile {
fileObject.delete();
}
}
*/
}

View File

@ -452,6 +452,7 @@ public class ScriptRuntime {
sensors = null;
ignoresException(timers::recycle);
ignoresException(ui::recycle);
ignoresException(ocr::release);
ignoresException(() -> mTopLevelScope.get().markReleased(engines.myEngine().getSource().toString()));
}

View File

@ -8,22 +8,26 @@ import com.baidu.paddle.lite.ocr.OcrResult;
import com.baidu.paddle.lite.ocr.Predictor;
import com.stardust.app.GlobalAppContext;
import com.stardust.autojs.core.image.ImageWrapper;
import com.stardust.concurrent.VolatileDispose;
import java.util.Collections;
import java.util.List;
public class OCR {
private Predictor mPredictor = new Predictor();
private final Predictor mPredictor = new Predictor();
public synchronized boolean init(boolean useSlim) {
if (!mPredictor.isLoaded) {
if (!mPredictor.isLoaded || useSlim != mPredictor.isUseSlim()) {
if (Looper.getMainLooper() == Looper.myLooper()) {
VolatileDispose<Boolean> result = new VolatileDispose<>();
new Thread(() -> {
mPredictor.init(GlobalAppContext.get(), useSlim);
result.setAndNotify(mPredictor.init(GlobalAppContext.get(), useSlim));
}).start();
return result.blockedGet();
} else {
mPredictor.init(GlobalAppContext.get(), useSlim);
return mPredictor.init(GlobalAppContext.get(), useSlim);
}
}
return mPredictor.isLoaded;
@ -33,7 +37,7 @@ public class OCR {
mPredictor.releaseModel();
}
public List<OcrResult> R(ImageWrapper image, int cpuThreadNum, boolean useSlim) {
public List<OcrResult> detect(ImageWrapper image, int cpuThreadNum, boolean useSlim) {
if (image == null) {
return Collections.emptyList();
}
@ -41,34 +45,38 @@ public class OCR {
if (bitmap == null || bitmap.isRecycled()) {
return Collections.emptyList();
}
if (mPredictor.cpuThreadNum != cpuThreadNum) {
mPredictor.releaseModel();
mPredictor.cpuThreadNum = cpuThreadNum;
}
init(useSlim);
return mPredictor.ocr(bitmap, cpuThreadNum);
return mPredictor.runOcr(bitmap);
}
public List<OcrResult> R(ImageWrapper image, int cpuThreadNum) {
return R(image, cpuThreadNum, true);
public List<OcrResult> detect(ImageWrapper image, int cpuThreadNum) {
return detect(image, cpuThreadNum, true);
}
public List<OcrResult> R(ImageWrapper image) {
return R(image, 4, true);
public List<OcrResult> detect(ImageWrapper image) {
return detect(image, 4, true);
}
public String[] T(ImageWrapper image, int cpuThreadNum, boolean useSlim) {
List<OcrResult> words_result = R(image, cpuThreadNum, useSlim);
public String[] recognizeText(ImageWrapper image, int cpuThreadNum, boolean useSlim) {
List<OcrResult> words_result = detect(image, cpuThreadNum, useSlim);
String[] outputResult = new String[words_result.size()];
for (int i = 0; i < words_result.size(); i++) {
outputResult[i] = words_result.get(i).words;
Log.i("outputResult", outputResult[i].toString()); // show LOG in Logcat panel
outputResult[i] = words_result.get(i).getLabel();
Log.i("outputResult", outputResult[i]); // show LOG in Logcat panel
}
return outputResult;
}
public String[] T(ImageWrapper image, int cpuThreadNum) {
return T(image, cpuThreadNum, true);
public String[] recognizeText(ImageWrapper image, int cpuThreadNum) {
return recognizeText(image, cpuThreadNum, true);
}
public String[] T(ImageWrapper image) {
return T(image, 4, true);
public String[] recognizeText(ImageWrapper image) {
return recognizeText(image, 4, true);
}
}

View File

@ -14,7 +14,7 @@ public class SevenZip {
}
}
public int A(String type, String destFilePath, String srcPath) {
public int addFiles(String type, String destFilePath, String srcPath) {
String typeOption = "";
if (!type.trim().isEmpty()) {
typeOption = " -t" + type.trim();
@ -32,7 +32,7 @@ public class SevenZip {
}
}
public int A(String type, String destFilePath, String srcPath, String password) {
public int addFiles(String type, String destFilePath, String srcPath, String password) {
String typeOption = "";
if (!type.trim().isEmpty()) {
typeOption = " -t" + type.trim();
@ -50,7 +50,7 @@ public class SevenZip {
}
}
public int X(String filePath0, String dirPath1) {
public int extraFiles(String filePath0, String dirPath1) {
String cmdStr = "7z x -y -aos " + filePath0;
if (PFiles.isFile(filePath0)) {
if (PFiles.isDir(dirPath1)) {
@ -66,10 +66,10 @@ public class SevenZip {
}
}
public int X(String filePath0, String dirPath1, String password) {
public int extraFiles(String filePath0, String dirPath1, String password) {
String cmdStr = "7z x -y -aos " + filePath0 + "";
if (password == "") {
X(filePath0, dirPath1);
if (password.equals("")) {
extraFiles(filePath0, dirPath1);
} else {
if (PFiles.isFile(filePath0)) {
if (PFiles.isDir(dirPath1)) {