Skip to content

Commit

Permalink
feat(capture-sdk): Add MLKit text recognizer and a fake IBAN recognizer
Browse files Browse the repository at this point in the history
The IBAN recognizer is fake to get the text recognizer working first. If text is recognized, then
a hardcoded IBAN is returned.

IBAN recognition is only started if the entry point is set to FIELD.
  • Loading branch information
a-szotyori committed Sep 14, 2023
1 parent e0c732b commit 6ad6d57
Show file tree
Hide file tree
Showing 8 changed files with 519 additions and 9 deletions.
1 change: 1 addition & 0 deletions capture-sdk/sdk/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ dependencies {
implementation(libs.material)
implementation(libs.androidx.cardview)
implementation(libs.mlkit.barcodescanning)
implementation(libs.mlkit.textrecognition)
implementation(libs.apachecommons.imaging)
implementation(libs.completableFuture)

Expand Down
2 changes: 1 addition & 1 deletion capture-sdk/sdk/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
android:value="@integer/google_play_services_version" />
<meta-data
android:name="com.google.mlkit.vision.DEPENDENCIES"
android:value="barcode" />
android:value="barcode,ocr" />
</application>

</manifest>
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import net.gini.android.capture.AsyncCallback;
import net.gini.android.capture.Document;
import net.gini.android.capture.DocumentImportEnabledFileTypes;
import net.gini.android.capture.EntryPoint;
import net.gini.android.capture.GiniCapture;
import net.gini.android.capture.GiniCaptureError;
import net.gini.android.capture.ImportImageFileUrisAsyncTask;
Expand Down Expand Up @@ -51,11 +52,13 @@
import net.gini.android.capture.internal.network.AnalysisNetworkRequestResult;
import net.gini.android.capture.internal.network.FailureException;
import net.gini.android.capture.internal.network.NetworkRequestsManager;
import net.gini.android.capture.internal.textrecognition.IBANRecognizer;
import net.gini.android.capture.internal.qrcode.PaymentQRCodeData;
import net.gini.android.capture.internal.qrcode.PaymentQRCodeReader;
import net.gini.android.capture.internal.qrcode.QRCodeDetectorTask;
import net.gini.android.capture.internal.qrcode.QRCodeDetectorTaskMLKit;
import net.gini.android.capture.internal.storage.ImageDiskStore;
import net.gini.android.capture.internal.textrecognition.MLKitTextRecognizer;
import net.gini.android.capture.internal.ui.ClickListenerExtKt;
import net.gini.android.capture.internal.ui.FragmentImplCallback;
import net.gini.android.capture.internal.ui.IntervalClickListener;
Expand Down Expand Up @@ -92,6 +95,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;

import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
Expand Down Expand Up @@ -222,6 +226,8 @@ public void noExtractionsFromQRCode(QRCodeDocument qrCodeDocument) {
private InjectedViewContainer<CustomLoadingIndicatorAdapter> mLoadingIndicator;
private InjectedViewContainer<CameraNavigationBarBottomAdapter> mBottomInjectedContainer;

private IBANRecognizer ibanRecognizer;

CameraFragmentImpl(@NonNull final FragmentImplCallback fragment) {
mFragment = fragment;
}
Expand Down Expand Up @@ -393,6 +399,9 @@ public void onStart() {
if (isQRCodeScanningEnabled()) {
initQRCodeReader();
}
if (GiniCapture.hasInstance() && GiniCapture.getInstance().getEntryPoint() == EntryPoint.FIELD) {
initIBANRecognizer();
}

if (isCameraPermissionGranted()) {
openCamera().thenAccept(unused -> {
Expand Down Expand Up @@ -509,6 +518,13 @@ public void onInterrupted() {
});
}

/**
 * Creates the IBAN recognizer backed by an ML Kit text recognizer.
 * Does nothing if a recognizer instance already exists.
 */
private void initIBANRecognizer() {
    if (ibanRecognizer == null) {
        ibanRecognizer = new IBANRecognizer(MLKitTextRecognizer.newInstance());
    }
}

private void enableTapToFocus() {
mCameraController.enableTapToFocus(new CameraInterface.TapToFocusListener() {
@Override
Expand Down Expand Up @@ -622,6 +638,10 @@ private void closeCamera() {
mPaymentQRCodeReader.release();
mPaymentQRCodeReader = null; // NOPMD
}
if (ibanRecognizer != null) {
ibanRecognizer.close();
ibanRecognizer = null; // NOPMD
}
mCameraController.disableTapToFocus();
mCameraController.setPreviewCallback(null);
mCameraController.stopPreview();
Expand Down Expand Up @@ -1104,7 +1124,7 @@ public void documentAccepted() {
} else {
if (document.isReviewable()) {
if (document.getType() == Document.Type.IMAGE &&
document instanceof ImageDocument) {
document instanceof ImageDocument) {
final ImageMultiPageDocument multiPageDocument = new ImageMultiPageDocument(
document.getSource(), document.getImportMethod());
addToMultiPageDocumentMemoryStore(multiPageDocument);
Expand Down Expand Up @@ -1694,18 +1714,56 @@ void initCameraController(final Activity activity) {
mCameraController.setPreviewCallback(new CameraInterface.PreviewCallback() {
@Override
public void onPreviewFrame(@NonNull Image image, @NonNull Size imageSize, int rotation, @NonNull CameraInterface.PreviewFrameCallback previewFrameCallback) {
if (mPaymentQRCodeReader == null) {
return;
AtomicInteger previewFrameReferenceCount = new AtomicInteger();
if (ibanRecognizer != null) {
try {
previewFrameReferenceCount.getAndIncrement();

ibanRecognizer.processImage(image, imageSize.width, imageSize.height, rotation, (text) -> {
LOG.debug("IBAN recognized: {}", text);
previewFrameReferenceCount.getAndDecrement();
if (previewFrameReferenceCount.get() == 0) {
previewFrameCallback.onReleaseFrame();
}

return Unit.INSTANCE;
});
} catch (Exception e) {
LOG.error("Failed to process image for IBAN recognition", e);
previewFrameReferenceCount.getAndDecrement();
if (previewFrameReferenceCount.get() == 0) {
previewFrameCallback.onReleaseFrame();
}
}
}

if (mPaymentQRCodeReader != null) {
previewFrameReferenceCount.getAndIncrement();

mPaymentQRCodeReader.readFromImage(image, imageSize, rotation, () -> {
previewFrameReferenceCount.getAndDecrement();
if (previewFrameReferenceCount.get() == 0) {
previewFrameCallback.onReleaseFrame();
}
});
}
mPaymentQRCodeReader.readFromImage(image, imageSize, rotation, previewFrameCallback::onReleaseFrame);
}

@Override
public void onPreviewFrame(@NonNull byte[] image, @NonNull Size imageSize, int rotation) {
if (mPaymentQRCodeReader == null) {
return;
if (mPaymentQRCodeReader != null) {
mPaymentQRCodeReader.readFromByteArray(image, imageSize, rotation);
}
if (ibanRecognizer != null) {
try {
ibanRecognizer.processByteArray(image, imageSize.width, imageSize.height, rotation, (text) -> {
LOG.debug("IBAN recognized: {}", text);
return Unit.INSTANCE;
});
} catch (Exception e) {
LOG.error("Failed to process image for IBAN recognition", e);
}
}
mPaymentQRCodeReader.readFromByteArray(image, imageSize, rotation);
}
});
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package net.gini.android.capture.internal.textrecognition

import android.media.Image
import kotlin.jvm.Throws

/**
 * Use this class to recognize IBANs in images.
 *
 * **NOTE**: Real IBAN recognition is not implemented yet. Whenever the wrapped [TextRecognizer] reports text
 * that contains at least one non-whitespace character, a hardcoded placeholder IBAN is returned.
 *
 * @param textRecognizer a [TextRecognizer] implementation
 */
internal class IBANRecognizer(private val textRecognizer: TextRecognizer) {

    /**
     * Processes the given [Image] and returns the recognized IBAN in the callback.
     *
     * @param image the image to process
     * @param width the width of the image
     * @param height the height of the image
     * @param rotationDegrees the rotation of the image
     * @param doneCallback the callback which will receive the recognized IBAN or null if no IBAN was found
     * @throws IllegalArgumentException if [width] or [height] is 0
     */
    @Throws(IllegalArgumentException::class)
    fun processImage(image: Image, width: Int, height: Int, rotationDegrees: Int, doneCallback: (String?) -> Unit) {
        requireNonZeroSize(width, height)
        textRecognizer.processImage(image, width, height, rotationDegrees) { recognizedText ->
            doneCallback(extractIBAN(recognizedText))
        }
    }

    /**
     * Processes the given image byte array and returns the recognized IBAN in the callback.
     *
     * @param byteArray the image byte array to process
     * @param width the width of the image
     * @param height the height of the image
     * @param rotationDegrees the rotation of the image
     * @param doneCallback the callback which will receive the recognized IBAN or null if no IBAN was found
     * @throws IllegalArgumentException if [width] or [height] is 0
     */
    @Throws(IllegalArgumentException::class)
    fun processByteArray(byteArray: ByteArray, width: Int, height: Int, rotationDegrees: Int, doneCallback: (String?) -> Unit) {
        requireNonZeroSize(width, height)
        textRecognizer.processByteArray(byteArray, width, height, rotationDegrees) { recognizedText ->
            doneCallback(extractIBAN(recognizedText))
        }
    }

    /**
     * Closes the IBAN recognizer.
     *
     * **IMPORTANT**: You must call this method when you are done with the IBAN recognizer.
     */
    fun close() {
        textRecognizer.close()
    }

    /** Rejects images with a zero dimension before they are handed to the text recognizer. */
    private fun requireNonZeroSize(width: Int, height: Int) {
        require(width != 0) { "Image width is 0" }
        require(height != 0) { "Image height is 0" }
    }

    /**
     * Maps the recognized text to an IBAN.
     *
     * Returns null if [recognizedText] is null or contains only whitespace, otherwise the placeholder IBAN.
     */
    private fun extractIBAN(recognizedText: String?): String? {
        val withoutWhitespace = recognizedText?.replace(WHITESPACE_REGEX, "")
        // TODO: Replace with IBAN recognition logic
        return if (withoutWhitespace.isNullOrEmpty()) null else PLACEHOLDER_IBAN
    }

    private companion object {
        // Compiled once: the process methods are invoked for every camera preview frame.
        val WHITESPACE_REGEX = Regex("\\s")

        // Hardcoded result returned until real IBAN recognition is implemented.
        const val PLACEHOLDER_IBAN = "DE78500105172594181438"
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package net.gini.android.capture.internal.textrecognition

import android.media.Image
import com.google.android.gms.tasks.Task
import com.google.mlkit.vision.common.InputImage
import com.google.mlkit.vision.text.Text
import com.google.mlkit.vision.text.TextRecognition
import com.google.mlkit.vision.text.latin.TextRecognizerOptions
import org.slf4j.Logger
import org.slf4j.LoggerFactory

/**
 * Use this class to recognize text in images via the ML Kit Text Recognition API.
 *
 * Only one image is processed at a time: while a recognition task is in flight, further images are rejected
 * and their `doneCallback` is invoked with null right away.
 *
 * NOTE(review): [processingTask] is a plain, unsynchronized field. This presumably relies on the process methods
 * and the task listeners running on the same thread - confirm against the callers.
 */
internal class MLKitTextRecognizer(private val recognizer: com.google.mlkit.vision.text.TextRecognizer) :
    TextRecognizer {

    // Non-null while ML Kit is processing an image. Used as the "busy" marker.
    private var processingTask: Task<Text>? = null

    /**
     * Processes the given [Image] and returns the recognized text in the callback.
     *
     * **IMPORTANT**: If an image is already processing then `doneCallback` will be called with null and the new image
     * will not be processed.
     *
     * @param image the image to process
     * @param width the width of the image (not used by this implementation)
     * @param height the height of the image (not used by this implementation)
     * @param rotationDegrees the rotation of the image
     * @param doneCallback the callback which will receive the recognized text or null if no text was found
     */
    override fun processImage(
        image: Image,
        width: Int,
        height: Int,
        rotationDegrees: Int,
        doneCallback: (String?) -> Unit
    ) {
        if (processingTask != null) {
            rejectBecauseBusy(doneCallback)
            return
        }

        startProcessing(InputImage.fromMediaImage(image, rotationDegrees), doneCallback)
    }

    /**
     * Processes the given image byte array and returns the recognized text in the callback.
     *
     * The byte array is passed to ML Kit as an NV21 image.
     *
     * **IMPORTANT**: If an image is already processing then `doneCallback` will be called with null and the new image
     * will not be processed.
     *
     * @param byteArray the image byte array to process
     * @param width the width of the image
     * @param height the height of the image
     * @param rotationDegrees the rotation of the image
     * @param doneCallback the callback which will receive the recognized text or null if no text was found
     */
    override fun processByteArray(
        byteArray: ByteArray,
        width: Int,
        height: Int,
        rotationDegrees: Int,
        doneCallback: (String?) -> Unit
    ) {
        if (processingTask != null) {
            rejectBecauseBusy(doneCallback)
            return
        }

        startProcessing(
            InputImage.fromByteArray(
                byteArray,
                width,
                height,
                rotationDegrees,
                InputImage.IMAGE_FORMAT_NV21
            ),
            doneCallback
        )
    }

    /**
     * Closes the underlying ML Kit text recognizer.
     */
    override fun close() {
        recognizer.close()
    }

    /**
     * Reports a rejected image to the caller. The warning is only logged in [DEBUG] mode because
     * rejections can happen for every incoming frame while a recognition task is running.
     */
    private fun rejectBecauseBusy(doneCallback: (String?) -> Unit) {
        if (DEBUG) {
            LOG.warn("Text recognizer is already processing an image")
        }
        doneCallback(null)
    }

    /**
     * Starts recognition for [inputImage] and invokes [doneCallback] exactly once when the task completes:
     * with the recognized text on success, or with null if the task failed or was canceled.
     */
    private fun startProcessing(inputImage: InputImage, doneCallback: (String?) -> Unit) {
        val task = recognizer.process(inputImage)
        processingTask = task
        // A single completion listener covers success, failure and cancellation, so the callback
        // can never be skipped.
        task.addOnCompleteListener { completedTask ->
            // Clear the busy marker before notifying, so the callback may submit the next image.
            processingTask = null

            val recognizedText: String? = if (completedTask.isSuccessful) completedTask.result.text else null
            if (DEBUG) {
                when {
                    completedTask.isSuccessful -> LOG.debug("Text recognizer success: {}", recognizedText)
                    completedTask.isCanceled -> LOG.warn("Text recognizer was canceled")
                    else -> LOG.error("Text recognizer failed", completedTask.exception)
                }
            }
            doneCallback(recognizedText)
        }
    }

    companion object {
        /** Enables verbose logging of recognition results, failures and rejected images. */
        const val DEBUG = false
        val LOG: Logger = LoggerFactory.getLogger(MLKitTextRecognizer::class.java)

        /**
         * Creates a [MLKitTextRecognizer] backed by an ML Kit text recognition client created with
         * [TextRecognizerOptions.DEFAULT_OPTIONS].
         */
        @JvmStatic
        fun newInstance() = MLKitTextRecognizer(TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS))
    }
}
Loading

0 comments on commit 6ad6d57

Please sign in to comment.