diff --git a/capture-sdk/sdk/build.gradle.kts b/capture-sdk/sdk/build.gradle.kts index 8febd63c3c..d991614201 100644 --- a/capture-sdk/sdk/build.gradle.kts +++ b/capture-sdk/sdk/build.gradle.kts @@ -122,6 +122,7 @@ dependencies { implementation(libs.material) implementation(libs.androidx.cardview) implementation(libs.mlkit.barcodescanning) + implementation(libs.mlkit.textrecognition) implementation(libs.apachecommons.imaging) implementation(libs.completableFuture) diff --git a/capture-sdk/sdk/src/main/AndroidManifest.xml b/capture-sdk/sdk/src/main/AndroidManifest.xml index e04993d093..5292a3fe79 100644 --- a/capture-sdk/sdk/src/main/AndroidManifest.xml +++ b/capture-sdk/sdk/src/main/AndroidManifest.xml @@ -70,7 +70,7 @@ android:value="@integer/google_play_services_version" /> + android:value="barcode,ocr" /> \ No newline at end of file diff --git a/capture-sdk/sdk/src/main/java/net/gini/android/capture/camera/CameraFragmentImpl.java b/capture-sdk/sdk/src/main/java/net/gini/android/capture/camera/CameraFragmentImpl.java index 4f63ec07ce..c742754e3d 100644 --- a/capture-sdk/sdk/src/main/java/net/gini/android/capture/camera/CameraFragmentImpl.java +++ b/capture-sdk/sdk/src/main/java/net/gini/android/capture/camera/CameraFragmentImpl.java @@ -24,6 +24,7 @@ import net.gini.android.capture.AsyncCallback; import net.gini.android.capture.Document; import net.gini.android.capture.DocumentImportEnabledFileTypes; +import net.gini.android.capture.EntryPoint; import net.gini.android.capture.GiniCapture; import net.gini.android.capture.GiniCaptureError; import net.gini.android.capture.ImportImageFileUrisAsyncTask; @@ -51,11 +52,13 @@ import net.gini.android.capture.internal.network.AnalysisNetworkRequestResult; import net.gini.android.capture.internal.network.FailureException; import net.gini.android.capture.internal.network.NetworkRequestsManager; +import net.gini.android.capture.internal.textrecognition.IBANRecognizer; import net.gini.android.capture.internal.qrcode.PaymentQRCodeData; import net.gini.android.capture.internal.qrcode.PaymentQRCodeReader; import net.gini.android.capture.internal.qrcode.QRCodeDetectorTask; import net.gini.android.capture.internal.qrcode.QRCodeDetectorTaskMLKit; import net.gini.android.capture.internal.storage.ImageDiskStore; +import net.gini.android.capture.internal.textrecognition.MLKitTextRecognizer; import net.gini.android.capture.internal.ui.ClickListenerExtKt; import net.gini.android.capture.internal.ui.FragmentImplCallback; import net.gini.android.capture.internal.ui.IntervalClickListener; @@ -92,6 +95,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; import androidx.annotation.NonNull; import androidx.annotation.Nullable; @@ -222,6 +226,8 @@ public void noExtractionsFromQRCode(QRCodeDocument qrCodeDocument) { private InjectedViewContainer mLoadingIndicator; private InjectedViewContainer mBottomInjectedContainer; + private IBANRecognizer ibanRecognizer; + CameraFragmentImpl(@NonNull final FragmentImplCallback fragment) { mFragment = fragment; } @@ -393,6 +399,9 @@ public void onStart() { if (isQRCodeScanningEnabled()) { initQRCodeReader(); } + if (GiniCapture.hasInstance() && GiniCapture.getInstance().getEntryPoint() == EntryPoint.FIELD) { + initIBANRecognizer(); + } if (isCameraPermissionGranted()) { openCamera().thenAccept(unused -> { @@ -509,6 +518,13 @@ public void onInterrupted() { }); } + private void initIBANRecognizer() { + if (ibanRecognizer != null) { + return; + } + ibanRecognizer = new IBANRecognizer(MLKitTextRecognizer.newInstance()); + } + private void enableTapToFocus() { mCameraController.enableTapToFocus(new CameraInterface.TapToFocusListener() { @Override @@ -622,6 +638,10 @@ private void closeCamera() { mPaymentQRCodeReader.release(); mPaymentQRCodeReader = null; // NOPMD } + if (ibanRecognizer != null) { + ibanRecognizer.close(); + ibanRecognizer = null; // NOPMD + } mCameraController.disableTapToFocus(); mCameraController.setPreviewCallback(null); mCameraController.stopPreview(); @@ -1104,7 +1124,7 @@ public void documentAccepted() { } else { if (document.isReviewable()) { if (document.getType() == Document.Type.IMAGE && - document instanceof ImageDocument) { + document instanceof ImageDocument) { final ImageMultiPageDocument multiPageDocument = new ImageMultiPageDocument( document.getSource(), document.getImportMethod()); addToMultiPageDocumentMemoryStore(multiPageDocument); @@ -1694,18 +1714,56 @@ void initCameraController(final Activity activity) { mCameraController.setPreviewCallback(new CameraInterface.PreviewCallback() { @Override public void onPreviewFrame(@NonNull Image image, @NonNull Size imageSize, int rotation, @NonNull CameraInterface.PreviewFrameCallback previewFrameCallback) { - if (mPaymentQRCodeReader == null) { - return; + AtomicInteger previewFrameReferenceCount = new AtomicInteger(); + if (ibanRecognizer != null) { + try { + previewFrameReferenceCount.getAndIncrement(); + + ibanRecognizer.processImage(image, imageSize.width, imageSize.height, rotation, (text) -> { + LOG.debug("IBAN recognized: {}", text); + previewFrameReferenceCount.getAndDecrement(); + if (previewFrameReferenceCount.get() == 0) { + previewFrameCallback.onReleaseFrame(); + } + + return Unit.INSTANCE; + }); + } catch (Exception e) { + LOG.error("Failed to process image for IBAN recognition", e); + previewFrameReferenceCount.getAndDecrement(); + if (previewFrameReferenceCount.get() == 0) { + previewFrameCallback.onReleaseFrame(); + } + } + } + + if (mPaymentQRCodeReader != null) { + previewFrameReferenceCount.getAndIncrement(); + + mPaymentQRCodeReader.readFromImage(image, imageSize, rotation, () -> { + previewFrameReferenceCount.getAndDecrement(); + if (previewFrameReferenceCount.get() == 0) { + previewFrameCallback.onReleaseFrame(); + } + }); } - mPaymentQRCodeReader.readFromImage(image, imageSize, rotation, previewFrameCallback::onReleaseFrame); } @Override public void onPreviewFrame(@NonNull byte[] image, @NonNull Size imageSize, int rotation) { - if (mPaymentQRCodeReader == null) { - return; + if (mPaymentQRCodeReader != null) { + mPaymentQRCodeReader.readFromByteArray(image, imageSize, rotation); + } + if (ibanRecognizer != null) { + try { + ibanRecognizer.processByteArray(image, imageSize.width, imageSize.height, rotation, (text) -> { + LOG.debug("IBAN recognized: {}", text); + return Unit.INSTANCE; + }); + } catch (Exception e) { + LOG.error("Failed to process image for IBAN recognition", e); + } } - mPaymentQRCodeReader.readFromByteArray(image, imageSize, rotation); } }); } diff --git a/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/IBANRecognizer.kt b/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/IBANRecognizer.kt new file mode 100644 index 0000000000..cf360161d5 --- /dev/null +++ b/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/IBANRecognizer.kt @@ -0,0 +1,78 @@ +package net.gini.android.capture.internal.textrecognition + +import android.media.Image +import kotlin.jvm.Throws + +/** + * Use this class to recognize IBANs in images. + * + * @param textRecognizer a [TextRecognizer] implementation + */ +internal class IBANRecognizer(private val textRecognizer: TextRecognizer) { + + /** + * Processes the given [Image] and returns the recognized IBAN in the callback. + * + * @param image the image to process + * @param width the width of the image + * @param height the height of the image + * @param rotationDegrees the rotation of the image + * @param doneCallback the callback which will receive the recognized IBAN or null if no IBAN was found + */ + @Throws(IllegalArgumentException::class) + fun processImage(image: Image, width: Int, height: Int, rotationDegrees: Int, doneCallback: (String?) -> Unit) { + if (width == 0) { + throw IllegalArgumentException("Image width is 0") + } + if (height == 0) { + throw IllegalArgumentException("Image height is 0") + } + textRecognizer.processImage(image, width, height, rotationDegrees) { recognizedText -> + val withoutWhitespace = recognizedText?.replace("\\s".toRegex(), "") + // TODO: Replace with IBAN recognition logic + val result = if (!withoutWhitespace.isNullOrEmpty()) { + "DE78500105172594181438" + } else { + null + } + doneCallback(result) + } + } + + /** + * Processes the given image byte array and returns the recognized IBAN in the callback. + * + * @param byteArray the image byte array to process + * @param width the width of the image + * @param height the height of the image + * @param rotationDegrees the rotation of the image + * @param doneCallback the callback which will receive the recognized IBAN or null if no IBAN was found + */ + @Throws(IllegalArgumentException::class) + fun processByteArray(byteArray: ByteArray, width: Int, height: Int, rotationDegrees: Int, doneCallback: (String?) -> Unit) { + if (width == 0) { + throw IllegalArgumentException("Image width is 0") + } + if (height == 0) { + throw IllegalArgumentException("Image height is 0") + } + textRecognizer.processByteArray(byteArray, width, height, rotationDegrees) { recognizedText -> + val withoutWhitespace = recognizedText?.replace("\\s".toRegex(), "") + val result = if (!withoutWhitespace.isNullOrEmpty()) { + "DE78500105172594181438" + } else { + null + } + doneCallback(result) + } + } + + /** + * Closes the IBAN recognizer. + * + * **IMPORTANT**: You must call this method when you are done with the IBAN recognizer. + */ + fun close() { + textRecognizer.close() + } +} diff --git a/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/MLKitTextRecognizer.kt b/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/MLKitTextRecognizer.kt new file mode 100644 index 0000000000..dda172c834 --- /dev/null +++ b/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/MLKitTextRecognizer.kt @@ -0,0 +1,118 @@ +package net.gini.android.capture.internal.textrecognition + +import android.media.Image +import com.google.android.gms.tasks.Task +import com.google.mlkit.vision.common.InputImage +import com.google.mlkit.vision.text.Text +import com.google.mlkit.vision.text.TextRecognition +import com.google.mlkit.vision.text.latin.TextRecognizerOptions +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +/** + * Use this class to recognize text in images via the ML Kit Text Recognition API. + */ +internal class MLKitTextRecognizer(private val recognizer: com.google.mlkit.vision.text.TextRecognizer) : + TextRecognizer { + + private var processingTask: Task? = null + + /** + * Processes the given [Image] and returns the recognized text in the callback. + * + * **IMPORTANT**: If an image is already processing then `doneCallback` will be called with null and the new image + * will not be processed. + * + * @param image the image to process + * @param width the width of the image + * @param height the height of the image + * @param rotationDegrees the rotation of the image + * @param doneCallback the callback which will receive the recognized text or null if no text was found + */ + override fun processImage( + image: Image, + width: Int, + height: Int, + rotationDegrees: Int, + doneCallback: (String?) -> Unit + ) { + if (processingTask != null) { + LOG.warn("Text recognizer is already processing an image") + doneCallback(null) + return + } + + processingTask = recognizer.process(InputImage.fromMediaImage(image, rotationDegrees)) + handleProcessingTask(doneCallback) + } + + /** + * Processes the given image byte array and returns the recognized text in the callback. + * + * **IMPORTANT**: If an image is already processing then `doneCallback` will be called with null and the new image + * will not be processed. + * + * @param byteArray the image byte array to process + * @param width the width of the image + * @param height the height of the image + * @param rotationDegrees the rotation of the image + * @param doneCallback the callback which will receive the recognized text or null if no text was found + */ + override fun processByteArray( + byteArray: ByteArray, + width: Int, + height: Int, + rotationDegrees: Int, + doneCallback: (String?) -> Unit + ) { + if (processingTask != null) { + if (DEBUG) { + LOG.warn("Text recognizer is already processing an image") + } + doneCallback(null) + return + } + + processingTask = recognizer.process( + InputImage.fromByteArray( + byteArray, + width, + height, + rotationDegrees, + InputImage.IMAGE_FORMAT_NV21 + ) + ) + handleProcessingTask(doneCallback) + } + + override fun close() { + recognizer.close() + } + + private fun handleProcessingTask(doneCallback: (String?) -> Unit) { + processingTask + ?.addOnSuccessListener { result -> + if (DEBUG) { + LOG.debug("Text recognizer success: {}", result.text) + } + doneCallback(result.text) + } + ?.addOnFailureListener { e -> + if (DEBUG) { + LOG.error("Text recognizer failed", e) + } + doneCallback(null) + } + ?.addOnCompleteListener { + processingTask = null + } + } + + companion object { + const val DEBUG = false + val LOG: Logger = LoggerFactory.getLogger(MLKitTextRecognizer::class.java) + + @JvmStatic + fun newInstance() = MLKitTextRecognizer(TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)) + } +} diff --git a/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/TextRecognizer.kt b/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/TextRecognizer.kt new file mode 100644 index 0000000000..eb8b3505b9 --- /dev/null +++ b/capture-sdk/sdk/src/main/java/net/gini/android/capture/internal/textrecognition/TextRecognizer.kt @@ -0,0 +1,48 @@ +package net.gini.android.capture.internal.textrecognition + +import android.media.Image + +/** + * Interface for recognizing text in images. + */ +internal interface TextRecognizer { + + /** + * Processes the given [Image] and returns the recognized text in the callback. + * + * @param image the image to process + * @param width the width of the image + * @param height the height of the image + * @param rotationDegrees the rotation of the image + * @param doneCallback the callback which will receive the recognized text or null if no text was found + */ + fun processImage(image: Image, + width: Int, + height: Int, + rotationDegrees: Int, + doneCallback: (String?) -> Unit) + + /** + * Processes the given image byte array and returns the recognized text in the callback. + * + * @param byteArray the image byte array to process + * @param width the width of the image + * @param height the height of the image + * @param rotationDegrees the rotation of the image + * @param doneCallback the callback which will receive the recognized text or null if no text was found + */ + fun processByteArray( + byteArray: ByteArray, + width: Int, + height: Int, + rotationDegrees: Int, + doneCallback: (String?) -> Unit + ) + + /** + * Closes the text recognizer. + * + * **IMPORTANT**: You must call this method when you are done with the text recognizer. + */ + fun close() +} \ No newline at end of file diff --git a/capture-sdk/sdk/src/test/java/net/gini/android/capture/internal/textrecognition/IBANRecognizerTest.kt b/capture-sdk/sdk/src/test/java/net/gini/android/capture/internal/textrecognition/IBANRecognizerTest.kt new file mode 100644 index 0000000000..ac7488ec19 --- /dev/null +++ b/capture-sdk/sdk/src/test/java/net/gini/android/capture/internal/textrecognition/IBANRecognizerTest.kt @@ -0,0 +1,206 @@ +package net.gini.android.capture.internal.textrecognition + +import android.media.Image +import com.google.common.truth.Truth.assertThat +import com.nhaarman.mockitokotlin2.mock +import junitparams.JUnitParamsRunner +import junitparams.Parameters +import org.junit.Before +import org.junit.Test +import org.junit.runner.RunWith + +@RunWith(JUnitParamsRunner::class) +class IBANRecognizerTest { + + private lateinit var ibanRecognizer: IBANRecognizer + + @Before + fun setup() { + ibanRecognizer = IBANRecognizer(TextRecognizerDummy()) + } + + @Test + fun `returns null when no IBAN found in image byte array`() { + // Given + val byteArray = ByteArray(100) + ibanRecognizer = IBANRecognizer(TextRecognizerStub("")) + + // When + ibanRecognizer.processByteArray(byteArray, 200, 300, 0) { iban -> + // Then + assertThat(iban).isNull() + } + } + + @Test + fun `returns null when no IBAN found in image`() { + // Given + val image: Image = mock() + ibanRecognizer = IBANRecognizer(TextRecognizerStub("")) + + // When + ibanRecognizer.processImage(image, 200, 300, 0) { iban -> + // Then + assertThat(iban).isNull() + } + } + + @Test + fun `throws IllegalArgumentException if image width is 0 in byte array`() { + // Given + val byteArray = ByteArray(0) + + // When + var exception: Exception? = null + try { + ibanRecognizer.processByteArray(byteArray, 0, 300, 0) { } + } catch (e: Exception) { + exception = e + } + + // Then + assertThat(exception).isInstanceOf(IllegalArgumentException::class.java) + } + + @Test + fun `throws IllegalArgumentException if image width is 0`() { + // Given + val image: Image = mock() + + // When + var exception: Exception? = null + try { + ibanRecognizer.processImage(image, 0, 300, 0) { } + } catch (e: Exception) { + exception = e + } + + // Then + assertThat(exception).isInstanceOf(IllegalArgumentException::class.java) + } + + @Test + fun `throws IllegalArgumentException if image height is 0 in byte array`() { + // Given + val byteArray = ByteArray(0) + + // When + var exception: Exception? = null + try { + val iban = ibanRecognizer.processByteArray(byteArray, 200, 0, 0) { } + } catch (e: Exception) { + exception = e + } + + // Then + assertThat(exception).isInstanceOf(IllegalArgumentException::class.java) + } + + @Test + fun `throws IllegalArgumentException if image height is 0`() { + // Given + val image: Image = mock() + + // When + var exception: Exception? = null + try { + val iban = ibanRecognizer.processImage(image, 200, 0, 0) { } + } catch (e: Exception) { + exception = e + } + + // Then + assertThat(exception).isInstanceOf(IllegalArgumentException::class.java) + } + + @Test + @Parameters(method = "recognizeIBANinImageValues") + @Suppress("JUnitMalformedDeclaration") + fun `recognize IBAN in image byte array`(recognizedText: String, expectedIBAN: String) { + // Given + val byteArray = ByteArray(100) + ibanRecognizer = IBANRecognizer(TextRecognizerStub(recognizedText)) + + // When + ibanRecognizer.processByteArray(byteArray, 200, 300, 0) { iban -> + // Then + assertThat(iban).isEqualTo(expectedIBAN) + } + } + + @Test + @Parameters(method = "recognizeIBANinImageValues") + @Suppress("JUnitMalformedDeclaration") + fun `recognize IBAN in image`(recognizedText: String, expectedIBAN: String) { + // Given + val image: Image = mock() + ibanRecognizer = IBANRecognizer(TextRecognizerStub(recognizedText)) + + // When + ibanRecognizer.processImage(image, 200, 300, 0) { iban -> + // Then + assertThat(iban).isEqualTo(expectedIBAN) + } + } + + private fun recognizeIBANinImageValues(): Array = arrayOf( + // recognizedText, expectedIBAN + arrayOf("DE78500105172594181438", "DE78500105172594181438"), + arrayOf("DE78 5001 0517 2594 1814 38", "DE78500105172594181438") + // TODO: Add more values + ) + + class TextRecognizerDummy : TextRecognizer { + override fun processImage( + image: Image, + width: Int, + height: Int, + rotationDegrees: Int, + doneCallback: (String?) -> Unit + ) { + doneCallback(null) + } + + override fun processByteArray( + byteArray: ByteArray, + width: Int, + height: Int, + rotationDegrees: Int, + doneCallback: (String?) -> Unit + ) { + doneCallback(null) + } + + override fun close() { + + } + + } + + class TextRecognizerStub(private val text: String?) : TextRecognizer { + override fun processImage( + image: Image, + width: Int, + height: Int, + rotationDegrees: Int, + doneCallback: (String?) -> Unit + ) { + doneCallback(text) + } + + override fun processByteArray( + byteArray: ByteArray, + width: Int, + height: Int, + rotationDegrees: Int, + doneCallback: (String?) -> Unit + ) { + doneCallback(text) + } + + override fun close() { + + } + + } +} \ No newline at end of file diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 5752e46549..fa1269d660 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -76,7 +76,8 @@ turbine = "app.cash.turbine:turbine:0.12.3" junit = "junit:junit:4.13.2" detekt-gradle = "io.gitlab.arturbosch.detekt:detekt-gradle-plugin:1.18.1" ktlint-gradle = "org.jlleitschuh.gradle:ktlint-gradle:10.2.0" -mlkit-barcodescanning = "com.google.android.gms:play-services-mlkit-barcode-scanning:18.2.0" +mlkit-barcodescanning = "com.google.android.gms:play-services-mlkit-barcode-scanning:18.3.0" +mlkit-textrecognition = "com.google.android.gms:play-services-mlkit-text-recognition:19.0.0" apachecommons-imaging = "org.apache.commons:commons-imaging:1.0-alpha2" completableFuture = "org.glassfish.jersey.bundles.repackaged:jersey-jsr166e:2.25.1" truth = "com.google.truth:truth:1.1.3"