It's no secret that Python has firmly taken the lead in ML and Data Science. But what about other languages and platforms? How convenient is it to build similar solutions on them?
For example, text recognition in a picture.
tesseract. Python , , , OpenCV. C++ , . jvm , , Kotlin.
Kotlin. Data Science. jvm «Python jvm». Kotlin Apache Spark.
tesseract. , wiki. , tesseract :
tesseract input_file.jpg stdout -l eng --tessdata-dir /usr/local/share/tessdata/
--tessdata-dir — tesseract (/usr/local/share/tessdata/ macos). stdout .
tesseract jvm . :
implementation("net.sourceforge.tess4j:tess4j:4.5.3")
, jvm, . Java 13+. sdkman. Intellij IDEA, Community version. IDE (new project -> Kotlin, gradle Kotlin) github, start.
tesseract . :
// Configure the Tess4J engine: where the trained data lives and which language model to use.
val api = Tesseract()
api.setDatapath("/usr/local/share/tessdata/")
api.setLanguage("eng")
// Decode the input file into a BufferedImage and run OCR on it; the recognized text comes back as a String.
val image = ImageIO.read(File("input_file.jpg"))
val result: String = api.doOCR(image)
, . , , macos jna.library.path
, dylib- tesseract.
// Add /usr/local/lib to the jna.library.path system property, but only if libtesseract.dylib is actually there.
val libPath = "/usr/local/lib"
val libTess = File(libPath, "libtesseract.dylib")
if (libTess.exists()) {
val jnaLibPath = System.getProperty("jna.library.path")
if (jnaLibPath == null) {
// Property not set yet: our directory becomes the whole search path.
System.setProperty("jna.library.path", libPath)
} else {
// Property already set: prepend our directory and keep the existing entries.
System.setProperty("jna.library.path", libPath + File.pathSeparator + jnaLibPath)
}
}
.
OpenCV. Python - , pip. OpenCV java , . jvm- , - , . - , (, , djl-pytorch), . , OpenCV , :
implementation("org.openpnp:opencv:4.3.0-2")
OpenCV :
nu.pattern.OpenCV.loadLocally()
. , , - :
Imgproc.cvtColor(mat, mat, Imgproc.COLOR_BGR2GRAY)
, OpenCV Mat, - OpenCV jvm, BufferedImage.
Mat Python imread
:
val mat = Imgcodecs.imread("input.jpg")
OpenCV . Java BufferedImage, , , pipeline . BufferedImage Mat:
// BufferedImage -> Mat: copy the image's raw bytes into a new 8-bit, 3-channel matrix.
val image: BufferedImage = ...
// NOTE(review): the cast only succeeds for byte-backed images, and CV_8UC3 below expects
// 3 bytes per pixel — presumably the input is TYPE_3BYTE_BGR; confirm before using other image types.
val pixels = (image.raster.dataBuffer as DataBufferByte).data
val mat = Mat(image.height, image.width, CvType.CV_8UC3)
.apply { put(0, 0, pixels) }
Mat BufferedImage:
// Mat -> BufferedImage: pick the image type from the channel count, then copy the bytes across.
val mat = ...
var type = BufferedImage.TYPE_BYTE_GRAY
if (mat.channels() > 1) {
// Any multi-channel Mat is treated as 3-byte BGR.
type = BufferedImage.TYPE_3BYTE_BGR
}
// One byte per channel per pixel.
val bufferSize = mat.channels() * mat.cols() * mat.rows()
val b = ByteArray(bufferSize)
mat[0, 0, b] // get all the pixels, starting at row 0, column 0
val image = BufferedImage(mat.cols(), mat.rows(), type)
// Write straight into the image's backing byte array.
val targetPixels = (image.raster.dataBuffer as DataBufferByte).data
System.arraycopy(b, 0, targetPixels, 0, b.size)
, tesseract doOCR
, BufferedImage. , OpenCV, Mat BufferedImage tesseract.
, :
. doOCR
getWords
, confidence (score Python-) :
val image = ImageIO.read(URL("http://img.ifcdn.com/images/b313c1f095336b6d681f75888f8932fc8a531eacd4bc436e4d4aeff7b599b600_1.jpg"))
val result = api.getWords(preparedImage, ITessAPI.TessPageIteratorLevel.RIL_WORD)
«»:
[ie, [Confidence: 2.014679 Bounding box: 100 0 13 14], bad [Confidence: 61.585358 Bounding box: 202 0 11 14], oy [Confidence: 24.619446 Bounding box: 21 68 18 22], ' [Confidence: 4.998787 Bounding box: 185 40 11 18], | [Confidence: 60.889648 Bounding box: 315 62 4 14], ae. [Confidence: 27.592728 Bounding box: 0 129 320 126], c [Confidence: 0.000000 Bounding box: 74 301 3 2], ai [Confidence: 24.988930 Bounding box: 133 283 41 11], ee [Confidence: 27.483231 Bounding box: 186 283 126 41]]
, , , , threshold , :
:
// convert to gray (in place)
Imgproc.cvtColor(mat, mat, Imgproc.COLOR_BGR2GRAY)
// text -> white, other -> black: pixels brighter than 244 become 255, everything else becomes 0
Imgproc.threshold(mat, mat, 244.0, 255.0, Imgproc.THRESH_BINARY)
// inverse: the text ends up black on a white background
Core.bitwise_not(mat, mat)
( Imgcodecs.imwrite("output.jpg", mat)
)
getWords
, :
[WHEN [Confidence: 94.933418 Bounding box: 48 251 52 14], SHE [Confidence: 95.249252 Bounding box: 109 251 34 15], CATCHES [Confidence: 95.973259 Bounding box: 151 251 80 15], YOU [Confidence: 96.446579 Bounding box: 238 251 33 15], CHEATING [Confidence: 96.458656 Bounding box: 117 278 86 15]]
, .
:
import net.sourceforge.tess4j.ITessAPI
import net.sourceforge.tess4j.Tesseract
import nu.pattern.OpenCV
import org.opencv.core.Core
import org.opencv.core.CvType
import org.opencv.core.Mat
import org.opencv.imgproc.Imgproc
import java.awt.image.BufferedImage
import java.awt.image.DataBufferByte
import java.io.File
import java.net.URL
import javax.imageio.ImageIO
/**
 * Downloads the sample picture, cleans it up with OpenCV and prints the words Tesseract finds in it.
 *
 * Pipeline: load native libraries -> fetch image -> grayscale -> binary threshold -> invert -> OCR.
 *
 * @throws IllegalStateException if the downloaded data cannot be decoded as an image
 */
fun main() {
    setupOpenCV()
    setupTesseract()

    val imageUrl = "http://img.ifcdn.com/images/b313c1f095336b6d681f75888f8932fc8a531eacd4bc436e4d4aeff7b599b600_1.jpg"
    // ImageIO.read returns null (instead of throwing) when no registered reader can decode the data,
    // so fail here with a clear message rather than with an NPE further down the pipeline.
    val image = ImageIO.read(URL(imageUrl)) ?: error("Cannot decode an image from $imageUrl")

    val mat = image.toMat()
    // All three steps work in place on the same Mat.
    Imgproc.cvtColor(mat, mat, Imgproc.COLOR_BGR2GRAY)
    // Pixels brighter than 244 (the text) become white, everything else black.
    Imgproc.threshold(mat, mat, 244.0, 255.0, Imgproc.THRESH_BINARY)
    // Invert: black text on a white background.
    Core.bitwise_not(mat, mat)
    val preparedImage = mat.toBufferedImage()

    val api = Tesseract()
    api.setDatapath("/usr/local/share/tessdata/")
    api.setLanguage("eng")
    // Word-level iteration returns each word with its confidence and bounding box.
    val result = api.getWords(preparedImage, ITessAPI.TessPageIteratorLevel.RIL_WORD)
    println(result)
}
/**
 * Puts `/usr/local/lib` at the front of the `jna.library.path` system property
 * when `libtesseract.dylib` is found in that directory.
 *
 * Leaves the property untouched if the library file is absent. An existing value
 * is preserved after the new entry, separated by [File.pathSeparator].
 */
private fun setupTesseract() {
    val nativeDir = "/usr/local/lib"
    if (!File(nativeDir, "libtesseract.dylib").exists()) return

    val propertyName = "jna.library.path"
    val existing = System.getProperty(propertyName)
    val searchPath = if (existing == null) nativeDir else nativeDir + File.pathSeparator + existing
    System.setProperty(propertyName, searchPath)
}
/**
 * Loads OpenCV through the `nu.pattern.OpenCV` loader.
 * `main` calls this before it touches any `org.opencv` type.
 */
private fun setupOpenCV() = OpenCV.loadLocally()
/**
 * Copies this image's pixels into a new 8-bit, 3-channel OpenCV [Mat] in B, G, R byte order.
 *
 * Images that are not already [BufferedImage.TYPE_3BYTE_BGR] (grayscale, int-packed RGB, images
 * with an alpha channel, ...) are first redrawn onto a `TYPE_3BYTE_BGR` copy, because only that
 * type matches the byte layout `CV_8UC3` expects. The target copy has no alpha
 * channel, so transparency is not preserved.
 *
 * @return a new `rows = height`, `cols = width` matrix of type `CV_8UC3`
 */
private fun BufferedImage.toMat(): Mat {
    val bgr = if (type == BufferedImage.TYPE_3BYTE_BGR) {
        this
    } else {
        BufferedImage(width, height, BufferedImage.TYPE_3BYTE_BGR).also { copy ->
            val graphics = copy.createGraphics()
            try {
                // `this` is still the source image here: `also` does not rebind the receiver.
                graphics.drawImage(this, 0, 0, null)
            } finally {
                graphics.dispose()
            }
        }
    }
    // Safe cast: TYPE_3BYTE_BGR images are backed by a byte buffer.
    val pixels = (bgr.raster.dataBuffer as DataBufferByte).data
    return Mat(bgr.height, bgr.width, CvType.CV_8UC3).apply { put(0, 0, pixels) }
}
/**
 * Copies this matrix into a new [BufferedImage].
 *
 * A 1-channel matrix becomes [BufferedImage.TYPE_BYTE_GRAY]; a 3-channel matrix becomes
 * [BufferedImage.TYPE_3BYTE_BGR]. Other channel counts are rejected up front: the previous
 * "anything above one channel is BGR" rule overflowed the image buffer for 4 channels and
 * silently produced a scrambled picture for 2.
 *
 * @return an image with `width = cols()` and `height = rows()`
 * @throws IllegalArgumentException if the matrix has neither 1 nor 3 channels
 */
private fun Mat.toBufferedImage(): BufferedImage {
    val imageType = when (val channelCount = channels()) {
        1 -> BufferedImage.TYPE_BYTE_GRAY
        3 -> BufferedImage.TYPE_3BYTE_BGR
        else -> throw IllegalArgumentException(
            "Cannot convert a Mat with $channelCount channels to a BufferedImage; expected 1 or 3"
        )
    }
    val image = BufferedImage(cols(), rows(), imageType)
    // Both supported image types are byte-backed with exactly channels * cols * rows bytes,
    // so the matrix can be read straight into the image's own buffer without a temporary array.
    val targetPixels = (image.raster.dataBuffer as DataBufferByte).data
    get(0, 0, targetPixels)
    return image
}
Python-, . ( , , Mat BufferedImage).
Python OpenCV tesseract. Python , .
jvm- . , , . , , , , , jvm Kotlin .
Python , c, ML. . , . , - Python, , .
, - . , :
djl.ai — Deep Learning jvm, pytorch tensorflow
deeplearning4j.org — tensorflow keras
kotlinlang.org/docs/reference/data-science-overview — Data Science Kotlin ( Java)