Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pytesser.TesseractNotFound #9

Open
PGillner opened this issue Mar 21, 2018 · 1 comment
Open

pytesser.TesseractNotFound #9

PGillner opened this issue Mar 21, 2018 · 1 comment

Comments

@PGillner
Copy link

I keep getting the error:

Traceback (most recent call last):
  File "C:\Users\phijon0412\Desktop\Ny mapp (2)\test.py", line 6, in <module>
    txt = pytesser.image_to_string(image)
  File "C:\Users\phijon0412\Desktop\Ny mapp (2)\pytesser.py", line 60, in image_to_string
    txt = image_file_to_string(TEMP_IMAGE, lang, psm)
  File "C:\Users\phijon0412\Desktop\Ny mapp (2)\pytesser.py", line 65, in image_file_to_string
    check_path() #Check if tesseract available in the path
  File "C:\Users\phijon0412\Desktop\Ny mapp (2)\pytesser.py", line 35, in check_path
    raise TesseractNotFound()
pytesser.TesseractNotFound

But I'm really unsure why.

pytesser.py

import sys
from subprocess import Popen, PIPE
import os
import tempfile
import cv2

PROG_NAME = 'tesseract.exe'
TEMP_IMAGE = tempfile.mktemp()+'.bmp'
TEMP_FILE = tempfile.mktemp()

#All the PSM arguments as a variable name (avoid having to know them)
PSM_OSD_ONLY = 0
PSM_SEG_AND_OSD = 1
PSM_SEG_ONLY = 2
PSM_AUTO = 3
PSM_SINGLE_COLUMN = 4
PSM_VERTICAL_ALIGN = 5
PSM_UNIFORM_BLOCK = 6
PSM_SINGLE_LINE = 7
PSM_SINGLE_WORD = 8
PSM_SINGLE_WORD_CIRCLE = 9
PSM_SINGLE_CHAR = 10

class TesseractException(Exception): #Raised when tesseract does not return 0
    pass

class TesseractNotFound(Exception): #When tesseract is not found in the path
    pass

def check_path(): #Check if tesseract is in the path raise TesseractNotFound otherwise
    for path in os.environ.get('PATH', '').split(':'):
        filepath = os.path.join(path, PROG_NAME)
        if os.path.exists(filepath) and not os.path.isdir(filepath):
            return True
    raise TesseractNotFound()

def process_request(input_file, output_file, lang=None, psm=None):
    args = [PROG_NAME, input_file, output_file] #Create the arguments
    if lang is not None:
        args.append("-l")
        args.append(lang)
    if psm is not None:
        args.append("-psm")
        args.append(str(psm))
    proc = Popen(args, stdout=PIPE, stderr=PIPE) #Open process
    ret = proc.communicate() #Launch it

    code = proc.returncode
    if code != 0:
        if code == 2:
            raise TesseractException("File not found")
        if code == -11:
            raise TesseractException("Language code invalid: "+ret[1])
        else:
            raise TesseractException(ret[1])

def image_to_string(im, lang=None, psm=None):
    grayscale_image = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    cv2.imwrite(TEMP_IMAGE, grayscale_image)
    txt = image_file_to_string(TEMP_IMAGE, lang, psm)
    os.remove(TEMP_IMAGE)
    return txt

def image_file_to_string(file, lang=None, psm=None):
    check_path() #Check if tesseract available in the path
    grayscale_image = cv2.cvtColor(cv2.imread(file), cv2.COLOR_BGR2GRAY)
    cv2.imwrite(TEMP_IMAGE, grayscale_image)
    #process_request(file, TEMP_FILE, lang, psm)
    process_request(TEMP_IMAGE, TEMP_FILE, lang, psm)
    f = open(TEMP_FILE+".txt", "r") #Open back the file
    txt = f.read()
    f.close()
    os.remove(TEMP_FILE+".txt")
    os.remove(TEMP_IMAGE)
    return txt


if __name__ =='__main__':
    print(image_file_to_string(sys.argv[2], sys.argv[1], PSM_AUTO))
@RobinDavid
Copy link
Owner

Is the tesseract.exe executable in you path ? It has to be reachable so that subprocess can launch it. pytesser is just a simple wrapper around this executable.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants