diff --git a/README.md b/README.md index 4085a69..533ce54 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -## Erpnext Ocr +## Erpnext OCR -OCR +OCR with [tesseract](https://github.com/tesseract-ocr/tesseract). #### License @@ -22,21 +22,25 @@ Examples to implement OCR(Optical Character Recognition) using tesseract using P ``` sudo apt-get install tesseract-ocr ``` -- Install python binding for tesseract, pytesseract, using this pip command: +- Install python binding for tesseract, [pytesseract](https://pypi.org/project/pytesseract/), using this pip command: ``` pip install pytesseract ``` -- Install image processing library in python, pillow using this pip command: +- Install image processing library in python, [pillow](https://pypi.org/project/Pillow/), using this pip command: ``` pip install pillow ``` +- Install HTTP library in python, [requests](https://pypi.org/project/requests/), using this pip command: + ``` + pip install requests + ``` **For working with pdf files:** - Install imagemagick using this command: ``` sudo apt-get install imagemagick ``` -- Install python binding for imagemagick, wand, using this pip command: +- Install python binding for imagemagick, [wand](https://pypi.org/project/Wand/), using this pip command: ``` pip install wand ``` \ No newline at end of file diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py index d983066..5ea0a28 100644 --- a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py +++ b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py @@ -5,6 +5,7 @@ from __future__ import unicode_literals import frappe from frappe.model.document import Document +import os #Alternative to "File Upload Disconnected. Please try again." 
@@ -40,9 +41,24 @@ def force_attach_file_doc(filename,name): class OCRRead(Document): def read_image(self): from PIL import Image + import requests import pytesseract - fullpath = frappe.get_site_path() + self.file_to_read + path = self.file_to_read + + if path.startswith('/assets/'): + # from public folder + fullpath = os.path.abspath(path) + elif path.startswith('/files/'): + # public file + fullpath = frappe.get_site_path() + '/public' + path + elif path.startswith('/private/files/'): + # private file + fullpath = frappe.get_site_path() + path + else: + # external link + fullpath = requests.get(path, stream=True).raw + im = Image.open(fullpath) text = pytesseract.image_to_string(im, lang='eng') diff --git a/requirements.txt b/requirements.txt index 5ac1c81..508a314 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -frappe \ No newline at end of file +frappe +requests \ No newline at end of file