jvfiel · madmath03 · Jun 27, 2019 · Jun 27, 2019 · Jun 27, 2019 · Jun 27, 2019
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
-## Erpnext Ocr
+## ERPNext OCR
 
-OCR
+OCR with [tesseract](https://github.com/tesseract-ocr/tesseract).
 
 #### License
 
@@ -22,21 +22,25 @@ Examples to implement OCR(Optical Character Recognition) using tesseract using P
   ```
   sudo apt-get install tesseract-ocr
   ```
-- Install python binding for tesseract, pytesseract, using this pip command:
+- Install python binding for tesseract, [pytesseract](https://pypi.org/project/pytesseract/), using this pip command:
   ```
   pip install pytesseract
   ```
-- Install image processing library in python, pillow using this pip command:
+- Install image processing library in python, [pillow](https://pypi.org/project/Pillow/), using this pip command:
   ```
   pip install pillow
   ```
+- Install HTTP library in python, [requests](https://pypi.org/project/requests/), using this pip command:
+  ```
+  pip install requests
+  ```
 
 **For working with pdf files:**
 - Install imagemagick using this command:
   ```
   sudo apt-get install imagemagick
   ```
-- Install python binding for imagemagick, wand, using this pip command:
+- Install python binding for imagemagick, [wand](https://pypi.org/project/Wand/), using this pip command:
   ```
   pip install wand
   ```
diff --git a/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py b/erpnext_ocr/erpnext_ocr/doctype/ocr_read/ocr_read.py
@@ -5,6 +5,7 @@
 from __future__ import unicode_literals
 import frappe
 from frappe.model.document import Document
+import os
 
 #Alternative to "File Upload Disconnected. Please try again."
 
@@ -40,9 +41,24 @@ def force_attach_file_doc(filename,name):
 class OCRRead(Document):
     def read_image(self):
         from PIL import Image
+        import requests
         import pytesseract
 
-        fullpath = frappe.get_site_path() + self.file_to_read
+        path = self.file_to_read
+
+        if path.startswith('/assets/'):
+            # from public folder
+            fullpath = os.path.abspath(path)
+        elif path.startswith('/files/'):
+            # public file
+            fullpath = frappe.get_site_path() + '/public' + path
+        elif path.startswith('/private/files/'):
+            # private file
+            fullpath = frappe.get_site_path() + path
+        else:
+            # external link
+            fullpath = requests.get(path, stream=True).raw
+
         im = Image.open(fullpath)
 
         text = pytesseract.image_to_string(im, lang='eng')

diff --git a/requirements.txt b/requirements.txt
@@ -1 +1,2 @@
-frappe
+frappe
+requests