diff --git a/.github/workflows/docs-build.yml b/.github/workflows/docs-build.yml new file mode 100644 index 0000000..a0cd304 --- /dev/null +++ b/.github/workflows/docs-build.yml @@ -0,0 +1,55 @@ +name: docs-build + +on: + push: + branches: + - master + pull_request: + branches-ignore: [gh-pages] + paths: ['docs/**'] + +jobs: + docs-build: + + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: '3.10' + + - name: Install python dependencies + run: | + pip install --upgrade pip + pip install -e .[docs] + + - name: Build HTML docs + id: linkcheck + run: | + make -C docs html linkcheck 2>&1 | tee check.log + echo "broken=$(grep '(line\s*[0-9]*)\(\s\)broken\(\s\)' check.log)" >> $GITHUB_OUTPUT + env: + SPHINXOPTS: -nW --keep-going + + - name: Show docs build check results + run: | + if [ -z "${{ steps.linkcheck.outputs.broken }}" ]; then + echo "No broken links found." + exit 0 + else + echo "Broken links found:" + echo "${{ steps.linkcheck.outputs.broken }}" + exit 1 + fi + + - name: Deploy docs to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_branch: gh-pages + publish_dir: docs/build/html + force_orphan: true diff --git a/README.md b/README.md index d9538e2..cf222c0 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ The OCR and translation is performed using freely available machine learning mod The server is designed to be used together with [this browser extension](https://github.com/Crivella/ocr_extension), acting as a front-end providing the images and controlling the model languages and models being used. +[Full Documentation](https://crivella.github.io/ocr_translate/) + ## Running the server If you plan to use a different settings (eg. 
database, or model location), you can either: @@ -203,7 +205,7 @@ The second section of variables is defined at the project level and is only avai | --- | --- | --- | | `LOAD_ON_START`| false[/true] | Will automatically load the most used source/destination languages and most used models for that language combination at server start| | `AUTOCREATE_LANGUAGES` | false[/true] | Will automatically create the Language entries in the database as defined in [languages.json](ocr_translate/OCR_TSL/languages.json) | -| `AUTOCREATE_VALIDATED_MODELS` | false[/true] | Will automatically create the model entries that have been tested and defined in [models.json](ocr_translate/OCR_TSL/models.json). NOTE: Creation of the models requires the involved languages to already exist in the database | +| `AUTOCREATE_VALIDATED_MODELS` | false[/true] | Will automatically create the model entries defined in code and plugins `entrypoints`. | | `DEVICE` | cpu[/cuda] | Which device to use with torch | | `EASYOCR_MODULE_PATH` | `$HOME/.EasyOCR` | Directory where EasyOCR store its downloaded models | | `TRANSFORMERS_CACHE` | `$HOME/.cache/huggingface/hub/` | Directory where [Hugging Face](https://huggingface.co/) models are being stored (either downloaded manually or downloaded by `transformers`) | diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..8a8d017 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,39 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'ocr_translate' +copyright = '2023, Davide Grassano' +author = 'Davide Grassano' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx.ext.extlinks', + 'sphinx_design', + 'sphinx_rtd_dark_mode', + 'sphinxcontrib.openapi', +] + +templates_path = ['_templates'] +exclude_patterns = [] + +# -- Options for extlinks extension ------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/extensions/extlinks.html#module-sphinx.ext.extlinks +extlinks = { + 'github': ('https://github.com/Crivella/ocr_translate/%s', ''), + 'dockerhub': ('https://hub.docker.com/r/crivella1/ocr_translate/%s', ''), +} + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +default_dark_mode = True +html_theme = 'rtd' +html_static_path = ['_static'] diff --git a/docs/source/contrib/api.rst b/docs/source/contrib/api.rst new file mode 100644 index 0000000..8c0fefd --- /dev/null +++ b/docs/source/contrib/api.rst @@ -0,0 +1,4 @@ +API Endpoints +============= + +.. openapi:: ../../../openapi.yml diff --git a/docs/source/contrib/index.rst b/docs/source/contrib/index.rst new file mode 100644 index 0000000..2914fce --- /dev/null +++ b/docs/source/contrib/index.rst @@ -0,0 +1,61 @@ +Contributor's guide +=================== + +.. toctree:: + :maxdepth: 2 + + plugins + api + +This is the guide for contributing to the main codebase of the project. 
+ +Development dependencies +------------------------ + +When installing the python package with pip you can install various sets of optional dependencies. +For development you should install django-ocr_translate (from inside a clone of your fork) with the following: + +.. code-block:: shell + + pip install .[tests,pre-commit] + +If you also plan to add to the documentation then you should install the `docs` dependencies: + +.. code-block:: shell + + pip install .[docs] + +pre-commit hooks +---------------- + +Once the python extra dependencies are installed, install the `pre-commit `_ hooks into your repo. These are used to enforce code style and run tests before commits. (This will be enforced on pull requests by the CI workflow, so you might as well do it from the beginning) + +.. code-block:: shell + + pre-commit install + +You can also manually run the pre-commit command on all files: + +.. code-block:: shell + + pre-commit run --all-files + +Note that some of the hooks will modify the files in place, so you might need to re-add them to the staging area. + +Testing +------- + +If you are adding new code to the codebase, make sure to add tests for it. +You can check if your code is covered by tests by running: + +.. code-block:: shell + + pytest --cov=ocr_translate --cov-report=html tests + +And opening the `htmlcov/index.html` file generated in your working directory. + +If you are running tests with an IDE like VSCode, make sure that the following is set in your environment. + +.. code-block:: + + `DJANGO_SETTINGS_MODULE = "mysite.settings"` diff --git a/docs/source/contrib/plugins.rst b/docs/source/contrib/plugins.rst new file mode 100644 index 0000000..f318a29 --- /dev/null +++ b/docs/source/contrib/plugins.rst @@ -0,0 +1,182 @@ +Writing plugins +=============== + +New models and proxy model classes can be added without modifying the core codebase by creating a python package and using the following entrypoints.
+ +When creating a plugin, the following examples show the minimal methods that need to be redefined. +Besides this, the plugin can also redefine any of the methods to change eg how inputs are preprocessed, but be careful with this as it might break provenance (eg. `options` are used differently). + +IMPORTANT: The model classes are supposed to be proxy classes and should hence contain + +.. code-block:: python + + class Meta: + proxy = True + +- :code:`ocr_translate.box_data`: Point this entrypoint to a dictionary with the info required to create a new :code:`OCRBoxModel` + +.. code-block:: python + + easyocr_box_model_data = { + # Name of the model + 'name': 'easyocr', + # List of ISO-639-1 codes supported by the model + 'lang': ['en', 'ja', 'zh', 'ko'], + # How the model requires the codes to be passed (one of 'iso1', 'iso2b', 'iso2t', 'iso3') + # If the models codes only partially match or are totally different from one of the ISO standards, see iso1_map + 'lang_code': 'iso1', + # Name of the entrypoint for the model (should match what is used in pyproject.toml) + 'entrypoint': 'easyocr.box', + # Maps ISO-639-1 codes to the codes used by the model. Does not need to map every language, only those that are + # different from getattr(lang: m.Language, lang_code) + 'iso1_map': { + 'ce': 'che', + 'zh': 'ch_sim', + 'zht': 'ch_tra', + 'tg': 'tjk', + } + } + +- :code:`ocr_translate.ocr_data`: Point this entrypoint to a dictionary with the info required to create a new :code:`OCRModel` + +.. code-block:: python + + khawhite_ocr_model_data = { + 'name': 'kha-white/manga-ocr-base', + 'lang': ['ja'], + 'lang_code': 'iso1', + 'entrypoint': 'hugginface.ved' + } + +- :code:`ocr_translate.tsl_data`: Point this entrypoint to a dictionary with the info required to create a new :code:`TSLModel` + +..
code-block:: python + + staka_fugumt_ja_en_tsl_model_data = { + 'name': 'staka/fugumt-ja-en', + 'lang_src': ['ja'], + 'lang_dst': ['en'], + 'lang_code': 'iso1', + 'default_options': { + 'break_newlines': True + }, + 'entrypoint': 'hugginface.seq2seq' + } + +- :code:`ocr_translate.box_models`: Point this entrypoint to a class that subclasses :code:`OCRBoxModel`. Should redefine atleast the following methods + +.. code-block:: python + + class SomeNewClassName(m.OCRBoxModel): + """OCRBoxtranslate plugin to allow usage of ... for box detection.""" + class Meta: + proxy = True + + def load(self): + """Load the model into memory.""" + # Do something here to load the model or nothing if not needed (should still be defined) + + def unload(self) -> None: + """Unload the model from memory.""" + # Do something here to unload the model or nothing if not needed (should still be defined) + + + def _box_detection( + self, + image: PILImage, options: dict = None + ) -> list[tuple[int, int, int, int]]: + """Perform box OCR on an image. + + Args: + image (Image.Image): A Pillow image on which to perform OCR. + options (dict, optional): A dictionary of options. + + Raises: + NotImplementedError: The type of model specified is not implemented. + + Returns: + list[tuple[int, int, int, int]]: A list of bounding boxes in lrbt format. + """ + # Redefine this method with the same signature as above + # Should return a list of `lrbt` boxes after processing the input PILImage + +- :code:`ocr_translate.ocr_models`: Point this entrypoint to a class that subclasses :code:`OCRModel`. Should redefine atleast the following methods + +.. code-block:: python + + class SomeNewClassName(m.OCRModel): + """OCRBoxtranslate plugin to allow usage of ... 
for box detection.""" + class Meta: + proxy = True + + def load(self): + """Load the model into memory.""" + # Do something here to load the model or nothing if not needed (should still be defined) + + def unload(self) -> None: + """Unload the model from memory.""" + # Do something here to unload the model or nothing if not needed (should still be defined) + + + def _ocr( + self, + img: Image.Image, lang: str = None, options: dict = None + ) -> str: + """Perform OCR on an image. + + Args: + img (Image.Image): A Pillow image on which to perform OCR. + lang (str, optional): The language to use for OCR. (Not every model will use this) + bbox (tuple[int, int, int, int], optional): The bounding box of the text on the image in lbrt format. + options (dict, optional): A dictionary of options to pass to the OCR model. + + Raises: + TypeError: If img is not a Pillow image. + + Returns: + str: The text extracted from the image. + """ + # Redefine this method with the same signature as above + # Should return a sring with the result of the OCR performed on the input PILImage. + # Unless the methods `prepare_image` or `ocr` are also being overwritten, the input image will be the result of the CROP on the original image using the bounding boxes given by the box detection model. + +- :code:`ocr_translate.tsl_models`: Point this entrypoint to a class that subclasses :code:`TSLModel`. Should redefine atleast the following methods + +.. code-block:: python + + class SomeNewClassName(m.TSLModel): + """OCRBoxtranslate plugin to allow usage of ... 
for box detection.""" + class Meta: + proxy = True + + def load(self): + """Load the model into memory.""" + # Do something here to load the model or nothing if not needed (should still be defined) + + def unload(self) -> None: + """Unload the model from memory.""" + # Do something here to unload the model or nothing if not needed (should still be defined) + + + def _translate( + self, + tokens: list, src_lang: str, dst_lang: str, options: dict = None) -> str | list[str]: + """Translate a text using a the loaded model. + + Args: + tokens (list): list or list[list] of string tokens to be translated. + lang_src (str): Source language. + lang_dst (str): Destination language. + options (dict, optional): Options for the translation. Defaults to {}. + + Raises: + TypeError: If text is not a string or a list of strings. + + Returns: + Union[str,list[str]]: Translated text. If text is a list, returns a list of translated strings. + """ + # Redefine this method with the same signature as above + # Should return a sring with the translated text. + # IMPORTANT: the main codebase treats this function as batchable: + # The input `tokens` can be a list of strings or a list of list of strings. The output should match the input being a string or list of strings. + # (This is used to leverage the capability of pytorch to batch inputs and outputs for faster performances, or it can also used to write a plugin for an online service by using a single request for multiple inputs using some separator that the service will leave unaltered.) diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..d7746d9 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,80 @@ +.. ocr_translate documentation master file, created by + sphinx-quickstart on Thu Sep 21 10:37:43 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to ocr_translate's documentation! 
+========================================= + +This is a Django app for creating a back-end server aimed at performing OCR and translation of images received via a POST request. + +The OCR and translation are performed using freely available machine learning models and packages (see below for what is currently implemented). + +The server is designed to be used together with this browser `extension`_, acting as a front-end providing the images and controlling the model languages and models being used. + +.. grid:: 2 + :gutter: 3 + + .. grid-item-card:: Installation + + Guide on installing ocr_translate with various methods. + + +++ + + .. button-ref:: install/index + :expand: + :color: secondary + :click-parent: + + Installation Guides + + .. grid-item-card:: Running + + Guide on running ocr_translate from one of the installations. + + +++ + + .. button-ref:: running/index + :expand: + :color: secondary + :click-parent: + + Running Guides + + .. grid-item-card:: User Guides + + User guide for ocr_translate. + + +++ + + .. button-ref:: user/index + :expand: + :color: secondary + :click-parent: + + User Guides + + .. grid-item-card:: Contributor Guides + + Want to add to the codebase? + The contributing guidelines will guide you through the + process of improving ocr_translate. + + +++ + + .. button-ref:: contrib/index + :expand: + :color: secondary + :click-parent: + + Contributor guides + +.. _extension: https://github.com/Crivella/ocr_extension + +.. toctree:: + :hidden: + + running/index + install/index + user/index + contrib/index diff --git a/docs/source/install/index.rst b/docs/source/install/index.rst new file mode 100644 index 0000000..5fe0010 --- /dev/null +++ b/docs/source/install/index.rst @@ -0,0 +1,53 @@ + +Installation +==================== + +For both the Github and PyPI installation it is strongly suggested to install this project using a `virtual environment `_. + +..
toctree:: + :maxdepth: 3 + +From Release file (Windows only) +-------------------------------- + +From the :github:`github releases page ` you can download either: + +- The :github:`CPU only version ` +- The GPU version split in :github:`file1 ` and :github:`file2 ` (The CUDA dependencies make it take much more space), which can be restored using tools like `7zip `_ and `NanaZip `_. + +From Github +----------- + +- Clone or download the repository + - :code:`git clone https://github.com/Crivella/ocr_translate.git` +- Install the project dependencies (choose the appropriate files depending if you wanna run on GPU or CPU only): + - :code:`pip install -r requirements-torch-[cpu/cuda].txt` + - :code:`pip install -r requirementscs.txt` + +From Docker +----------- + +CPU and CUDA specific images are available on :dockerhub:`Dockerhub <>`: + +- CPU: :code:`docker pull crivella1/ocr_translate:latest-cpu` +- GPU: :code:`docker pull crivella1/ocr_translate:latest-gpu` + +Manually create your image: + +- Create a .pip-cache-[cpu/gpu] directory inside your project. +- Optional: re-install the project dependencies pointing this as the cache folder for pip (will make the build process much faster, by reusing the cached dependencies) +- Run :code:`docker build -t IMAGE_TAG -f Dockerfile-[cpu/gpu] .` + +From PyPI +--------- + +Run the command + +- :code:`pip install django-ocr_translate` + +By default torch 2.x will come in its CUDA enabled version. While this works also for CPU, it will also install ~1 GB of cuda dependencies. +If you wish to run on CPU only, download the file [requirements-torch-cpu.txt](requirements-torch-cpu.txt) first and run + +- :code:`pip install -r requirements-torch-cpu.txt` + +before installing the python package.
diff --git a/docs/source/running/from_docker.rst b/docs/source/running/from_docker.rst new file mode 100644 index 0000000..5359e29 --- /dev/null +++ b/docs/source/running/from_docker.rst @@ -0,0 +1,18 @@ +From Docker image +----------------- + +See the section on how to :doc:`install from DockerHUB ` first. + +This section assumes you have docker installed and the image of the project. + +Run the command: + +- :code:`docker run --name CONTAINER_NAME -v PATH_TO_YOUR_MODEL_DIRECTORY:/models -v PATH_TO_DIR_WITH_SQLITE_FILE:/data --env-file=PATH_TO_AND_ENV_VARIABLE_FILE -p SERVER_PORT:4000 -d ocr_translate` + +See the :doc:`Environment variables <../user/envs>` section for configuring your environment variable file. Additionally the docker image defines several other variables to automatically create an admin user for managing the database via the django-admin interface: + +- :code:`UID`: UID of the user owning the files in /models and /data +- :code:`GID`: GID of the user owning the files in /models and /data +- :code:`NUM_WEB_WORKERS`: Number of gunicorn workers for the server +- :code:`DJANGO_SUPERUSER_USERNAME`: The username of the admin user to be created. +- :code:`DJANGO_SUPERUSER_PASSWORD`: The password of the admin user to be created. diff --git a/docs/source/running/from_github.rst b/docs/source/running/from_github.rst new file mode 100644 index 0000000..5f35b54 --- /dev/null +++ b/docs/source/running/from_github.rst @@ -0,0 +1,27 @@ +From Github installation +------------------------ + +See the section on how to :doc:`install from Github ` first. + +The Github repo provides not only the Django app files, but also the already configured project files used to start the server. + +Create/Initialize your database by running + +- :code:`python manage.py migrate` + +inside your project folder. + +Run the server using for example one of the following options: + +- Django development server.
This is more oriented for developing than deploying, but is fine for a self-hosted single-user server accepting connections only on *localhost* + - From inside the project directory: :code:`python manage.py runserver PORT` + - The suggested PORT would be 4000 as it is the one set by default in the extension +- `Nginx `_ + `Gunicorn `_: + - Check the :code:`Dockerfile`, as this is what the provided image makes use of. + +At least for the first time, it is suggested to run the server with the :doc:`Environment variables <../user/envs>` `AUTOCREATE_LANGUAGES` and `AUTOCREATE_VALIDATED_MODELS` set to `"true"` to automatically load the validated languages and models provided by the project. + +Notes: + +- Gunicorn workers will each spawn a separate instance of the loaded models, each taking its own space in the memory. This can quickly fill up the memory especially if running on GPU. Ideally set this to 1. +- Django development server will spawn new threads for handling incoming requests (if no currently existing thread is free), which share the same memory. Running more than one worker per loaded model concurrently might slow down the actual computation and in some cases also block the execution. diff --git a/docs/source/running/from_pypi.rst b/docs/source/running/from_pypi.rst new file mode 100644 index 0000000..c2d2244 --- /dev/null +++ b/docs/source/running/from_pypi.rst @@ -0,0 +1,17 @@ +From PyPI installation +---------------------- + +See the section on how to :doc:`install from PyPI ` first. + +When installing the project from PyPI, only the app is available. +This will need to be integrated in a Django project in order to be used. +These are the minimal instructions for creating a project and starting the server: + +- Run :code:`django-admin startproject mysite` to create a django project +- Configure the server by replacing the automatically created files (strongly recommended): + - [settings.py](mysite/settings.py) with the one available on the repo.
+ - [urls.py](mysite/urls.py) with the one available on the repo. +- or by manually editing the files: + - settings.py: Add the :code:`ocr_translate` app to the :code:`INSTALLED_APPS` + - urls.py: Include the :code:`'ocr_translate.urls'` into your project urls. +- From here follow the same instructions as when starting :doc:`from Github ` diff --git a/docs/source/running/from_release.rst b/docs/source/running/from_release.rst new file mode 100644 index 0000000..4f9a2d6 --- /dev/null +++ b/docs/source/running/from_release.rst @@ -0,0 +1,20 @@ +From Release (Windows) +---------------------- + +Tested for Windows11 + +- Unzip the release file downloaded in the previous step. +- From inside the folder, run the :code:`run_server-XXX.exe` file (XXX=cpu/gpu) + +The server will run with sensible defaults. Most notably the model files and database will be downloaded/created under `%userprofile%/.ocr_translate`. +Also the gpu version will attempt to run on GPU by default, and fall-back to CPU if the former is not available. + +For customization, you can set the :doc:`Environment variables <../user/envs>` yourself: + +- Powershell: + +.. code-block:: powershell + + $env:ENV_VAR_NAME="XXX" + +- by searching for :code:`environment variable` in the settings menu. diff --git a/docs/source/running/index.rst b/docs/source/running/index.rst new file mode 100644 index 0000000..2b953c9 --- /dev/null +++ b/docs/source/running/index.rst @@ -0,0 +1,29 @@ + +Running +==================== + +This is a guide on running the ocr_translate server using different methods. + +.. toctree:: + :maxdepth: 2 + + from_release + from_github + from_pypi + from_docker + +General info +------------ + +If you plan to use different settings (eg.
database, or model location), you can either: + +- Manually edit the :code:`settings.py` file +- Use the provided :doc:`Environment variables <../user/envs>` + +See below for a :doc:`list of supported databases <../user/index>` + +You will also have to modify the :code:`ALLOWED_HOSTS` in case you plan to access the server from somewhere other than `localhost`. + +All the different ways to run the server may provide different sets of default values (each of them is targeted for a different level of usage). + +.. _settings.py: mysite/settings.py diff --git a/docs/source/user/envs.rst b/docs/source/user/envs.rst new file mode 100644 index 0000000..bfd312e --- /dev/null +++ b/docs/source/user/envs.rst @@ -0,0 +1,122 @@ +Environment Variables +===================== + +The server will check a number of environment variables to configure itself. + +Setting Environment variables +----------------------------- + +Environment variables can be set in many ways depending on the OS and/or the tool used to launch the server. +This is a list of common possible ways: + +- Windows Powershell: :code:`$env:VARIABLE_NAME = "value"` (code must be run in the same shell) +- Windows Command Prompt: :code:`set VARIABLE_NAME=value` (code must be run in the same shell) +- Windows Settings: :code:`Control Panel > System > Advanced System Settings > Environment Variables` +- Linux BASH: :code:`export VARIABLE_NAME=value` (code must be run in the same shell) +- VSCode: :code:`launch.json > env > VARIABLE_NAME` + +.. code-block:: + + { + "version": "0.2.0", + "configurations": [ + { + ... + "env": { + "VARIABLE_NAME_1": "VALUE_1", + ... + }, + ... + } + ] + } + + +App variable List +----------------- + +Variables used by the application. + +..
list-table:: Title + :widths: 25 25 50 + :header-rows: 1 + + * - Variable + - Values + - Usage + * - :code:`LOAD_ON_START` + - false[/true] + - Will automatically load the most used source/destination languages and most used models for that language combination at server start + * - :code:`AUTOCREATE_LANGUAGES` + - false[/true] + - Will automatically create the Language entries in the database as defined in [languages.json](ocr_translate/OCR_TSL/languages.json) + * - :code:`AUTOCREATE_VALIDATED_MODELS` + - false[/true] + - Will automatically create the model entries defined in code and plugins `entrypoints`. + * - :code:`DEVICE` + - cpu[/cuda] + - Which device to use with torch + * - :code:`EASYOCR_MODULE_PATH` + - :code:`$HOME/.EasyOCR` + - Directory where EasyOCR store its downloaded models + * - :code:`TRANSFORMERS_CACHE` + - :code:`$HOME/.cache/huggingface/hub/` + - Directory where [Hugging Face](https://huggingface.co/) models are being stored (either downloaded manually or downloaded by `transformers`) + * - :code:`TRANSFORMERS_OFFLINE` + - 1[/0] + - By default `transformers` will try to download missing models. 
Set this to 0 to only work in offline mode + * - :code:`TESSERACT_PREFIX` + - :code:`$TRANSFORMERS_CACHE/tesseract` + - Directory where tesseract will store and look for models + * - :code:`TESSERACT_ALLOW_DOWNLOAD` + - false[/true] + - Control whether the app should download missing models (true) or work in offline mode only (false) + * - :code:`NUM_MAIN_WORKERS` + - 4 + - Number of `WorkerMessageQueue` workers handling incoming OCR_TSL post requests + * - :code:`NUM_BOX_WORKERS` + - 1 + - Number of `WorkerMessageQueue` workers handling box_ocr pipelines (Should be set as 1 until the pipeline is built to handle multiple concurrent requests efficiently without slowdowns) + * - :code:`NUM_OCR_WORKERS` + - 1 + - Number of `WorkerMessageQueue` workers handling ocr pipelines (Should be set as 1 until the pipeline is built to handle multiple concurrent requests efficiently without slowdowns) + * - :code:`NUM_TSL_WORKERS` + - 1 + - Number of `WorkerMessageQueue` workers handling translation pipelines (Should be set as 1 until the pipeline is built to handle multiple concurrent requests efficiently without slowdowns) + +Server variable List +-------------------- + +Variables used specifically by the DJANGO server. + +.. list-table:: Title + :widths: 25 25 50 + :header-rows: 1 + + * - Variable + - Values + - Usage + * - :code:`DJANGO_DEBUG` + - false[/true] + - Whether to run the server in debug (true) or production (false) mode + * - :code:`DJANGO_LOG_LEVEL` + - INFO + - python `logging` level for + * - :code:`DATABASE_NAME` + - *db.sqlite3* + - For `sqlite3` this is the path to the database file.
For other backend it should be the name of the database + * - :code:`DATABASE_ENGINE` + - `django.db.backends.sqlite3` + - Change this to either a Django or 3rd party provided backend to use another Database type + * - :code:`DATABASE_HOST` + - optional + - Required if using another db back-end + * - :code:`DATABASE_PORT` + - optional + - Required if using another db back-end + * - :code:`DATABASE_USER` + - optional + - Probably required if using another db back-end + * - :code:`DATABASE_PASSWORD` + - optional + - Probably required if using another db back-end diff --git a/docs/source/user/index.rst b/docs/source/user/index.rst new file mode 100644 index 0000000..0c33ba3 --- /dev/null +++ b/docs/source/user/index.rst @@ -0,0 +1,21 @@ + +User's guide +============ + +.. toctree:: + :maxdepth: 2 + + envs + +Known validated plugins +----------------------- + +- `ocr_translate-google `_: Enables usage of GoogleTranslate for translations. + +Supported/tested databases +-------------------------- + +- `SQLite `_: This is mostly fine for a self-hosted server accessed by a single or few users (and it's probably gonna be faster than any other database not running on the same network as the server because of latency). +- `MySQL `_ +- `MariaDB `_ +- `PostgreSQL `_ diff --git a/openapi.yml b/openapi.yml new file mode 100644 index 0000000..733cc54 --- /dev/null +++ b/openapi.yml @@ -0,0 +1,245 @@ +openapi: 3.0.0 +info: + title: ocr_translate server API + description: Description of the API for communicating with an [ocr_translate server](https://github.com/Crivella/ocr_translate/) via HTTP requests. + version: 0.2.1 +servers: + - url: http://127.0.0.1:4000 + description: This will be the address to which you deploy the server (the specified one is the default/expected one) +paths: + /: + get: + summary: Server handshake. 
+ description: Return a JSON response containing information about the available languages/models and the currently in use src/dst language and box/ocr/tsl models. + responses: + '405': # status code + description: Method not allowed. + '200': # status code + description: A JSON dictionary with handshake info. + content: + application/json: + schema: + type: object + properties: + Languages: + type: array + items: + type: string + BOXModels: + type: array + items: + type: string + OCRModels: + type: array + items: + type: string + TSLModels: + type: array + items: + type: string + box_selected: + type: string + ocr_selected: + type: string + tsl_selected: + type: string + lang_src: + type: string + lang_dst: + type: string + /get_trans/: + get: + summary: Get translation. + description: Return a JSON response containing all the available translation of the specified text. + parameters: + - in: query + name: text + schema: + type: string + required: true + description: The text for which to search for translations. + responses: + '400': # status code + description: Bad request. + content: + application/json: + schema: + type: object + properties: + error: + type: string + '405': # status code + description: Method not allowed. + '404': # status code + description: Text not found. + '200': # status code + description: A JSON dictionary with the translation. + content: + application/json: + schema: + type: object + properties: + translation: + type: array + items: + type: object + properties: + text: + type: string + model: + type: string + /set_lang/: + post: + summary: Set source and destination languages. + description: Set the source and destination languages for the translation. + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + lang_src: + type: string + lang_dst: + type: string + responses: + '400': # status code + description: Bad request. 
+ content: + application/json: + schema: + type: object + properties: + error: + type: string + '405': # status code + description: Method not allowed. + '200': # status code + description: Success. + /set_models/: + post: + summary: Set box, ocr and tsl models. + description: Set the box, ocr and tsl models for the translation. + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + box_model_id: + type: string + ocr_model_id: + type: string + tsl_model_id: + type: string + responses: + '400': # status code + description: Bad request. + content: + application/json: + schema: + type: object + properties: + error: + type: string + '405': # status code + description: Method not allowed. + '200': # status code + description: Success. + /run_tsl/: + post: + summary: Run translation on text. + description: Run translation on text using the active model. + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + text: + type: string + responses: + '400': # status code + description: Bad request. + content: + application/json: + schema: + type: object + properties: + error: + type: string + '405': # status code + description: Method not allowed. + '200': # status code + description: A JSON dictionary with the translated text. + content: + application/json: + schema: + type: object + properties: + text: + type: string + /run_ocrtsl/: + post: + summary: Run OCR and translation on image. + description: Run OCR and translation on an image using the active models. + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + contents: + type: string + description: Base64 encoded image. + md5: + type: string + description: MD5 hash of the image. + force: + type: boolean + description: Force OCR+translation even if the image is already in the cache/database.
+ default: false + options: + type: object + description: Options dictionary for the OCR and translation. + responses: + '400': # status code + description: Bad request. + content: + application/json: + schema: + type: object + properties: + error: + type: string + '405': # status code + description: Method not allowed. + '406': # status code + description: Can't lazyload with only md5. + '200': # status code + description: A JSON dictionary with the translated text. + content: + application/json: + schema: + type: object + properties: + result: + type: array + items: + type: object + properties: + ocr: + type: string + tsl: + type: string + box: + type: array + items: + type: integer + description: "l, b, r, t" + minItems: 4 + maxItems: 4 diff --git a/pyproject.toml b/pyproject.toml index 9cf1d74..65fa23f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,13 @@ mysql = [ postgres = [ "psycopg[binary]==3.1.9", ] +docs = [ + "sphinx~=7.2.6", + "sphinx_design~=0.5.0", + "sphinx-rtd-theme~=1.3.0", + "sphinx-rtd-dark-mode~=1.3.0", + "sphinxcontrib-openapi" +] tests = [ "pytest", "pytest-cov", @@ -111,6 +118,7 @@ exclude = [ "icon.ico", "run_server.py", "build.sh", "Dockerfile-cpu", "Dockerfile-gpu", "nginx.default", "start-server.sh", "mysite/", "mysite/*", "manage.py", "tests/", "tests/*", + "docs/", "docs/*" ] [tool.pytest.ini_options]