NVIDIA · fayejf · Jun 22, 2023 · Jun 22, 2023 · Jun 22, 2023
diff --git a/README.rst b/README.rst
@@ -290,6 +290,14 @@ Transformer Engine already supports Flash Attention for GPT models. If you want
   pip install flash-attn
   pip install triton==2.0.0.dev20221202
 
+NLP inference UI
+~~~~~~~~~~~~~~~~~~~~
+To launch the inference web UI server, please install `gradio <https://gradio.app/>`_.
+
+.. code-block:: bash
+
+  pip install gradio==3.34.0
+
 NeMo Text Processing
 ~~~~~~~~~~~~~~~~~~~~
 NeMo Text Processing, specifically (Inverse) Text Normalization, is now a separate repository `https://github.com/NVIDIA/NeMo-text-processing <https://github.com/NVIDIA/NeMo-text-processing>`_.

diff --git a/nemo/collections/nlp/modules/common/chatbot_component.py b/nemo/collections/nlp/modules/common/chatbot_component.py
@@ -19,9 +19,29 @@
 """
 from __future__ import annotations
 
-from gradio.components import *
+import warnings
+
 from markdown2 import Markdown
 
+try:
+    from typing import Any, Callable, Dict, List, Literal, Tuple
+
+    from gradio.components import (
+        Changeable,
+        Component,
+        Enum,
+        EventListenerMethod,
+        IOComponent,
+        JSONSerializable,
+        Selectable,
+        document,
+        processing_utils,
+    )
+
+    GRADIO_AVAILABLE = True
+except (ImportError, ModuleNotFoundError):
+    GRADIO_AVAILABLE = False
+
 
 class _Keywords(Enum):
     NO_VALUE = "NO_VALUE"  # Used as a sentinel to determine if nothing is provided as a argument for `value` in `Component.update()`

diff --git a/nemo/collections/nlp/modules/common/megatron_web_server.py b/nemo/collections/nlp/modules/common/megatron_web_server.py
@@ -14,10 +14,14 @@
 
 import asyncio
 
-import gradio as gr
+try:
+    import gradio as gr
+
+    GRADIO_AVAILABLE = True
+except (ImportError, ModuleNotFoundError):
+    GRADIO_AVAILABLE = False
 
 from nemo.collections.nlp.modules.common.chat_css import CSS
-from nemo.collections.nlp.modules.common.chatbot_component import Chatbot
 from nemo.collections.nlp.modules.common.megatron.retrieval_services.util import (
     convert_retrieved_to_md,
     request_data,
@@ -30,8 +34,17 @@
 
 DEFAULT_SYSTEM = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n"
 SYSTEM_TOKEN = '<extra_id_0>System\n'
-# HUMAN_TOKEN = 'Human:'
-# ASSITANT_TOKEN = 'Assistant:'
+
+
+def check_gradio_import():
+    """Raise ImportError with pip install instructions if the guarded gradio import failed."""
+    if not GRADIO_AVAILABLE:
+        raise ImportError(
+            "could not find the gradio library.\n"
+            "****************************************************************\n"
+            "To install it, please follow the steps below:\n"
+            "pip install gradio==3.34.0\n"
+        )
 
 
 def create_gen_function(port=5555, chat=False):
@@ -89,6 +102,7 @@ def get_generation(
 
 
 def get_demo(share, username, password, server_port=5555, web_port=9889, loop=None):
+    check_gradio_import()
     asyncio.set_event_loop(loop)
     with gr.Blocks() as demo:
         with gr.Row():
@@ -132,6 +146,9 @@ def get_demo(share, username, password, server_port=5555, web_port=9889, loop=No
 
 
 def get_chatbot_demo(share, username, password, server_port=5555, web_port=9889, loop=None):
+    check_gradio_import()
+    from nemo.collections.nlp.modules.common.chatbot_component import Chatbot
+
     asyncio.set_event_loop(loop)
     with gr.Blocks(css=CSS) as demo:
         # store the mutliple turn conversation
@@ -294,6 +311,7 @@ def reset_index(self):
         return request_data(data, self.combo_service_ip, self.combo_service_port)
 
     def run_demo(self, share, username, password, port):
+        check_gradio_import()
         with gr.Blocks(css="table, th, td { border: 1px solid blue; table-layout: fixed; width: 100%; }") as demo:
             with gr.Row():
                 with gr.Column(scale=2, width=200):

diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt
@@ -5,7 +5,6 @@ fasttext
 flask_restful
 ftfy
 gdown
-gradio>=3.28.3
 h5py
 ijson
 inflect
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,7 +5,6 @@ fasttext @@
     flask_restful
     ftfy
     gdown
-    gradio>=3.28.3
     h5py
     ijson
     inflect
@@ Expand Down @@