This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Add gpu option to torchscript BART models #3979

Merged · 11 commits · Aug 26, 2021
Changes from 10 commits
3 changes: 1 addition & 2 deletions .circleci/config.yml
@@ -291,8 +291,7 @@ commands:
           name: check for bad links
           working_directory: ~/ParlAI/
           command: |
-            sudo apt-get update
-            sudo apt-get install linkchecker
+            pip install linkchecker
             python -m http.server --directory website/build >/dev/null &
             linkchecker http://localhost:8000/
             kill %1
10 changes: 5 additions & 5 deletions parlai/scripts/torchscript.py
@@ -20,8 +20,6 @@
 def export_model(opt: Opt):
     """
     Export a model to TorchScript so that inference can be run outside of ParlAI.
-
-    Currently, only CPU greedy-search inference on BART models is supported.
     """

     if version.parse(torch.__version__) < version.parse("1.7.0"):
@@ -34,8 +32,7 @@ def export_model(opt: Opt):
     from parlai.torchscript.modules import TorchScriptGreedySearch

     overrides = {
-        "no_cuda": True,  # TorchScripting is CPU only
-        "model_parallel": False,  # model_parallel is not currently supported when TorchScripting
+        "model_parallel": False  # model_parallel is not currently supported when TorchScripting,
     }
     if opt.get("script_module"):
         script_module_name, script_class_name = opt["script_module"].split(":", 1)
@@ -54,7 +51,10 @@ def export_model(opt: Opt):
     original_module = script_class(agent)

     # Script the module and save
-    scripted_module = torch.jit.script(script_class(agent))
+    instantiated = script_class(agent)
+    if not opt["no_cuda"]:
+        instantiated = instantiated.cuda()
+    scripted_module = torch.jit.script(instantiated)
     with PathManager.open(opt["scripted_model_file"], "wb") as f:
         torch.jit.save(scripted_module, f)

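Note: a minimal usage sketch of the new export path, mirroring the options exercised by the test added later in this PR (the output path is a hypothetical example, and a CUDA device is assumed to be available):

from parlai.scripts.torchscript import TorchScript

# With no_cuda=False, the script instantiates the greedy-search module, moves it
# to CUDA, and only then calls torch.jit.script(), so the saved artifact carries
# GPU weights.
export_opt = TorchScript.setup_args().parse_kwargs(
    model='bart',
    scripted_model_file='/tmp/scripted_model.pt',  # hypothetical output path
    no_cuda=False,
)
TorchScript(export_opt).run()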
17 changes: 14 additions & 3 deletions parlai/torchscript/modules.py
@@ -40,7 +40,7 @@ def __init__(self, agent: TorchAgent):
         super().__init__()

         self.is_bart = agent.opt["model"] == "bart"
-
+        self.device = agent.model.encoder.embeddings.weight.device
         # Dictionary/tokenization setup
         for key, val in self.CAIRAOKE_DICT_PARAMS.items():
             assert (
@@ -98,7 +98,10 @@ def __init__(self, agent: TorchAgent):
         wrapped_model = ModelIncrStateFlattener(agent.model)

         # Create sample inputs for tracing
-        sample_tokens = torch.tensor([[1, 2, 3, 4, 5]], dtype=torch.long)
+        sample_tokens = torch.tensor(
+            [[1, 2, 3, 4, 5]], dtype=torch.long, device=self.device
+        )
+        sample_tokens = sample_tokens.to(self.device)
         encoder_states = agent.model.encoder(sample_tokens)
         initial_generations = self._get_initial_decoder_input(sample_tokens)
         latent, initial_incr_state = wrapped_decoder(
@@ -137,6 +140,9 @@ def __init__(self, agent: TorchAgent):
             wrapped_decoder, (generations, encoder_states, incr_state), strict=False
         )

+    def get_device(self):
+        return self.encoder.embeddings.weight.device
+
     def _get_initial_decoder_input(self, x: torch.Tensor) -> torch.Tensor:
         """
         Workaround because we can't use TGM._get_initial_decoder_input() directly.
@@ -147,7 +153,9 @@ def _get_initial_decoder_input(self, x: torch.Tensor) -> torch.Tensor:
         """
         bsz = x.size(0)
         return (
-            torch.tensor(self.initial_decoder_input, dtype=torch.long)
+            torch.tensor(
+                self.initial_decoder_input, dtype=torch.long, device=self.device
+            )
             .expand(bsz, len(self.initial_decoder_input))
             .to(x.device)
         )
@@ -213,6 +221,8 @@ def forward(self, context: str, max_len: int = 128) -> str:
         )

         # Pass through the encoder and decoder to generate tokens
+
+        flattened_text_vec = flattened_text_vec.to(self.get_device())
         batch_text_vec = torch.unsqueeze(flattened_text_vec, dim=0)  # Add batch dim
         encoder_states = self.encoder(batch_text_vec)
         generations = self._get_initial_decoder_input(batch_text_vec)
@@ -255,6 +265,7 @@ def forward(self, context: str, max_len: int = 128) -> str:
     def postprocess_output_generations(self, label: str) -> str:
         """
         Post-process the model output.
+
         Returns the model output by default, override to add custom logic
         """
         return label
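Note: a minimal sketch of loading and running the exported module outside of ParlAI (the path is a hypothetical example; as shown in the diff above, forward() takes a context string, moves the tokenized input to the module's own device via get_device(), and returns the greedy-decoded response):

import torch

# Load a module exported with no_cuda=False; its parameters remain on the CUDA
# device they were scripted with, and forward() moves inputs to that device.
scripted = torch.jit.load('/tmp/scripted_model.pt')  # hypothetical path
response = scripted("hello world")  # returns the decoded response string
print(response)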
32 changes: 31 additions & 1 deletion tests/nightly/gpu/test_torchscript.py
@@ -143,7 +143,7 @@ def test_torchscript_agent(self):

             # Export the BART model
             export_opt = TorchScript.setup_args().parse_kwargs(
-                model='bart', scripted_model_file=scripted_model_file
+                model='bart', scripted_model_file=scripted_model_file, no_cuda=True
             )
             TorchScript(export_opt).run()

@@ -157,6 +157,36 @@ def test_torchscript_agent(self):
             act = bart.act()
             self.assertEqual(act['text'], test_phrase)

+    def test_gpu_torchscript_agent(self):
+        """
+        Test exporting a model to TorchScript for GPU and then testing it on sample
+        data.
+        """
+
+        from parlai.scripts.torchscript import TorchScript
+
+        test_phrase = "Don't have a cow, man!"  # From test_bart.py
+
+        with testing_utils.tempdir() as tmpdir:
+
+            scripted_model_file = os.path.join(tmpdir, 'scripted_model.pt')
+
+            # Export the BART model for GPU
+            export_opt = TorchScript.setup_args().parse_kwargs(
+                model='bart', scripted_model_file=scripted_model_file, no_cuda=False
+            )
+            TorchScript(export_opt).run()
+
+            # Test the scripted GPU BART model
+            scripted_opt = ParlaiParser(True, True).parse_kwargs(
+                model='parlai.torchscript.agents:TorchScriptAgent',
+                model_file=scripted_model_file,
+            )
+            bart = create_agent(scripted_opt)
+            bart.observe({'text': test_phrase, 'episode_done': True})
+            act = bart.act()
+            self.assertEqual(act['text'], test_phrase)
+

 if __name__ == '__main__':
     unittest.main()