This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Add gpu option to torchscript BART models #3979

Merged: 11 commits, Aug 26, 2021
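In short: instead of always forcing the model onto CPU before scripting, the export script can now keep a BART model on the GPU, so the saved TorchScript module runs greedy-search inference on CUDA. A minimal sketch of driving the updated script from Python; the paths are placeholders, and the standard ParlAI --model/--model-file flags are assumed:

from parlai.scripts.torchscript import export_model, setup_args

# Placeholder model and output paths; --script-for-gpu is the flag added in this PR.
opt = setup_args().parse_args(
    [
        "--model", "bart",
        "--model-file", "/path/to/bart/model",
        "--scripted-model-file", "/path/to/scripted_bart.pt",
        "--script-for-gpu", "True",
    ]
)
export_model(opt)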
21 changes: 16 additions & 5 deletions parlai/scripts/torchscript.py
@@ -20,8 +20,6 @@
def export_model(opt: Opt):
"""
Export a model to TorchScript so that inference can be run outside of ParlAI.

Currently, only CPU greedy-search inference on BART models is supported.
"""

if version.parse(torch.__version__) < version.parse("1.7.0"):
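As the docstring says, the point of the export is inference outside of ParlAI. A minimal sketch of consuming the artifact with nothing but torch, assuming the placeholder output path from above and the default TorchScriptGreedySearch module (whose forward takes a context string and a max length and returns the generated string, see modules.py below):

import torch

module = torch.jit.load("/path/to/scripted_bart.pt")
reply = module("hello world", 32)  # forward(context: str, max_len: int) -> str
print(reply)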
@@ -34,9 +32,12 @@ def export_model(opt: Opt):
from parlai.torchscript.modules import TorchScriptGreedySearch

overrides = {
"no_cuda": True, # TorchScripting is CPU only
"model_parallel": False, # model_parallel is not currently supported when TorchScripting
"model_parallel": False, # model_parallel is not currently supported when TorchScripting,
}
if opt.get("script_for_gpu", False):
Contributor: Hmm, instead of adding the --script-for-gpu flag, could we just reuse the value of the no_cuda key?

opt["no_cuda"] = False
else:
opt["no_cuda"] = True
if opt.get("script_module"):
script_module_name, script_class_name = opt["script_module"].split(":", 1)
script_module = importlib.import_module(script_module_name)
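For reference, the inline review suggestion above (reuse the existing no_cuda key rather than introduce --script-for-gpu) would look roughly like this; a sketch of the proposed alternative, not code from this PR:

overrides = {
    "model_parallel": False,  # model_parallel is not currently supported when TorchScripting
}
# Honor the --no-cuda flag that ParlAI agents already accept: script on CPU only
# when the user explicitly asked for it, otherwise leave the model where it loads.
if opt.get("no_cuda", False):
    overrides["no_cuda"] = True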
@@ -54,7 +55,10 @@ def export_model(opt: Opt):
original_module = script_class(agent)

# Script the module and save
scripted_module = torch.jit.script(script_class(agent))
instantiated = script_class(agent)
if not opt["no_cuda"]:
instantiated = instantiated.cuda()
scripted_module = torch.jit.script(instantiated)
with PathManager.open(opt["scripted_model_file"], "wb") as f:
torch.jit.save(scripted_module, f)
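The order of operations above matters: torch.jit.script does not move any tensors, it wraps the module's parameters wherever they currently live, and torch.jit.save then records that placement. A standalone check of this behavior, assuming a CUDA device is available:

import torch

m = torch.nn.Linear(4, 4).cuda()
sm = torch.jit.script(m)
print(next(sm.parameters()).device)  # cuda:0, scripting keeps parameters on the GPU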

@@ -90,6 +94,13 @@ def setup_args() -> ParlaiParser:
default="parlai.torchscript.modules:TorchScriptGreedySearch",
help="module to TorchScript. Example: parlai.torchscript.modules:TorchScriptGreedySearch",
)
parser.add_argument(
"-sfg",
"--script-for-gpu",
type=bool,
default=False,
help="whether to TorchScript the model for GPU inference",
)
return parser
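One caveat on the flag definition: with plain argparse semantics, type=bool turns any non-empty string (including "False") into True. ParlAI's parser also accepts the string form type='bool', which parses true/false text properly; if that behavior is wanted, the argument could be declared along these lines (a sketch, not the PR's code):

parser.add_argument(
    "-sfg",
    "--script-for-gpu",
    type="bool",  # assumed: ParlAI's registered 'bool' type, which parses "true"/"false"
    default=False,
    help="whether to TorchScript the model for GPU inference",
)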


16 changes: 13 additions & 3 deletions parlai/torchscript/modules.py
@@ -40,7 +40,7 @@ def __init__(self, agent: TorchAgent):
super().__init__()

self.is_bart = agent.opt["model"] == "bart"

self.device = agent.model.encoder.embeddings.weight.device
# Dictionary/tokenization setup
for key, val in self.CAIRAOKE_DICT_PARAMS.items():
assert (
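self.device here is a snapshot taken at construction time, so it reflects wherever the agent's model already lives when the wrapper is built; the get_device() helper added further down re-reads the device from the embedding weight at call time, which stays correct even if the module is moved (for example by the .cuda() call in torchscript.py) after __init__. A small illustration of the difference with a hypothetical toy module, assuming a CUDA device is available:

import torch
import torch.nn as nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.embeddings = nn.Embedding(8, 2)
        self.device = self.embeddings.weight.device  # snapshot at construction

    def get_device(self):
        return self.embeddings.weight.device  # live lookup

toy = Toy().cuda()
print(toy.device)        # cpu: the snapshot is stale after .cuda()
print(toy.get_device())  # cuda:0: the live lookup follows the parameters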
@@ -98,7 +98,10 @@ def __init__(self, agent: TorchAgent):
wrapped_model = ModelIncrStateFlattener(agent.model)

# Create sample inputs for tracing
sample_tokens = torch.tensor([[1, 2, 3, 4, 5]], dtype=torch.long)
sample_tokens = torch.tensor(
[[1, 2, 3, 4, 5]], dtype=torch.long, device=self.device
)
sample_tokens = sample_tokens.to(self.device)
encoder_states = agent.model.encoder(sample_tokens)
initial_generations = self._get_initial_decoder_input(sample_tokens)
latent, initial_incr_state = wrapped_decoder(
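These sample inputs are run through the encoder and decoder eagerly and are later handed to torch.jit.trace, which executes the module on them; once the model sits on the GPU they therefore have to be created on (or moved to) the same device, or the calls fail with a device mismatch. A generic illustration with a toy module, assuming CUDA is available:

import torch
import torch.nn as nn

model = nn.Linear(5, 5).cuda()
sample = torch.randn(1, 5, device="cuda")  # sample input must live on the model's device
traced = torch.jit.trace(model, (sample,))
print(traced(sample).shape)  # torch.Size([1, 5])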
@@ -137,6 +140,9 @@ def __init__(self, agent: TorchAgent):
wrapped_decoder, (generations, encoder_states, incr_state), strict=False
)

def get_device(self):
return self.encoder.embeddings.weight.device

def _get_initial_decoder_input(self, x: torch.Tensor) -> torch.Tensor:
"""
Workaround because we can't use TGM._get_initial_decoder_input() directly.
@@ -147,7 +153,9 @@ def _get_initial_decoder_input(self, x: torch.Tensor) -> torch.Tensor:
"""
bsz = x.size(0)
return (
torch.tensor(self.initial_decoder_input, dtype=torch.long)
torch.tensor(
self.initial_decoder_input, dtype=torch.long, device=self.device
)
.expand(bsz, len(self.initial_decoder_input))
.to(x.device)
)
@@ -213,6 +221,8 @@ def forward(self, context: str, max_len: int = 128) -> str:
)

# Pass through the encoder and decoder to generate tokens

flattened_text_vec = flattened_text_vec.to(self.get_device())
batch_text_vec = torch.unsqueeze(flattened_text_vec, dim=0) # Add batch dim
encoder_states = self.encoder(batch_text_vec)
generations = self._get_initial_decoder_input(batch_text_vec)
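With the input vector moved onto the module's device, the exported GPU artifact can be used end to end: torch.jit.load restores parameters to the device they were saved from (a map_location argument can override that), and the scripted forward handles tokenizing the string input itself. A closing sketch, reusing the placeholder path from earlier and assuming a CUDA device is present:

import torch

module = torch.jit.load("/path/to/scripted_bart.pt")  # parameters come back on the GPU
print(module("hello, how are you?", 32))  # greedy-search reply as a plain string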