From 5ffde9b4bcfe0745134dd1a981ca7b1fd9eb841e Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Sun, 20 Sep 2020 15:30:12 -0700
Subject: [PATCH 1/4] make it easier to run the scripts

---
 examples/seq2seq/README.md                      | 14 +++++++-------
 examples/seq2seq/convert_model_to_fp16.py       |  2 ++
 examples/seq2seq/convert_pl_checkpoint_to_hf.py |  2 ++
 examples/seq2seq/distillation.py                |  2 ++
 examples/seq2seq/download_wmt.py                |  2 ++
 examples/seq2seq/finetune.py                    |  9 ++++++++-
 examples/seq2seq/finetune.sh                    |  3 ---
 examples/seq2seq/minify_dataset.py              |  2 ++
 examples/seq2seq/pack_dataset.py                |  2 ++
 examples/seq2seq/run_distributed_eval.py        |  2 ++
 examples/seq2seq/run_eval.py                    |  2 ++
 examples/seq2seq/run_eval_search.py             |  2 ++
 examples/seq2seq/save_len_file.py               |  9 +++------
 examples/seq2seq/test_bash_script.py            |  2 ++
 examples/seq2seq/test_data/wmt_en_ro/train.len  | Bin 26 -> 34 bytes
 examples/seq2seq/test_data/wmt_en_ro/val.len    | Bin 40 -> 48 bytes
 examples/seq2seq/test_datasets.py               |  9 ++++-----
 examples/seq2seq/test_fsmt_bleu_score.py        | 10 ++--------
 18 files changed, 44 insertions(+), 30 deletions(-)
 mode change 100644 => 100755 examples/seq2seq/convert_model_to_fp16.py
 mode change 100644 => 100755 examples/seq2seq/convert_pl_checkpoint_to_hf.py
 mode change 100644 => 100755 examples/seq2seq/distillation.py
 mode change 100644 => 100755 examples/seq2seq/download_wmt.py
 mode change 100644 => 100755 examples/seq2seq/finetune.py
 mode change 100644 => 100755 examples/seq2seq/minify_dataset.py
 mode change 100644 => 100755 examples/seq2seq/pack_dataset.py
 mode change 100644 => 100755 examples/seq2seq/run_distributed_eval.py
 mode change 100644 => 100755 examples/seq2seq/run_eval.py
 mode change 100644 => 100755 examples/seq2seq/run_eval_search.py
 mode change 100644 => 100755 examples/seq2seq/save_len_file.py

diff --git a/examples/seq2seq/README.md b/examples/seq2seq/README.md
index 2eb9c6e4121f32..bd0fd1a9d4c255 100644
--- a/examples/seq2seq/README.md
+++ b/examples/seq2seq/README.md
@@ -92,7 +92,7 @@ All finetuning bash scripts call finetune.py (or distillation.py) with reasonabl
 To see all the possible command line options, run:
 
 ```bash
-./finetune.sh --help # this calls python finetune.py --help
+ --help # this calls ./finetune.py --help
 ```
 
 ### Finetuning Training Params
@@ -189,7 +189,7 @@ If 'translation' is in your task name, the computed metric will be BLEU. Otherwi
 For t5, you need to specify --task translation_{src}_to_{tgt} as follows:
 ```bash
 export DATA_DIR=wmt_en_ro
-python run_eval.py t5-base \
+./run_eval.py t5-base \
 $DATA_DIR/val.source t5_val_generations.txt \
 --reference_path $DATA_DIR/val.target \
 --score_path enro_bleu.json \
@@ -203,7 +203,7 @@ python run_eval.py t5-base \
 This command works for MBART, although the BLEU score is suspiciously low.
 ```bash
 export DATA_DIR=wmt_en_ro
-python run_eval.py facebook/mbart-large-en-ro $DATA_DIR/val.source mbart_val_generations.txt \
+./run_eval.py facebook/mbart-large-en-ro $DATA_DIR/val.source mbart_val_generations.txt \
 --reference_path $DATA_DIR/val.target \
 --score_path enro_bleu.json \
 --task translation \
@@ -216,7 +216,7 @@ python run_eval.py facebook/mbart-large-en-ro $DATA_DIR/val.source mbart_val_gen
 Summarization (xsum will be very similar):
 ```bash
 export DATA_DIR=cnn_dm
-python run_eval.py sshleifer/distilbart-cnn-12-6 $DATA_DIR/val.source dbart_val_generations.txt \
+./run_eval.py sshleifer/distilbart-cnn-12-6 $DATA_DIR/val.source dbart_val_generations.txt \
 --reference_path $DATA_DIR/val.target \
 --score_path cnn_rouge.json \
 --task summarization \
@@ -230,7 +230,7 @@ python run_eval.py sshleifer/distilbart-cnn-12-6 $DATA_DIR/val.source dbart_val_
 ### Multi-GPU Evalulation
 
 here is a command to run xsum evaluation on 8 GPUS. It is more than linearly faster than run_eval.py in some cases
 because it uses SortishSampler to minimize padding. You can also use it on 1 GPU. `data_dir` must have
-`{type_path}.source` and `{type_path}.target`. Run `python run_distributed_eval.py --help` for all clargs.
+`{type_path}.source` and `{type_path}.target`. Run `./run_distributed_eval.py --help` for all clargs.
 ```bash
 python -m torch.distributed.launch --nproc_per_node=8 run_distributed_eval.py \
@@ -363,11 +363,11 @@ This feature can only be used:
 - with fairseq installed
 - on 1 GPU
 - without sortish sampler
-- after calling `python save_len_file.py $tok $data_dir`
+- after calling `./save_len_file.py $tok $data_dir`
 
 For example,
 ```bash
-python save_len_file.py Helsinki-NLP/opus-mt-en-ro wmt_en_ro
+./save_len_file.py Helsinki-NLP/opus-mt-en-ro wmt_en_ro
 ./dynamic_bs_example.sh --max_tokens_per_batch=2000 --output_dir benchmark_dynamic_bs
 ```
 splits `wmt_en_ro/train` into 11,197 uneven lengthed batches and can finish 1 epoch in 8 minutes on a v100.
diff --git a/examples/seq2seq/convert_model_to_fp16.py b/examples/seq2seq/convert_model_to_fp16.py
old mode 100644
new mode 100755
index 24042cc0e7e581..26b1ff8fd8f664
--- a/examples/seq2seq/convert_model_to_fp16.py
+++ b/examples/seq2seq/convert_model_to_fp16.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 from typing import Union
 
 import fire
diff --git a/examples/seq2seq/convert_pl_checkpoint_to_hf.py b/examples/seq2seq/convert_pl_checkpoint_to_hf.py
old mode 100644
new mode 100755
index ccae1672919315..5f3c984f3724c1
--- a/examples/seq2seq/convert_pl_checkpoint_to_hf.py
+++ b/examples/seq2seq/convert_pl_checkpoint_to_hf.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import os
 from pathlib import Path
 from typing import Dict, List
diff --git a/examples/seq2seq/distillation.py b/examples/seq2seq/distillation.py
old mode 100644
new mode 100755
index 3b1ce10d0d5fbc..1ffd02e9a18450
--- a/examples/seq2seq/distillation.py
+++ b/examples/seq2seq/distillation.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import argparse
 import gc
 import os
diff --git a/examples/seq2seq/download_wmt.py b/examples/seq2seq/download_wmt.py
old mode 100644
new mode 100755
index efe5ff0b9129f5..bef04726c45ede
--- a/examples/seq2seq/download_wmt.py
+++ b/examples/seq2seq/download_wmt.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 from pathlib import Path
 
 import fire
diff --git a/examples/seq2seq/finetune.py b/examples/seq2seq/finetune.py
old mode 100644
new mode 100755
index f54f15c1d55be3..1f5a7b6e10593c
--- a/examples/seq2seq/finetune.py
+++ b/examples/seq2seq/finetune.py
@@ -1,7 +1,10 @@
+#!/usr/bin/env python
+
 import argparse
 import glob
 import logging
 import os
+import sys
 import time
 from collections import defaultdict
 from pathlib import Path
@@ -13,7 +16,6 @@
 from torch.utils.data import DataLoader
 
 from callbacks import Seq2SeqLoggingCallback, get_checkpoint_callback, get_early_stopping_callback
-from lightning_base import BaseTransformer, add_generic_args, generic_train
 from transformers import MBartTokenizer, T5ForConditionalGeneration
 from transformers.modeling_bart import shift_tokens_right
 from utils import (
@@ -35,6 +37,11 @@
 )
 
 
+# need the parent dir scripts
+sys.path.insert(2, str(Path(__file__).resolve().parents[1]))
+from lightning_base import BaseTransformer, add_generic_args, generic_train # noqa
+
+
 logger = logging.getLogger(__name__)
 
 
diff --git a/examples/seq2seq/finetune.sh b/examples/seq2seq/finetune.sh
index 4d140db48e0a52..683c2d7752df13 100755
--- a/examples/seq2seq/finetune.sh
+++ b/examples/seq2seq/finetune.sh
@@ -1,6 +1,3 @@
-# Add parent directory to python path to access lightning_base.py
-export PYTHONPATH="../":"${PYTHONPATH}"
-
 # the proper usage is documented in the README, you need to specify data_dir, output_dir and model_name_or_path
 # run ./finetune.sh --help to see all the possible options
 python finetune.py \
diff --git a/examples/seq2seq/minify_dataset.py b/examples/seq2seq/minify_dataset.py
old mode 100644
new mode 100755
index da70ced60ab481..c441db565c7f68
--- a/examples/seq2seq/minify_dataset.py
+++ b/examples/seq2seq/minify_dataset.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 from pathlib import Path
 
 import fire
diff --git a/examples/seq2seq/pack_dataset.py b/examples/seq2seq/pack_dataset.py
old mode 100644
new mode 100755
index 4274054a920212..11351b75a7b3d0
--- a/examples/seq2seq/pack_dataset.py
+++ b/examples/seq2seq/pack_dataset.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 """Fill examples with bitext up to max_tokens without breaking up examples.
 [['I went', 'yo fui'],
 ['to the store', 'a la tienda']
diff --git a/examples/seq2seq/run_distributed_eval.py b/examples/seq2seq/run_distributed_eval.py
old mode 100644
new mode 100755
index e8218e19171be2..985c02929bbebf
--- a/examples/seq2seq/run_distributed_eval.py
+++ b/examples/seq2seq/run_distributed_eval.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import argparse
 import shutil
 import time
diff --git a/examples/seq2seq/run_eval.py b/examples/seq2seq/run_eval.py
old mode 100644
new mode 100755
index 09ff4c9a53f5cc..0269ea24385c30
--- a/examples/seq2seq/run_eval.py
+++ b/examples/seq2seq/run_eval.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import argparse
 import datetime
 import json
diff --git a/examples/seq2seq/run_eval_search.py b/examples/seq2seq/run_eval_search.py
old mode 100644
new mode 100755
index 2a819e169f9d34..292918c9f3d93e
--- a/examples/seq2seq/run_eval_search.py
+++ b/examples/seq2seq/run_eval_search.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import argparse
 import itertools
 import operator
diff --git a/examples/seq2seq/save_len_file.py b/examples/seq2seq/save_len_file.py
old mode 100644
new mode 100755
index edd2cf07a670b0..15413cab165428
--- a/examples/seq2seq/save_len_file.py
+++ b/examples/seq2seq/save_len_file.py
@@ -1,14 +1,11 @@
+#!/usr/bin/env python
+
 import fire
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
 from transformers import AutoTokenizer
-
-
-try:
-    from .utils import Seq2SeqDataset, pickle_save
-except ImportError:
-    from utils import Seq2SeqDataset, pickle_save
+from utils import Seq2SeqDataset, pickle_save
 
 
 def save_len_file(
diff --git a/examples/seq2seq/test_bash_script.py b/examples/seq2seq/test_bash_script.py
index 7d163d1c35dc84..45c928ad33c8c4 100644
--- a/examples/seq2seq/test_bash_script.py
+++ b/examples/seq2seq/test_bash_script.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import argparse
 import os
 import sys
diff --git a/examples/seq2seq/test_data/wmt_en_ro/train.len b/examples/seq2seq/test_data/wmt_en_ro/train.len
index 33ce003c8ae3139914a389a714812a2ab13aece4..2632a33e8b8a3a601bdf93f3b8e6783a0c7bba60 100644
GIT binary patch
literal 34
lcmZo*nJUfz0kKmwye+)ry(fEDdzX5%dDnR7dRM3F0RV-J2?PKD

literal 26
hcmZo*jxA)+@V4-d_nz!s?Op24=3V2R>s_6y2LNfV2s8iy

diff --git a/examples/seq2seq/test_data/wmt_en_ro/val.len b/examples/seq2seq/test_data/wmt_en_ro/val.len
index 897314a960b28d927b597805693e63f9de71d903..fdf8fa353eb8d41a6d92ccae9df4b2f8aaf970b5 100644
GIT binary patch
delta 19
WcmdNe;B8=;s>%QXu~R0pDgyu?i~~*p

delta 11
ScmXreU~OQIEo7L;s|)}RKmu?8

diff --git a/examples/seq2seq/test_datasets.py b/examples/seq2seq/test_datasets.py
index 671543133e886f..aaf94fa5e0f732 100644
--- a/examples/seq2seq/test_datasets.py
+++ b/examples/seq2seq/test_datasets.py
@@ -6,14 +6,13 @@
 import pytest
 from torch.utils.data import DataLoader
 
+from pack_dataset import pack_data_dir
+from save_len_file import save_len_file
+from test_seq2seq_examples import ARTICLES, BART_TINY, MARIAN_TINY, MBART_TINY, SUMMARIES, T5_TINY, make_test_data_dir
 from transformers import AutoTokenizer
 from transformers.modeling_bart import shift_tokens_right
 from transformers.testing_utils import slow
-
-from .pack_dataset import pack_data_dir
-from .save_len_file import save_len_file
-from .test_seq2seq_examples import ARTICLES, BART_TINY, MARIAN_TINY, MBART_TINY, SUMMARIES, T5_TINY, make_test_data_dir
-from .utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset
+from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset
 
 
 BERT_BASE_CASED = "bert-base-cased"
diff --git a/examples/seq2seq/test_fsmt_bleu_score.py b/examples/seq2seq/test_fsmt_bleu_score.py
index 95f475698f6f8f..beb7f2bc9857fd 100644
--- a/examples/seq2seq/test_fsmt_bleu_score.py
+++ b/examples/seq2seq/test_fsmt_bleu_score.py
@@ -14,19 +14,13 @@
 # limitations under the License.
 
 import io
-import unittest
-
-
-try:
-    from .utils import calculate_bleu
-except ImportError:
-    from utils import calculate_bleu
-
 import json
+import unittest
 
 from parameterized import parameterized
 from transformers import FSMTForConditionalGeneration, FSMTTokenizer
 from transformers.testing_utils import get_tests_dir, require_torch, slow, torch_device
+from utils import calculate_bleu
 
 
 filename = get_tests_dir() + "/test_data/fsmt/fsmt_val_data.json"

From ee9e6ec0e3b20a5dd27f392010b9c849299ce12d Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Sun, 20 Sep 2020 15:35:19 -0700
Subject: [PATCH 2/4] another script

---
 examples/seq2seq/distillation.py | 7 ++++++-
 examples/seq2seq/finetune.py     | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/examples/seq2seq/distillation.py b/examples/seq2seq/distillation.py
index 1ffd02e9a18450..2ad6d32a6dc91d 100755
--- a/examples/seq2seq/distillation.py
+++ b/examples/seq2seq/distillation.py
@@ -3,6 +3,7 @@
 import argparse
 import gc
 import os
+import sys
 import warnings
 from pathlib import Path
 from typing import List
@@ -15,7 +16,6 @@
 from finetune import SummarizationModule, TranslationModule
 from finetune import main as ft_main
 from initialization_utils import copy_layers, init_student
-from lightning_base import generic_train
 from transformers import AutoModelForSeq2SeqLM, MBartTokenizer, T5Config, T5ForConditionalGeneration
 from transformers.modeling_bart import shift_tokens_right
 from utils import (
@@ -29,6 +29,11 @@
 )
 
 
+# need the parent dir module
+sys.path.insert(2, str(Path(__file__).resolve().parents[1]))
+from lightning_base import generic_train # noqa
+
+
 class BartSummarizationDistiller(SummarizationModule):
     """Supports Bart, Pegasus and other models that inherit from Bart."""
 
diff --git a/examples/seq2seq/finetune.py b/examples/seq2seq/finetune.py
index 1f5a7b6e10593c..ae75753cba61b2 100755
--- a/examples/seq2seq/finetune.py
+++ b/examples/seq2seq/finetune.py
@@ -37,7 +37,7 @@
 )
 
 
-# need the parent dir scripts
+# need the parent dir module
 sys.path.insert(2, str(Path(__file__).resolve().parents[1]))
 from lightning_base import BaseTransformer, add_generic_args, generic_train # noqa
 

From b318640b66981e8fbe525a77ce9cc231b67f9072 Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Thu, 24 Sep 2020 12:09:42 -0700
Subject: [PATCH 3/4] oops

---
 examples/seq2seq/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/seq2seq/README.md b/examples/seq2seq/README.md
index bd0fd1a9d4c255..f32d7ab7dc080f 100644
--- a/examples/seq2seq/README.md
+++ b/examples/seq2seq/README.md
@@ -92,7 +92,7 @@ All finetuning bash scripts call finetune.py (or distillation.py) with reasonabl
 To see all the possible command line options, run:
 
 ```bash
- --help # this calls ./finetune.py --help
+ ./finetune --help
 ```
 
 ### Finetuning Training Params

From c855ebe163e8d4338c37ab33d50206bb302fb9ca Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Thu, 24 Sep 2020 12:10:19 -0700
Subject: [PATCH 4/4] typo

---
 examples/seq2seq/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/seq2seq/README.md b/examples/seq2seq/README.md
index f32d7ab7dc080f..bdc675e0870ba0 100644
--- a/examples/seq2seq/README.md
+++ b/examples/seq2seq/README.md
@@ -92,7 +92,7 @@ All finetuning bash scripts call finetune.py (or distillation.py) with reasonabl
 To see all the possible command line options, run:
 
 ```bash
- ./finetune --help
+ ./finetune.py --help
 ```
 
 ### Finetuning Training Params
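
The pattern the patches above apply to each example script is: a `#!/usr/bin/env python` shebang plus the executable bit, and a `sys.path` tweak so that `lightning_base` (which lives one directory up, in `examples/`) can be imported without the `PYTHONPATH` export that `finetune.sh` used to do. Below is a minimal sketch of that pattern for a hypothetical new script `my_new_script.py` placed in `examples/seq2seq/`; the filename and body are illustrative only and are not part of the patch series.

```python
#!/usr/bin/env python
# my_new_script.py - hypothetical illustration only; not one of the files touched above.
import sys
from pathlib import Path

# examples/seq2seq/my_new_script.py -> parents[1] is examples/, where lightning_base.py lives,
# so no PYTHONPATH export is needed to import it.
sys.path.insert(2, str(Path(__file__).resolve().parents[1]))
from lightning_base import BaseTransformer  # noqa: E402


def main() -> None:
    # Placeholder body: a real script would build its parser and model here.
    print(f"imported {BaseTransformer.__name__} from {Path(__file__).resolve().parents[1]}")


if __name__ == "__main__":
    main()
```

After `chmod +x my_new_script.py` the script can be invoked directly as `./my_new_script.py`, which is what the mode changes in patch 1 enable for the existing scripts.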