From 8f21968f40306ce3d7bd69cb085a91b35f033f57 Mon Sep 17 00:00:00 2001
From: loubnabnl
Date: Thu, 18 Aug 2022 09:08:06 +0000
Subject: [PATCH 01/10] add examples subfolder

---
 .../codeparrot/examples/README.md             |  16 ++
 .../codeparrot/examples/requirements.txt      |   5 +
 .../examples/train_complexity_predictor.py    | 154 ++++++++++++++++++
 3 files changed, 175 insertions(+)
 create mode 100644 examples/research_projects/codeparrot/examples/README.md
 create mode 100644 examples/research_projects/codeparrot/examples/requirements.txt
 create mode 100644 examples/research_projects/codeparrot/examples/train_complexity_predictor.py

diff --git a/examples/research_projects/codeparrot/examples/README.md b/examples/research_projects/codeparrot/examples/README.md
new file mode 100644
index 000000000000..7c8e59688618
--- /dev/null
+++ b/examples/research_projects/codeparrot/examples/README.md
@@ -0,0 +1,16 @@
+# Examples
+In this folder we showcase some examples of using code models for downstream tasks.
+
+## Complexity prediction
+In this task we want to predict the complexity of Java programs in the [CodeComplex](https://huggingface.co/datasets/codeparrot/codecomplex) dataset. Using the Hugging Face `Trainer`, we finetuned [multilingual CodeParrot](https://huggingface.co/codeparrot/codeparrot-small-multi) and [UniXcoder](https://huggingface.co/microsoft/unixcoder-base-nine) on it, and we used the latter to build this Java complexity prediction [space](https://huggingface.co/spaces/codeparrot/code-complexity-predictor) on the Hugging Face Hub.
+
+To fine-tune a model on this dataset you can use the following command:
+
+```bash
+python train_complexity_predictor.py \
+    --model_ckpt microsoft/unixcoder-base-nine \
+    --num_epochs 60 \
+    --num_warmup_steps 10 \
+    --batch_size 8 \
+    --learning_rate 5e-4
+```
\ No newline at end of file
diff --git a/examples/research_projects/codeparrot/examples/requirements.txt b/examples/research_projects/codeparrot/examples/requirements.txt
new file mode 100644
index 000000000000..997334e27e18
--- /dev/null
+++ b/examples/research_projects/codeparrot/examples/requirements.txt
@@ -0,0 +1,5 @@
+datasets==2.3.2
+transformers==4.21.1
+wandb==0.13.1
+evaluate==0.2.2
+scikit-learn==1.1.2
\ No newline at end of file
diff --git a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py
new file mode 100644
index 000000000000..fa3e09fd602a
--- /dev/null
+++ b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py
@@ -0,0 +1,154 @@
+import numpy as np
+import argparse
+from copy import deepcopy
+
+from torch.optim import AdamW
+from datasets import load_dataset, DatasetDict, ClassLabel
+from evaluate import load
+from transformers import (
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    TrainingArguments,
+    Trainer,
+    TrainerCallback,
+    DataCollatorWithPadding,
+    get_scheduler,
+    set_seed,
+)
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model_ckpt", type=str, default="microsoft/unixcoder-base-nine")
+    parser.add_argument("--num_epochs", type=int, default=5)
+    parser.add_argument("--batch_size", type=int, default=6)
+    parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
+    parser.add_argument("--freeze", type=bool, default=True)
+    parser.add_argument("--learning_rate", type=float, default=5e-4)
+    parser.add_argument("--seed", type=int, default=0)
+    parser.add_argument("--lr_scheduler_type", type=str,
default="cosine") + parser.add_argument("--num_warmup_steps", type=int, default=10) + parser.add_argument("--weight_decay", type=float, default=0.01) + return parser.parse_args() + + +metric = load("accuracy") + + +def compute_metrics(eval_pred): + predictions, labels = eval_pred + predictions = np.argmax(predictions, axis=1) + return metric.compute(predictions=predictions, references=labels) + + +def get_grouped_params(model, args, no_decay=["bias", "ln_1.weight", "ln_2.weight", "ln_f.weight"]): + params_with_wd, params_without_wd = [], [] + for n, p in model.named_parameters(): + if any(nd in n for nd in no_decay): + params_without_wd.append(p) + else: + params_with_wd.append(p) + return [ + {"params": params_with_wd, "weight_decay": args.weight_decay}, + {"params": params_without_wd, "weight_decay": 0.0}, + ] + + +class CustomCallback(TrainerCallback): + def __init__(self, trainer) -> None: + super().__init__() + self._trainer = trainer + + def on_epoch_end(self, args, state, control, **kwargs): + if control.should_evaluate: + control_copy = deepcopy(control) + self._trainer.evaluate(eval_dataset=self._trainer.train_dataset, metric_key_prefix="train") + return control_copy + + +def main(): + args = get_args() + set_seed(args.seed) + + dataset = load_dataset("codeparrot/codecomplex", split="train") + train_test = dataset.train_test_split(test_size=0.2) + test_validation = train_test["test"].train_test_split(test_size=0.5) + train_test_validation = DatasetDict( + { + "train": train_test["train"], + "test": test_validation["train"], + "valid": test_validation["test"], + } + ) + + print("Loading tokenizer and model") + tokenizer = AutoTokenizer.from_pretrained(args.model_ckpt) + tokenizer.pad_token = tokenizer.eos_token + model = AutoModelForSequenceClassification.from_pretrained(args.model_ckpt, num_labels=7) + model.config.pad_token_id = model.config.eos_token_id + + if args.freeze: + for param in model.roberta.parameters(): + param.requires_grad = False + + labels = ClassLabel(num_classes=7, names=list(set(train_test_validation["train"]["complexity"]))) + + def tokenize(example): + inputs = tokenizer(example["src"], truncation=True, max_length=1024) + label = labels.str2int(example["complexity"]) + return { + "input_ids": inputs["input_ids"], + "attention_mask": inputs["attention_mask"], + "label": label, + } + + tokenized_datasets = train_test_validation.map( + tokenize, + batched=True, + remove_columns=train_test_validation["train"].column_names, + ) + data_collator = DataCollatorWithPadding(tokenizer=tokenizer) + + # Prepare the optimizer and learning rate scheduler + optimizer = AdamW(get_grouped_params(model, args), lr=args.learning_rate) + lr_scheduler = get_scheduler( + name=args.lr_scheduler_type, + optimizer=optimizer, + num_training_steps=args.num_epochs, + num_warmup_steps=args.num_warmup_steps, + ) + + training_args = TrainingArguments( + output_dir="./results_java", + learning_rate=args.learning_rate, + evaluation_strategy="epoch", + save_strategy="epoch", + logging_strategy="epoch", + per_device_train_batch_size=args.batch_size, + per_device_eval_batch_size=args.batch_size, + num_train_epochs=args.num_epochs, + gradient_accumulation_steps=args.gradient_accumulation_steps, + weight_decay=0.01, + metric_for_best_model="accuracy", + run_name="complexity-java", + report_to="wandb", + ) + + trainer = Trainer( + model=model, + args=training_args, + train_dataset=tokenized_datasets["train"], + eval_dataset=tokenized_datasets["valid"], + tokenizer=tokenizer, + 
data_collator=data_collator, + compute_metrics=compute_metrics, + optimizers=(optimizer, lr_scheduler), + ) + + print("Training...") + trainer.add_callback(CustomCallback(trainer)) + trainer.train() + + +if __name__ == "__main__": + main() From f8263d18df97e0710f04cdb87a350854e145b8ba Mon Sep 17 00:00:00 2001 From: loubnabnl Date: Thu, 18 Aug 2022 09:08:19 +0000 Subject: [PATCH 02/10] reformat file --- .../codeparrot/scripts/minhash_deduplication.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/research_projects/codeparrot/scripts/minhash_deduplication.py b/examples/research_projects/codeparrot/scripts/minhash_deduplication.py index cd72dcb70c9e..9e1ef11ff07d 100644 --- a/examples/research_projects/codeparrot/scripts/minhash_deduplication.py +++ b/examples/research_projects/codeparrot/scripts/minhash_deduplication.py @@ -63,7 +63,6 @@ def add(self, code_key: Tuple, min_hash: MinHash) -> None: self._index.insert(code_key, min_hash) if len(close_duplicates) > 0: - for base_duplicate in close_duplicates: if base_duplicate in self._duplicate_clusters: self._duplicate_clusters[base_duplicate].add(code_key) From 3794de6e54302162cad71170af03e89adbd0ffc4 Mon Sep 17 00:00:00 2001 From: Loubna Ben Allal <44069155+loubnabnl@users.noreply.github.com> Date: Thu, 18 Aug 2022 11:17:06 +0200 Subject: [PATCH 03/10] mention examples in codeparrot readme --- examples/research_projects/codeparrot/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/research_projects/codeparrot/README.md b/examples/research_projects/codeparrot/README.md index ef92606c545a..076ac245d1dd 100644 --- a/examples/research_projects/codeparrot/README.md +++ b/examples/research_projects/codeparrot/README.md @@ -12,7 +12,7 @@ This is an open-source effort to train and evaluate code generation models. 
Code - continuously push checkpoints to the hub with `huggingface_hub` - stream the dataset with `datasets` during training to avoid disk bottlenecks - apply the `code_eval` metric in `datasets` to evaluate on [OpenAI's _HumanEval_ benchmark](https://huggingface.co/datasets/openai_humaneval) - +- showcase examples for downstream tasks with code models in [examples](https://github.com/huggingface/transformers/tree/main/examples/research_projects/codeparrot/examples) folder ## Installation To install the dependencies simply run the following command: ```bash From c9612538a454f59fa0b5ee682e2c171855ee559f Mon Sep 17 00:00:00 2001 From: loubnabnl Date: Thu, 18 Aug 2022 09:33:33 +0000 Subject: [PATCH 04/10] reformat imports --- .../codeparrot/examples/train_complexity_predictor.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py index fa3e09fd602a..b2ee7e5eb9f8 100644 --- a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py +++ b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py @@ -1,17 +1,18 @@ -import numpy as np import argparse from copy import deepcopy +import numpy as np +from datasets import ClassLabel, DatasetDict, load_dataset from torch.optim import AdamW -from datasets import load_dataset, DatasetDict, ClassLabel + from evaluate import load from transformers import ( AutoModelForSequenceClassification, AutoTokenizer, - TrainingArguments, + DataCollatorWithPadding, Trainer, TrainerCallback, - DataCollatorWithPadding, + TrainingArguments, get_scheduler, set_seed, ) From 7ab673e8a791dc56325c3b7afdb222c914cf9e39 Mon Sep 17 00:00:00 2001 From: loubnabnl Date: Thu, 18 Aug 2022 12:36:46 +0000 Subject: [PATCH 05/10] use Trainer optimizer and scheduler type and add output_dir as argument --- .../examples/train_complexity_predictor.py | 59 ++++++------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py index b2ee7e5eb9f8..b7be3131c5e6 100644 --- a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py +++ b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py @@ -3,24 +3,18 @@ import numpy as np from datasets import ClassLabel, DatasetDict, load_dataset -from torch.optim import AdamW - from evaluate import load -from transformers import ( - AutoModelForSequenceClassification, - AutoTokenizer, - DataCollatorWithPadding, - Trainer, - TrainerCallback, - TrainingArguments, - get_scheduler, - set_seed, -) + +from transformers import (AutoModelForSequenceClassification, AutoTokenizer, + DataCollatorWithPadding, Trainer, TrainerCallback, + TrainingArguments, set_seed) def get_args(): parser = argparse.ArgumentParser() - parser.add_argument("--model_ckpt", type=str, default="microsoft/unixcoder-base-nine") + parser.add_argument( + "--model_ckpt", type=str, default="microsoft/unixcoder-base-nine" + ) parser.add_argument("--num_epochs", type=int, default=5) parser.add_argument("--batch_size", type=int, default=6) parser.add_argument("--gradient_accumulation_steps", type=int, default=1) @@ -30,6 +24,7 @@ def get_args(): parser.add_argument("--lr_scheduler_type", type=str, default="cosine") parser.add_argument("--num_warmup_steps", type=int, default=10) 
parser.add_argument("--weight_decay", type=float, default=0.01) + parser.add_argument("--output_dir", type=str, default="./results") return parser.parse_args() @@ -42,19 +37,6 @@ def compute_metrics(eval_pred): return metric.compute(predictions=predictions, references=labels) -def get_grouped_params(model, args, no_decay=["bias", "ln_1.weight", "ln_2.weight", "ln_f.weight"]): - params_with_wd, params_without_wd = [], [] - for n, p in model.named_parameters(): - if any(nd in n for nd in no_decay): - params_without_wd.append(p) - else: - params_with_wd.append(p) - return [ - {"params": params_with_wd, "weight_decay": args.weight_decay}, - {"params": params_without_wd, "weight_decay": 0.0}, - ] - - class CustomCallback(TrainerCallback): def __init__(self, trainer) -> None: super().__init__() @@ -63,7 +45,9 @@ def __init__(self, trainer) -> None: def on_epoch_end(self, args, state, control, **kwargs): if control.should_evaluate: control_copy = deepcopy(control) - self._trainer.evaluate(eval_dataset=self._trainer.train_dataset, metric_key_prefix="train") + self._trainer.evaluate( + eval_dataset=self._trainer.train_dataset, metric_key_prefix="train" + ) return control_copy @@ -85,14 +69,18 @@ def main(): print("Loading tokenizer and model") tokenizer = AutoTokenizer.from_pretrained(args.model_ckpt) tokenizer.pad_token = tokenizer.eos_token - model = AutoModelForSequenceClassification.from_pretrained(args.model_ckpt, num_labels=7) + model = AutoModelForSequenceClassification.from_pretrained( + args.model_ckpt, num_labels=7 + ) model.config.pad_token_id = model.config.eos_token_id if args.freeze: for param in model.roberta.parameters(): param.requires_grad = False - labels = ClassLabel(num_classes=7, names=list(set(train_test_validation["train"]["complexity"]))) + labels = ClassLabel( + num_classes=7, names=list(set(train_test_validation["train"]["complexity"])) + ) def tokenize(example): inputs = tokenizer(example["src"], truncation=True, max_length=1024) @@ -110,18 +98,10 @@ def tokenize(example): ) data_collator = DataCollatorWithPadding(tokenizer=tokenizer) - # Prepare the optimizer and learning rate scheduler - optimizer = AdamW(get_grouped_params(model, args), lr=args.learning_rate) - lr_scheduler = get_scheduler( - name=args.lr_scheduler_type, - optimizer=optimizer, - num_training_steps=args.num_epochs, - num_warmup_steps=args.num_warmup_steps, - ) - training_args = TrainingArguments( - output_dir="./results_java", + output_dir=args.output_dir, learning_rate=args.learning_rate, + lr_scheduler_type=args.lr_scheduler_type, evaluation_strategy="epoch", save_strategy="epoch", logging_strategy="epoch", @@ -143,7 +123,6 @@ def tokenize(example): tokenizer=tokenizer, data_collator=data_collator, compute_metrics=compute_metrics, - optimizers=(optimizer, lr_scheduler), ) print("Training...") From 1015a06e7663c1171d2e4fbe3da4d4c8758cfb28 Mon Sep 17 00:00:00 2001 From: loubnabnl Date: Thu, 18 Aug 2022 13:02:45 +0000 Subject: [PATCH 06/10] add example of text-to-python and python-to-text models --- .../codeparrot/examples/README.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/examples/research_projects/codeparrot/examples/README.md b/examples/research_projects/codeparrot/examples/README.md index 7c8e59688618..c1980262d827 100644 --- a/examples/research_projects/codeparrot/examples/README.md +++ b/examples/research_projects/codeparrot/examples/README.md @@ -13,4 +13,46 @@ python train_complexity_predictor.py \ --num_warmup_steps 10 \ --batch_size 8 \ --learning_rate 5e-4 +``` + 
+## Code generation: text to Python
+In this task we want to train a model to generate code from English text. We finetuned Codeparrot-small on [github-jupyter-text-to-code](https://huggingface.co/datasets/codeparrot/github-jupyter-text-to-code), a dataset where the samples are a succession of docstrings and their Python code, originally extracted from Jupyter notebooks parsed in this [dataset](https://huggingface.co/datasets/codeparrot/github-jupyter-parsed).
+
+To fine-tune a model on this dataset we use the same [script](https://github.com/huggingface/transformers/blob/main/examples/research_projects/codeparrot/scripts/codeparrot_training.py) as for the pretraining of CodeParrot:
+
+```bash
+accelerate launch scripts/codeparrot_training.py \
+    --model_ckpt codeparrot/codeparrot-small \
+    --dataset_name_train codeparrot/github-jupyter-text-to-code \
+    --dataset_name_valid codeparrot/github-jupyter-text-to-code \
+    --train_batch_size 12 \
+    --valid_batch_size 12 \
+    --learning_rate 5e-4 \
+    --num_warmup_steps 100 \
+    --gradient_accumulation 1 \
+    --gradient_checkpointing False \
+    --max_train_steps 3000 \
+    --save_checkpoint_steps 200 \
+    --save_dir jupyter-text-to-python
+```
+
+## Code explanation: Python to text
+In this task we want to train a model to explain Python code. We finetuned Codeparrot-small on [github-jupyter-code-to-text](https://huggingface.co/datasets/codeparrot/github-jupyter-code-to-text), a dataset where the samples are a succession of Python code and its explanation as a docstring. It was built by inverting the order of the text and code pairs of the text-to-code dataset above and adding the delimiters "Explanation:" and "End of explanation" inside the docstrings.
+
+To fine-tune a model on this dataset we use the same [script](https://github.com/huggingface/transformers/blob/main/examples/research_projects/codeparrot/scripts/codeparrot_training.py) as for the pretraining of CodeParrot:
+
+```bash
+accelerate launch scripts/codeparrot_training.py \
+    --model_ckpt codeparrot/codeparrot-small \
+    --dataset_name_train codeparrot/github-jupyter-code-to-text \
+    --dataset_name_valid codeparrot/github-jupyter-code-to-text \
+    --train_batch_size 12 \
+    --valid_batch_size 12 \
+    --learning_rate 5e-4 \
+    --num_warmup_steps 100 \
+    --gradient_accumulation 1 \
+    --gradient_checkpointing False \
+    --max_train_steps 3000 \
+    --save_checkpoint_steps 200 \
+    --save_dir jupyter-python-to-text
+```
\ No newline at end of file
From fa8a0df4c10a98557c7026d2cd96b311e8c2a8b2 Mon Sep 17 00:00:00 2001
From: loubnabnl
Date: Thu, 18 Aug 2022 13:05:18 +0000
Subject: [PATCH 07/10] reformat imports

---
 .../examples/train_complexity_predictor.py   | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py
index b7be3131c5e6..9fbde6416571 100644
--- a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py
+++ b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py
@@ -5,9 +5,15 @@
 import numpy as np
 from datasets import ClassLabel, DatasetDict, load_dataset
 from evaluate import load
 
-from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
-                          DataCollatorWithPadding, Trainer, TrainerCallback,
-                          TrainingArguments, set_seed)
+from transformers import (
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    DataCollatorWithPadding,
+    Trainer,
+    TrainerCallback,
+    TrainingArguments,
+    set_seed,
+)
 
def get_args(): From da21da8183a98dad02a7f60d2b1abcf322a1e5ff Mon Sep 17 00:00:00 2001 From: loubnabnl Date: Thu, 18 Aug 2022 13:08:32 +0000 Subject: [PATCH 08/10] mention the downstream examples in the readme --- examples/research_projects/codeparrot/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/research_projects/codeparrot/README.md b/examples/research_projects/codeparrot/README.md index 076ac245d1dd..a227b29a1691 100644 --- a/examples/research_projects/codeparrot/README.md +++ b/examples/research_projects/codeparrot/README.md @@ -12,7 +12,11 @@ This is an open-source effort to train and evaluate code generation models. Code - continuously push checkpoints to the hub with `huggingface_hub` - stream the dataset with `datasets` during training to avoid disk bottlenecks - apply the `code_eval` metric in `datasets` to evaluate on [OpenAI's _HumanEval_ benchmark](https://huggingface.co/datasets/openai_humaneval) -- showcase examples for downstream tasks with code models in [examples](https://github.com/huggingface/transformers/tree/main/examples/research_projects/codeparrot/examples) folder +- showcase examples for downstream tasks with code models in [examples](https://github.com/huggingface/transformers/tree/main/examples/research_projects/codeparrot/examples) folder: + - Algorithmic complexity prediction + - Code eneration from english text + - Code explanation + ## Installation To install the dependencies simply run the following command: ```bash From 17e508d914bd8b7c7ead504f5fb995d55d225e4f Mon Sep 17 00:00:00 2001 From: loubnabnl Date: Thu, 18 Aug 2022 13:09:32 +0000 Subject: [PATCH 09/10] reformat code --- .../examples/train_complexity_predictor.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py index 9fbde6416571..8fc30b912468 100644 --- a/examples/research_projects/codeparrot/examples/train_complexity_predictor.py +++ b/examples/research_projects/codeparrot/examples/train_complexity_predictor.py @@ -3,8 +3,8 @@ import numpy as np from datasets import ClassLabel, DatasetDict, load_dataset -from evaluate import load +from evaluate import load from transformers import ( AutoModelForSequenceClassification, AutoTokenizer, @@ -18,9 +18,7 @@ def get_args(): parser = argparse.ArgumentParser() - parser.add_argument( - "--model_ckpt", type=str, default="microsoft/unixcoder-base-nine" - ) + parser.add_argument("--model_ckpt", type=str, default="microsoft/unixcoder-base-nine") parser.add_argument("--num_epochs", type=int, default=5) parser.add_argument("--batch_size", type=int, default=6) parser.add_argument("--gradient_accumulation_steps", type=int, default=1) @@ -51,9 +49,7 @@ def __init__(self, trainer) -> None: def on_epoch_end(self, args, state, control, **kwargs): if control.should_evaluate: control_copy = deepcopy(control) - self._trainer.evaluate( - eval_dataset=self._trainer.train_dataset, metric_key_prefix="train" - ) + self._trainer.evaluate(eval_dataset=self._trainer.train_dataset, metric_key_prefix="train") return control_copy @@ -75,18 +71,14 @@ def main(): print("Loading tokenizer and model") tokenizer = AutoTokenizer.from_pretrained(args.model_ckpt) tokenizer.pad_token = tokenizer.eos_token - model = AutoModelForSequenceClassification.from_pretrained( - args.model_ckpt, num_labels=7 - ) + model = 
AutoModelForSequenceClassification.from_pretrained(args.model_ckpt, num_labels=7) model.config.pad_token_id = model.config.eos_token_id if args.freeze: for param in model.roberta.parameters(): param.requires_grad = False - labels = ClassLabel( - num_classes=7, names=list(set(train_test_validation["train"]["complexity"])) - ) + labels = ClassLabel(num_classes=7, names=list(set(train_test_validation["train"]["complexity"]))) def tokenize(example): inputs = tokenizer(example["src"], truncation=True, max_length=1024) From 6f49ca4f5b0837fc4c1053f8cea1f89215d152ad Mon Sep 17 00:00:00 2001 From: loubnabnl Date: Thu, 18 Aug 2022 13:18:22 +0000 Subject: [PATCH 10/10] fix typo --- examples/research_projects/codeparrot/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/research_projects/codeparrot/README.md b/examples/research_projects/codeparrot/README.md index a227b29a1691..6c57c4350fbc 100644 --- a/examples/research_projects/codeparrot/README.md +++ b/examples/research_projects/codeparrot/README.md @@ -14,7 +14,7 @@ This is an open-source effort to train and evaluate code generation models. Code - apply the `code_eval` metric in `datasets` to evaluate on [OpenAI's _HumanEval_ benchmark](https://huggingface.co/datasets/openai_humaneval) - showcase examples for downstream tasks with code models in [examples](https://github.com/huggingface/transformers/tree/main/examples/research_projects/codeparrot/examples) folder: - Algorithmic complexity prediction - - Code eneration from english text + - Code generation from english text - Code explanation ## Installation
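A minimal inference sketch for the complexity predictor trained by `train_complexity_predictor.py` above. The checkpoint path is an assumption: the script's `--output_dir` defaults to `./results` and the `Trainer` writes `checkpoint-*` folders there, so point `checkpoint_dir` at one of those (or at a directory produced with `trainer.save_model`). The label mapping is rebuilt the same way the script builds its `ClassLabel`:

```python
import numpy as np
from datasets import ClassLabel, load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Assumption: a checkpoint written by the Trainer above, e.g. "./results/checkpoint-500"
# or a directory created with trainer.save_model().
checkpoint_dir = "./results/checkpoint-500"

tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)

# Rebuild the label mapping the same way the training script does
# (a ClassLabel over the dataset's `complexity` column).
dataset = load_dataset("codeparrot/codecomplex", split="train")
labels = ClassLabel(num_classes=7, names=list(set(dataset["complexity"])))

java_code = "int sum = 0; for (int i = 0; i < n; i++) { sum += i; }"
inputs = tokenizer(java_code, truncation=True, max_length=1024, return_tensors="pt")
logits = model(**inputs).logits.detach().numpy()
print(labels.int2str(int(np.argmax(logits, axis=1)[0])))
```

Since `set` ordering is not guaranteed to be identical across Python processes, a more robust design is to persist the label names at training time (for example in the model's `config.id2label`) rather than rebuilding them as above.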