Add Llama Flax Implementation #24587

Merged
merged 91 commits from `add-llama-flax` into `main` on Dec 7, 2023
Commits: 91 (changes shown from 86 commits)
415f0d2
Copies `modeling_flax_gpt_neo.py` to start
vvvm23 Jun 6, 2023
76a599c
MLP Block. WIP Attention and Block
vvvm23 Jun 6, 2023
4f5654d
Adds Flax implementation of `LlamaMLP`
vvvm23 Jun 21, 2023
36c48fa
Adds `FlaxLlamaRMSNorm` layer
vvvm23 Jun 21, 2023
a38b097
Adds FlaxLlamaAttention
vvvm23 Jun 23, 2023
5738666
Adds `FlaxLlamaDecoderLayer`
vvvm23 Jun 25, 2023
401a72f
debugging rotary mismatch
vvvm23 Jun 25, 2023
5177bfd
fixes bug with decoder layer
vvvm23 Jun 27, 2023
578e0d9
adds markers for what to implement next
vvvm23 Jun 27, 2023
9f74d83
implements `FlaxLlamaBlockCollection`
vvvm23 Jul 10, 2023
a60b00f
Adds `FlaxLlamaModule`
vvvm23 Jul 11, 2023
c7ac55b
adds `FlaxLlamaForCausalLMModule`
vvvm23 Jul 12, 2023
b6dff5a
start porting pretrained wrappers
vvvm23 Jul 12, 2023
7bf3567
cleanup, quality, style
vvvm23 Jul 14, 2023
99d40a0
readds `return_dict` and model output named tuples
vvvm23 Jul 14, 2023
4eebae9
(tentatively) pretrained wrappers work 🔥
vvvm23 Jul 14, 2023
4bb4206
fixes numerical mismatch in `FlaxLlamaRMSNorm`
vvvm23 Jul 15, 2023
b78671e
[WIP] debugging numerics
vvvm23 Jul 15, 2023
3f7bc54
numerical match
vvvm23 Jul 16, 2023
e386c85
adds in model and integration tests for Flax Llama
vvvm23 Aug 5, 2023
539d041
adds missing TYPE_CHECKING import and `make fixup`
vvvm23 Aug 5, 2023
c695d0a
adds back missing docstrings
vvvm23 Aug 5, 2023
1776e75
commenting out equivalence test as can just use common
vvvm23 Aug 6, 2023
3b4f55a
debugging
vvvm23 Aug 10, 2023
3fa9f2e
Fixes bug where mask and pos_ids were swapped in pretrained models
vvvm23 Aug 10, 2023
9d4bdad
cleanup of modeling file
vvvm23 Aug 11, 2023
ffb5e47
cleanup of test file
vvvm23 Aug 11, 2023
020bd4e
Resolving simpler review comments
vvvm23 Aug 17, 2023
e9e391f
addresses more minor review comments
vvvm23 Aug 23, 2023
ff0818f
fixing introduced pytest errors from review
vvvm23 Aug 23, 2023
d18daad
wip additional slow tests
vvvm23 Aug 23, 2023
230abeb
wip tests
vvvm23 Aug 24, 2023
b19213d
`make quality`, `make style`
vvvm23 Aug 24, 2023
2959abd
adds slow integration tests
vvvm23 Aug 24, 2023
a5b587b
`make fix-copies`
vvvm23 Aug 24, 2023
852e5e3
fix mangled function following `make fix-copies`
vvvm23 Aug 24, 2023
fd85d5a
adds missing type checking imports
vvvm23 Aug 25, 2023
fe5aed2
fixes missing parameter checkpoint warning
vvvm23 Aug 30, 2023
57b47c6
more finegrained 'Copied from' tags
vvvm23 Aug 31, 2023
b768559
swaps import guards
vvvm23 Aug 31, 2023
ac3f74f
removing `inv_freq` again as pytorch version has now removed
vvvm23 Aug 31, 2023
05cade4
attempting to get CI to pass
vvvm23 Aug 31, 2023
3bf0b8b
adds doc entries for llama flax models
vvvm23 Aug 31, 2023
211a72b
fixes typo in __init__.py imports
vvvm23 Aug 31, 2023
27a7522
adds back special equivalence tests
vvvm23 Aug 31, 2023
67f300c
overrides tests with dummy to see if CI passes
vvvm23 Aug 31, 2023
2ec5c20
adds my contribution to docs
vvvm23 Sep 1, 2023
609a113
`make style; make quality`
vvvm23 Sep 1, 2023
224f546
replaces random masking with fixed to work with flax version
vvvm23 Sep 1, 2023
7de8b58
`make quality; make style`
vvvm23 Sep 24, 2023
20b5767
Update src/transformers/models/llama/modeling_flax_llama.py
vvvm23 Sep 7, 2023
ac4183c
Update src/transformers/models/llama/modeling_flax_llama.py
vvvm23 Sep 7, 2023
bd5451a
Update src/transformers/models/llama/modeling_flax_llama.py
vvvm23 Sep 7, 2023
c997a38
Update src/transformers/models/llama/modeling_flax_llama.py
vvvm23 Sep 7, 2023
5019d4c
Update src/transformers/models/llama/modeling_flax_llama.py
vvvm23 Sep 7, 2023
4df7730
Update src/transformers/models/llama/modeling_flax_llama.py
vvvm23 Sep 7, 2023
f8ccb05
updates `x`->`tensor` in `rotate_half`
vvvm23 Sep 24, 2023
b01cb70
addresses smaller review comments
vvvm23 Sep 24, 2023
6848c63
Update docs/source/en/model_doc/llama.md
vvvm23 Sep 24, 2023
9994b91
adds integration test class
vvvm23 Sep 24, 2023
d248925
adds `dtype` to rotary embedding to cast outputs
vvvm23 Sep 24, 2023
f1fc40a
adds type to flax llama rotary layer
vvvm23 Sep 24, 2023
1f7cb9b
`make style`
vvvm23 Sep 24, 2023
be7be91
`make fix-copies`
vvvm23 Sep 24, 2023
3a7a3ae
Apply suggestions from code review
vvvm23 Sep 30, 2023
20e6b35
applies suggestions from review
vvvm23 Sep 30, 2023
3da6a6a
Update modeling_flax_llama.py
vvvm23 Oct 2, 2023
5f5ca1d
`make fix-copies`
vvvm23 Oct 2, 2023
9166130
Update tests/models/llama/test_modeling_llama.py
vvvm23 Oct 2, 2023
d8570b4
Update src/transformers/models/llama/modeling_flax_llama.py
vvvm23 Oct 2, 2023
6d7a930
fixes shape mismatch in FlaxLlamaMLP
vvvm23 Oct 3, 2023
39b55f8
applies some suggestions from reviews
vvvm23 Oct 7, 2023
a6d8c06
casts attn output logits to f32 regardless of dtype
vvvm23 Oct 16, 2023
9718fed
adds attn bias using `LlamaConfig.attention_bias`
vvvm23 Oct 16, 2023
fc6554b
adds Copied From comments to Flax Llama test
vvvm23 Oct 17, 2023
f9fd7b6
mistral and persimmon test change: copy from llama
vvvm23 Oct 17, 2023
8b1f374
updates docs index
vvvm23 Nov 5, 2023
d9c7af6
removes Copied from in tests
vvvm23 Nov 5, 2023
30fe8e1
quality and style
vvvm23 Nov 5, 2023
d21d306
ignores FlaxLlama input docstring
vvvm23 Nov 9, 2023
d14d339
Merge branch 'main' into add-llama-flax
vvvm23 Nov 9, 2023
2b9e410
Merge branch 'main' into add-llama-flax
vvvm23 Nov 24, 2023
3390651
adds revision to `_CHECKPOINT_FOR_DOC`
vvvm23 Nov 24, 2023
f303188
repo consistency and quality
vvvm23 Nov 24, 2023
aea3e03
removes unused import
vvvm23 Nov 24, 2023
842b550
removes copied from from Phi test
vvvm23 Nov 24, 2023
7e69179
Merge branch 'main' into add-llama-flax
vvvm23 Dec 4, 2023
193052f
adds `_REAL_CHECKPOINT_FOR_DOC`
vvvm23 Dec 4, 2023
b33a54d
removes refs from pr tests
vvvm23 Dec 5, 2023
b9ed34f
reformat to make ruff happy
vvvm23 Dec 5, 2023
a15b844
Merge branch 'main' into add-llama-flax
vvvm23 Dec 5, 2023
8 changes: 4 additions & 4 deletions docs/source/en/index.md
@@ -1,4 +1,4 @@
<!--Copyright 2020 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
@@ -94,7 +94,7 @@ Flax), PyTorch, and/or TensorFlow.
| [CLIPSeg](model_doc/clipseg) | ✅ | ❌ | ❌ |
| [CLVP](model_doc/clvp) | ✅ | ❌ | ❌ |
| [CodeGen](model_doc/codegen) | ✅ | ❌ | ❌ |
| [CodeLlama](model_doc/code_llama) | ✅ | ❌ | ❌ |
| [CodeLlama](model_doc/code_llama) | ✅ | ❌ | ✅ |
| [Conditional DETR](model_doc/conditional_detr) | ✅ | ❌ | ❌ |
| [ConvBERT](model_doc/convbert) | ✅ | ✅ | ❌ |
| [ConvNeXT](model_doc/convnext) | ✅ | ✅ | ❌ |
@@ -167,8 +167,8 @@ Flax), PyTorch, and/or TensorFlow.
| [LED](model_doc/led) | ✅ | ✅ | ❌ |
| [LeViT](model_doc/levit) | ✅ | ❌ | ❌ |
| [LiLT](model_doc/lilt) | ✅ | ❌ | ❌ |
| [LLaMA](model_doc/llama) | ✅ | ❌ | ❌ |
| [Llama2](model_doc/llama2) | ✅ | ❌ | ❌ |
| [LLaMA](model_doc/llama) | ✅ | ❌ | ✅ |
| [Llama2](model_doc/llama2) | ✅ | ❌ | ✅ |
| [Longformer](model_doc/longformer) | ✅ | ✅ | ❌ |
| [LongT5](model_doc/longt5) | ✅ | ❌ | ✅ |
| [LUKE](model_doc/luke) | ✅ | ❌ | ❌ |
13 changes: 13 additions & 0 deletions docs/source/en/model_doc/llama.md
@@ -50,6 +50,9 @@ come in several checkpoints they each contain a part of each weight of the model

- The LLaMA tokenizer is a BPE model based on [sentencepiece](https://github.com/google/sentencepiece). One quirk of sentencepiece is that when decoding a sequence, if the first token is the start of the word (e.g. "Banana"), the tokenizer does not prepend the prefix space to the string.

This model was contributed by [zphang](https://huggingface.co/zphang) with contributions from [BlackSamorez](https://huggingface.co/BlackSamorez). The code of the implementation in Hugging Face is based on GPT-NeoX [here](https://github.com/EleutherAI/gpt-neox). The original code of the authors can be found [here](https://github.com/facebookresearch/llama). The Flax version of the implementation was contributed by [afmck](https://huggingface.co/afmck) with the code in the implementation based on Hugging Face's Flax GPT-Neo.


Based on the original LLaMA model, Meta AI has released some follow-up works:

- **Llama2**: Llama2 is an improved version of Llama with some architectural tweaks (Grouped Query Attention), and is pre-trained on 2 trillion tokens. Refer to the documentation of Llama2 which can be found [here](llama2).
@@ -112,3 +115,13 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h

[[autodoc]] LlamaForSequenceClassification
- forward

## FlaxLlamaModel

[[autodoc]] FlaxLlamaModel
- __call__

## FlaxLlamaForCausalLM

[[autodoc]] FlaxLlamaForCausalLM
- __call__
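
For reference, a minimal usage sketch of the two classes documented above, assuming the PR is installed; the checkpoint name is a placeholder, not one prescribed by this PR:

```python
# Minimal sketch: running the new Flax Llama causal LM head.
# "<llama-checkpoint>" is a placeholder -- substitute any Llama-compatible repo.
from transformers import AutoTokenizer, FlaxLlamaForCausalLM

checkpoint = "<llama-checkpoint>"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = FlaxLlamaForCausalLM.from_pretrained(checkpoint)

# Flax models consume NumPy arrays rather than PyTorch tensors.
inputs = tokenizer("Hello, my name is", return_tensors="np")
outputs = model(**inputs)
print(outputs.logits.shape)  # (batch_size, sequence_length, vocab_size)
```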
2 changes: 2 additions & 0 deletions src/transformers/__init__.py
@@ -4191,6 +4191,7 @@
["FlaxGPTNeoForCausalLM", "FlaxGPTNeoModel", "FlaxGPTNeoPreTrainedModel"]
)
_import_structure["models.gptj"].extend(["FlaxGPTJForCausalLM", "FlaxGPTJModel", "FlaxGPTJPreTrainedModel"])
_import_structure["models.llama"].extend(["FlaxLlamaForCausalLM", "FlaxLlamaModel", "FlaxLlamaPreTrainedModel"])
_import_structure["models.longt5"].extend(
["FlaxLongT5ForConditionalGeneration", "FlaxLongT5Model", "FlaxLongT5PreTrainedModel"]
)
@@ -7788,6 +7789,7 @@
from .models.gpt2 import FlaxGPT2LMHeadModel, FlaxGPT2Model, FlaxGPT2PreTrainedModel
from .models.gpt_neo import FlaxGPTNeoForCausalLM, FlaxGPTNeoModel, FlaxGPTNeoPreTrainedModel
from .models.gptj import FlaxGPTJForCausalLM, FlaxGPTJModel, FlaxGPTJPreTrainedModel
from .models.llama import FlaxLlamaForCausalLM, FlaxLlamaModel, FlaxLlamaPreTrainedModel
from .models.longt5 import FlaxLongT5ForConditionalGeneration, FlaxLongT5Model, FlaxLongT5PreTrainedModel
from .models.marian import FlaxMarianModel, FlaxMarianMTModel, FlaxMarianPreTrainedModel
from .models.mbart import (
2 changes: 2 additions & 0 deletions src/transformers/models/auto/modeling_flax_auto.py
@@ -43,6 +43,7 @@
("gpt2", "FlaxGPT2Model"),
("gpt_neo", "FlaxGPTNeoModel"),
("gptj", "FlaxGPTJModel"),
("llama", "FlaxLlamaModel"),
("longt5", "FlaxLongT5Model"),
("marian", "FlaxMarianModel"),
("mbart", "FlaxMBartModel"),
@@ -146,6 +147,7 @@
("gpt2", "FlaxGPT2LMHeadModel"),
("gpt_neo", "FlaxGPTNeoForCausalLM"),
("gptj", "FlaxGPTJForCausalLM"),
("llama", "FlaxLlamaForCausalLM"),
("opt", "FlaxOPTForCausalLM"),
("roberta", "FlaxRobertaForCausalLM"),
("roberta-prelayernorm", "FlaxRobertaPreLayerNormForCausalLM"),
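
With the two mappings above in place, the Flax auto classes can also resolve Llama checkpoints. A sketch, again with a placeholder checkpoint name:

```python
# Sketch: the auto-class route enabled by the new ("llama", ...) entries.
from transformers import FlaxAutoModelForCausalLM

model = FlaxAutoModelForCausalLM.from_pretrained("<llama-checkpoint>")  # placeholder
print(type(model).__name__)  # FlaxLlamaForCausalLM, resolved via the mapping above
```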
17 changes: 17 additions & 0 deletions src/transformers/models/llama/__init__.py
@@ -16,6 +16,7 @@
from ...utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_flax_available,
is_sentencepiece_available,
is_tokenizers_available,
is_torch_available,
@@ -55,6 +56,14 @@
"LlamaForSequenceClassification",
]

try:
if not is_flax_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_flax_llama"] = ["FlaxLlamaForCausalLM", "FlaxLlamaModel", "FlaxLlamaPreTrainedModel"]


if TYPE_CHECKING:
from .configuration_llama import LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP, LlamaConfig
@@ -83,6 +92,14 @@
else:
from .modeling_llama import LlamaForCausalLM, LlamaForSequenceClassification, LlamaModel, LlamaPreTrainedModel

try:
if not is_flax_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_flax_llama import FlaxLlamaForCausalLM, FlaxLlamaModel, FlaxLlamaPreTrainedModel


else:
import sys
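
The guard above follows the repository's standard optional-dependency pattern: the `FlaxLlama*` symbols are registered in the lazy import structure only when Flax is installed. A sketch of how downstream code can check for this, using only public `transformers` utilities:

```python
# Sketch: gate Flax-specific code paths on Flax availability.
from transformers.utils import is_flax_available

if is_flax_available():
    from transformers import FlaxLlamaModel  # exported via the guarded block above
    model_cls = FlaxLlamaModel
else:
    model_cls = None  # JAX/Flax not installed; skip the Flax path
```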