clean underperforming models (#2563)

intel · Dec 5, 2024 · aee3d8f · aee3d8f
1 parent 8518549
commit aee3d8f
Show file tree

Hide file tree

Showing 41 changed files with 37 additions and 4,329 deletions.
diff --git a/README.md b/README.md
@@ -70,14 +70,12 @@ For best performance on Intel® Data Center GPU Flex and Max Series, please chec
 | [BERT large](https://arxiv.org/pdf/1810.04805.pdf) [Sapphire Rapids](https://www.intel.com/content/www/us/en/newsroom/opinion/updates-next-gen-data-center-platform-sapphire-rapids.html#gs.blowcx) | Tensorflow | Training | [FP32 BFloat16 BFloat32](/quickstart/language_modeling/tensorflow/bert_large/training/cpu/README.md) | [SQuAD](https://github.com/IntelAI/models/tree/master/datasets/bert_data/README.md#inference) |
 | [BERT large (Hugging Face)](https://arxiv.org/pdf/1810.04805.pdf) | TensorFlow | Inference | [FP32 FP16 BFloat16 BFloat32](/benchmarks/language_modeling/tensorflow/bert_large_hf/inference/README.md) | [SQuAD](https://github.com/IntelAI/models/tree/master/datasets/bert_data/README.md#inference) |
 | [BERT large](https://arxiv.org/pdf/1810.04805.pdf)   | PyTorch | Inference | [FP32 Int8 BFloat16 BFloat32](/models_v2/pytorch/bert_large/inference/cpu/README.md) | BERT Large SQuAD1.1 |
-| [BERT large](https://arxiv.org/pdf/1810.04805.pdf)   | PyTorch | Training  | [FP32 BFloat16 BFloat32](/models_v2/pytorch/bert_large/training/cpu/README.md) | [preprocessed text dataset](https://drive.google.com/drive/folders/1cywmDnAsrP5-2vsr8GDc6QUc7VWe-M3v) |
 | [DistilBERT base](https://arxiv.org/abs/1910.01108)  | PyTorch | Inference | [FP32 BF32 BF16 Int8-FP32 Int8-BFloat16 BFloat32](/models_v2/pytorch/distilbert/inference/cpu/README.md) | [DistilBERT Base SQuAD1.1](https://huggingface.co/distilbert-base-uncased-distilled-squad) |
 | [RNN-T](https://arxiv.org/abs/2007.15188)            | PyTorch | Inference | [FP32 BFloat16 BFloat32](/models_v2/pytorch/rnnt/inference/cpu/README.md) | [RNN-T dataset](/models_v2/pytorch/rnnt/inference/cpu/download_dataset.sh) |
 | [RNN-T](https://arxiv.org/abs/2007.15188)            | PyTorch | Training  | [FP32 BFloat16 BFloat32](/models_v2/pytorch/rnnt/training/cpu/README.md) | [RNN-T dataset](/models_v2/pytorch/rnnt/training/cpu/download_dataset.sh) |
 | [GPTJ 6B](https://huggingface.co/EleutherAI/gpt-j-6b) | PyTorch | Inference | [FP32 FP16 BFloat16 BF32 INT8](/models_v2/pytorch/gptj/inference/cpu/README.md) | |
 | [GPTJ 6B MLPerf](https://github.com/mlcommons/inference/tree/master/language/gpt-j#datasets--models) | PyTorch | Inference | [INT4](/models_v2/pytorch/gpt-j_mlperf/inference/cpu/README.md) | [CNN-Daily Mail dataset](https://huggingface.co/datasets/cnn_dailymail)|
 | [LLAMA2 7B](https://huggingface.co/meta-llama/Llama-2-7b-hf) | PyTorch | Inference | [FP32 FP16 BFloat16 BF32 INT8](/models_v2/pytorch/llama/inference/cpu/README.md) | |
-| [LLAMA2 7B](https://huggingface.co/meta-llama/Llama-2-7b-hf) | PyTorch | Training | [FP32 FP16 BFloat16 BF32](/models_v2/pytorch/llama/training/cpu/README.md) | |
 | [LLAMA2 13B](https://huggingface.co/meta-llama/Llama-2-13b-hf) | PyTorch | Inference | [FP32 FP16 BFloat16 BF32 INT8](/models_v2/pytorch/llama/inference/cpu/README.md) | |
 | [ChatGLMv3 6B](https://huggingface.co/THUDM/chatglm3-6b) | PyTorch | Inference | [FP32 FP16 BFloat16 BF32 INT8](/models_v2/pytorch/chatglm/inference/cpu/README.md) | |
 

diff --git a/docker/pytorch/docker-compose.yml b/docker/pytorch/docker-compose.yml
@@ -32,15 +32,15 @@ services:
       dockerfile: docker/pytorch/bert_large/inference/cpu/pytorch-bert-large-inference.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
     command: >
       bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
-  bert_large-training-cpu:
-    image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-language-modeling-bert-large-training
-    pull_policy: always
-    build:
-      context: ../../
-      dockerfile: docker/pytorch/bert_large/training/cpu/pytorch-bert-large-training.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
-    extends: bert_large-inference-cpu
-    command: >
-      bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
+  # bert_large-training-cpu:
+  #   image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-language-modeling-bert-large-training
+  #   pull_policy: always
+  #   build:
+  #     context: ../../
+  #     dockerfile: docker/pytorch/bert_large/training/cpu/pytorch-bert-large-training.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
+  #   extends: bert_large-inference-cpu
+  #   command: >
+  #     bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
   maskrcnn-inference-cpu:
     image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-object-detection-maskrcnn-inference
     pull_policy: always
@@ -185,15 +185,15 @@ services:
     extends: bert_large-inference-cpu
     command: >
       bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
-  llama-training-cpu:
-    image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-generative-ai-llama-training
-    pull_policy: always
-    build:
-      context: ../../
-      dockerfile: docker/pytorch/llama/training/cpu/pytorch-llama-training.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
-    extends: bert_large-inference-cpu
-    command: >
-      bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
+  # llama-training-cpu:
+  #   image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-generative-ai-llama-training
+  #   pull_policy: always
+  #   build:
+  #     context: ../../
+  #     dockerfile: docker/pytorch/llama/training/cpu/pytorch-llama-training.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
+  #   extends: bert_large-inference-cpu
+  #   command: >
+  #     bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
   vit-inference-cpu:
     image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-image-recognition-vit-inference
     pull_policy: always

diff --git a/docs/general/CPU_DEVCATALOG.md b/docs/general/CPU_DEVCATALOG.md
@@ -13,7 +13,6 @@ The tables below link to documentation on how to run each use case using docker
 | --------| ------------------------------------------------------ | ---------- | ------| --------------------- |
 | PyTorch | [GPT-J](../../models_v2/pytorch/gptj/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16,INT8-FP32 | Inference | LAMBADA |
 | PyTorch | [Llama 2](../../models_v2/pytorch/llama/inference/cpu/CONTAINER.md) 7B,13B | FP32,BF32,BF16,FP16,INT8-FP32 | Inference | LAMBADA |
-| PyTorch | [Llama 2](../../models_v2/pytorch/llama/training/cpu/CONTAINER.md) 7B | FP32,BF32,BF16,FP16 | Training | LAMBADA | 
 | PyTorch | [ChatGLM](../../models_v2/pytorch/chatglm/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16,INT8-FP32 | Inference | LAMBADA | 
 | PyTorch | [LCM](../../models_v2/pytorch/LCM/inference/cpu/CONTAINER.md) |  FP32,BF32,BF16,FP16,INT8-FP32,INT8-BF16 | Inference | COCO 2017 |
 | PyTorch | [Stable Diffusion](../../models_v2/pytorch/stable_diffusion/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16,INT8-FP32,INT8-BF16 | Inference | COCO 2017 |
@@ -40,7 +39,6 @@ The tables below link to documentation on how to run each use case using docker
 
 | Framework | Model                                                  | Precisions | Mode |  Dataset |
 | --------| ------------------------------------------------------ | ---------- | ------| --------------------- |
-| PyTorch | [BERT large](../../models_v2/pytorch/bert_large/training/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16 | Training | Preprocessed Text dataset |
 | PyTorch | [BERT large](../../models_v2/pytorch/bert_large/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,INT8 | Inference | SQuAD1.1 |
 | PyTorch | [RNN-T](../../models_v2/pytorch/rnnt/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,INT8 | Inference | LibriSpeech |
 | PyTorch | [RNN-T](../../models_v2/pytorch/rnnt/training/cpu/CONTAINER.md) | FP32,BF32,FP16 | Training | LibriSpeech |

diff --git a/models_v2/pytorch/bert_large/inference/cpu/CONTAINER.md b/models_v2/pytorch/bert_large/inference/cpu/CONTAINER.md
@@ -45,7 +45,7 @@ To run the BERT Large inference scripts, set environment variables to specify th
 ```bash
 export EVAL_DATA_FILE=<path to the eval data>
 export OUTPUT_DIR=<directory where log files will be written>
-export PRECISION=<specify the precision>
+export PRECISION=<provide bf16, fp32, fp16, int8, avx-int8, avx-fp32 for throughput and bf16, bf32, fp32, fp16, int8, avx-fp32, avx-int8, fp8 for accuracy and realtime>
 export FINETUNED_MODEL=<path to pre-trained model>
 export TEST_MODE=<provide either REALTIME,THROUGHPUT OR ACCURACY mode>
 export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX_FP16  # only needed for FP16 precision

diff --git a/models_v2/pytorch/bert_large/inference/cpu/README.md b/models_v2/pytorch/bert_large/inference/cpu/README.md
@@ -95,7 +95,7 @@ export FINETUNED_MODEL=$(pwd)/bert_squad_model
 | **TEST_MODE** (THROUGHPUT, ACCURACY, REALTIME)              | `export TEST_MODE=THROUGHPUT (THROUGHPUT, ACCURACY, REALTIME)`                  |
 | **EVAL_DATA_FILE**              | `export EVAL_DATA_FILE=<path to dev-v1.1.json file>`                  |
 | **OUTPUT_DIR**               |                               `export OUTPUT_DIR=<path to an output directory>`                               |
-| **PRECISION**     |                  `export PRECISION=bf16` (bf16, bf32, fp32, fp16, int8, avx-int8, avx-fp32 for throughput and bf16, bf32, fp32, fp16, int8, avx-fp32, avx-int8, fp8 for accuracy) |
+| **PRECISION**     |                  `export PRECISION=bf16` (bf16, fp32, fp16, int8, avx-int8, avx-fp32 for throughput and bf16, bf32, fp32, fp16, int8, avx-fp32, avx-int8, fp8 for accuracy and realtime) |
 | **FINETUNED_MODEL**               |                               `export FINETUNED_MODEL=<path to the fine tuned model>`                               |
 | **MODEL_DIR**               |                               `export MODEL_DIR=$(pwd)`                               |
 | **BATCH_SIZE** (optional)    |                               `export BATCH_SIZE=<set a value for batch size, else it will run with default batch size>`                                |

diff --git a/models_v2/pytorch/bert_large/training/cpu/CONTAINER.md b/models_v2/pytorch/bert_large/training/cpu/CONTAINER.md