Merge branch 'r2.1' into master
pgmpablo157321 authored Jul 26, 2022
2 parents 7c3c697 + 67eb6a0 commit a4e89ee
Showing 17 changed files with 82 additions and 69 deletions.
14 changes: 14 additions & 0 deletions README.md
@@ -13,6 +13,20 @@ Please see the [MLPerf Inference benchmark paper](https://arxiv.org/abs/1911.025
primaryClass={cs.LG}
}
```
## MLPerf Inference v2.1 (submission 08/05/2022)
Use the r2.1 branch (```git checkout r2.1```) if you want to submit or reproduce v2.1 results.

See the individual README files in each reference app for details.

| model | reference app | framework | dataset |
| ---- | ---- | ---- | ---- |
| resnet50-v1.5 | [vision/classification_and_detection](https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection) | tensorflow, pytorch, onnx | imagenet2012 |
| ssd-resnext50 800x800 | [vision/classification_and_detection](https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection) | pytorch, onnx | openimages resized to 800x800 |
| bert | [language/bert](https://github.com/mlcommons/inference/tree/master/language/bert) | tensorflow, pytorch, onnx | squad-1.1 |
| dlrm | [recommendation/dlrm](https://github.com/mlcommons/inference/tree/master/recommendation/dlrm/pytorch) | pytorch, tensorflow(?), onnx(?) | Criteo Terabyte |
| 3d-unet | [vision/medical_imaging/3d-unet-kits19](https://github.com/mlcommons/inference/tree/master/vision/medical_imaging/3d-unet-kits19) | pytorch, tensorflow, onnx | KiTS19 |
| rnnt | [speech_recognition/rnnt](https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt) | pytorch | OpenSLR LibriSpeech Corpus |


## MLPerf Inference v2.0 (submission 02/25/2022)
Use the r2.0 branch (```git checkout r2.0```) if you want to submit or reproduce v2.0 results.
8 changes: 4 additions & 4 deletions compliance/nvidia/TEST01/verify_performance.py
@@ -51,12 +51,12 @@ def main():
continue

if ref_mode == "SingleStream":
if re.match("Early stopping 90th percentile estimate", line):
if re.match(".*Early stopping 90th percentile estimate", line):
ref_score = line.split(": ",1)[1].strip()
continue

if ref_mode == "MultiStream":
if re.match("Early stopping 99th percentile estimate", line):
if re.match(".*Early stopping 99th percentile estimate", line):
ref_score = line.split(": ",1)[1].strip()
continue

@@ -89,12 +89,12 @@ def main():
continue

if test_mode == "SingleStream":
if re.match("Early stopping 90th percentile estimate", line):
if re.match(".*Early stopping 90th percentile estimate", line):
test_score = line.split(": ",1)[1].strip()
continue

if test_mode == "MultiStream":
if re.match("Early stopping 99th percentile estimate", line):
if re.match(".*Early stopping 99th percentile estimate", line):
test_score = line.split(": ",1)[1].strip()
continue

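
Note on the change above (mirrored in TEST04 and TEST05 below): `re.match` only matches at the beginning of the string, so the original pattern silently failed on summary lines that carry anything before the "Early stopping ..." text; prepending `.*` lets such a prefix be skipped. A minimal sketch, with an illustrative log line rather than a real LoadGen summary:

```python
import re

line = "* Early stopping 90th percentile estimate: 12345678"  # illustrative "* " prefix

print(re.match("Early stopping 90th percentile estimate", line))    # None: anchored at position 0
print(re.match(".*Early stopping 90th percentile estimate", line))  # a match object
print(line.split(": ", 1)[1].strip())                                # '12345678'
```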
8 changes: 4 additions & 4 deletions compliance/nvidia/TEST04/verify_performance.py
@@ -50,13 +50,13 @@ def main():
continue

if ref_mode == "SingleStream":
if re.match("Early stopping 90th percentile estimate", line):
if re.match(".*Early stopping 90th percentile estimate", line):
ref_score = line.split(": ",1)[1].strip()
ref_score = 1e9 / float(ref_score)
continue

if ref_mode == "MultiStream":
if re.match("Early stopping 99th percentile estimate", line):
if re.match(".*Early stopping 99th percentile estimate", line):
ref_score = line.split(": ",1)[1].strip()
ref_score = 1e9 / float(ref_score)
continue
@@ -90,13 +90,13 @@ def main():
continue

if test_mode == "SingleStream":
if re.match("Early stopping 90th percentile estimate", line):
if re.match(".*Early stopping 90th percentile estimate", line):
test_score = line.split(": ",1)[1].strip()
test_score = 1e9 / float(test_score)
continue

if test_mode == "MultiStream":
if re.match("Early stopping 99th percentile estimate", line):
if re.match(".*Early stopping 99th percentile estimate", line):
test_score = line.split(": ",1)[1].strip()
test_score = 1e9 / float(test_score)
continue
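
TEST04's checker additionally converts the early-stopping latency estimate into a rate before comparing the reference and test runs: the parsed value is in nanoseconds, and `1e9 / latency_ns` turns it into an equivalent samples-per-second figure. A small worked example with illustrative numbers:

```python
ref_score = "12500000"           # parsed estimate: 12.5 ms expressed in nanoseconds
rate = 1e9 / float(ref_score)    # 80.0 -> roughly "samples per second" used for the comparison
```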
8 changes: 4 additions & 4 deletions compliance/nvidia/TEST05/verify_performance.py
@@ -51,12 +51,12 @@ def main():
continue

if ref_mode == "SingleStream":
if re.match("Early stopping 90th percentile estimate", line):
if re.match(".*Early stopping 90th percentile estimate", line):
ref_score = line.split(": ",1)[1].strip()
continue

if ref_mode == "MultiStream":
if re.match("Early stopping 99th percentile estimate", line):
if re.match(".*Early stopping 99th percentile estimate", line):
ref_score = line.split(": ",1)[1].strip()
continue

@@ -89,12 +89,12 @@ def main():
continue

if test_mode == "SingleStream":
if re.match("Early stopping 90th percentile estimate", line):
if re.match(".*Early stopping 90th percentile estimate", line):
test_score = line.split(": ",1)[1].strip()
continue

if test_mode == "MultiStream":
if re.match("Early stopping 99th percentile estimate", line):
if re.match(".*Early stopping 99th percentile estimate", line):
test_score = line.split(": ",1)[1].strip()
continue

4 changes: 2 additions & 2 deletions loadgen/setup.py
@@ -68,13 +68,13 @@

mlperf_loadgen_module = Extension(
"mlperf_loadgen",
define_macros=[("MAJOR_VERSION", "1"), ("MINOR_VERSION", "1")],
define_macros=[("MAJOR_VERSION", "2"), ("MINOR_VERSION", "1")],
include_dirs=[".", "../third_party/pybind/include"],
sources=mlperf_loadgen_sources,
depends=mlperf_loadgen_headers)

setup(name="mlperf_loadgen",
version="1.1",
version="2.1",
description="MLPerf Inference LoadGen python bindings",
url="https://mlperf.org",
ext_modules=[mlperf_loadgen_module])
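
With the version bumped to 2.1 in both the extension macros and the package metadata, an installed copy of the bindings can be checked quickly. A sketch, assuming the package was installed from this `setup.py` (for example with `pip install .` inside `loadgen/`):

```python
from importlib.metadata import version  # Python 3.8+

print(version("mlperf_loadgen"))  # expected to report 2.1 for this release
```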
2 changes: 1 addition & 1 deletion loadgen/version_generator.py
@@ -93,7 +93,7 @@ def generate_loadgen_version_definitions(cc_filename, loadgen_root):
ofile.write("// DO NOT EDIT: Autogenerated by version_generator.py.\n\n")
ofile.write("#include <string>\n\n")
ofile.write("namespace mlperf {\n\n")
ofile.write(func_def("Version", "\"2.0\""))
ofile.write(func_def("Version", "\"2.1\""))

date_time_now_local = datetime.datetime.now().isoformat()
date_time_now_utc = datetime.datetime.utcnow().isoformat()
13 changes: 7 additions & 6 deletions mlperf.conf
@@ -15,12 +15,13 @@ rnnt.*.performance_sample_count_override = 2513
3d-unet.*.performance_sample_count_override = 0

# Set seeds. The seeds will be distributed two weeks before the submission.
# 0x5c5c885919353c12
*.*.qsl_rng_seed = 6655344265603136530
# 0xdc260b99a7995230
*.*.sample_index_rng_seed = 15863379492028895792
# 0xafbb4808bed4f58f
*.*.schedule_rng_seed = 12662793979680847247
# 0xc63bb0c8c280fdef
*.*.qsl_rng_seed = 14284205019438841327
# 0x39c9343412051148
*.*.sample_index_rng_seed = 4163916728725999944
# 0x04267cf482328355
*.*.schedule_rng_seed = 299063814864929621


*.SingleStream.target_latency_percentile = 90
*.SingleStream.min_duration = 600000
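
The new seeds are written in decimal, with the corresponding hexadecimal value kept in the comment above each entry. A quick consistency check in plain Python:

```python
qsl_rng_seed = 14284205019438841327

print(hex(qsl_rng_seed))  # 0xc63bb0c8c280fdef, matching the comment in mlperf.conf
```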
26 changes: 12 additions & 14 deletions tools/submission/submission-checker.py
@@ -566,14 +566,14 @@
},
"v2.1": {
"models": [
"resnet", "ssd-resnext50", "rnnt",
"resnet", "retinanet", "rnnt",
"bert-99", "bert-99.9",
"dlrm-99", "dlrm-99.9",
"3d-unet-99", "3d-unet-99.9",
],
"required-scenarios-datacenter": {
"resnet": ["Server", "Offline"],
"ssd-resnext50": ["Server", "Offline"],
"retinanet": ["Server", "Offline"],
"rnnt": ["Server", "Offline"],
"bert-99": ["Server", "Offline"],
"bert-99.9": ["Server", "Offline"],
@@ -586,7 +586,7 @@
},
"required-scenarios-edge": {
"resnet": ["SingleStream", "MultiStream", "Offline"],
"ssd-resnext50": ["SingleStream", "MultiStream", "Offline"],
"retinanet": ["SingleStream", "MultiStream", "Offline"],
"rnnt": ["SingleStream", "Offline"],
"bert-99": ["SingleStream", "Offline"],
"3d-unet-99": ["SingleStream", "Offline"],
@@ -596,7 +596,7 @@
},
"required-scenarios-datacenter-edge": {
"resnet": ["SingleStream", "Offline", "MultiStream", "Server"],
"ssd-resnext50": ["SingleStream", "Offline", "MultiStream", "Server"],
"retinanet": ["SingleStream", "Offline", "MultiStream", "Server"],
"rnnt": ["SingleStream", "Offline", "Server"],
"bert-99": ["SingleStream", "Offline", "Server"],
"bert-99.9": ["Offline", "Server"],
@@ -609,8 +609,7 @@
},
"accuracy-target": {
"resnet": ("acc", 76.46 * 0.99),
# TODO: Update accuracy target for ssd-resnext50
"ssd-resnext50": ("mAP", 37.5 * 0.99),
"retinanet": ("mAP", 37.55 * 0.99),
"rnnt": ("WER", (100 - 7.452) * 0.99),
"bert-99": ("F1", 90.874 * 0.99),
"bert-99.9": ("F1", 90.874 * 0.999),
@@ -621,8 +620,8 @@
},
"performance-sample-count": {
"resnet": 1024,
# TODO: Update perf sample count for ssd-resnext50
"ssd-resnext50": 64,
# TODO: Update perf sample count for retinanet
"retinanet": 64,
"rnnt": 2513,
"bert-99": 10833,
"bert-99.9": 10833,
@@ -656,11 +655,10 @@
"ssd_resnet50_v1_fpn_640x640": "ssd-small",
"ssd_resnet50_v1_fpn_1024x1024": "ssd-large",
},
# TODO: Update with the real v2.1 seeds.
"seeds": {
"qsl_rng_seed": 6655344265603136530,
"sample_index_rng_seed": 15863379492028895792,
"schedule_rng_seed": 12662793979680847247,
"qsl_rng_seed": 14284205019438841327,
"sample_index_rng_seed": 4163916728725999944,
"schedule_rng_seed": 299063814864929621,
},
"test05_seeds": {
"qsl_rng_seed" : 313588358309856706,
@@ -671,7 +669,7 @@
],
"latency-constraint": {
"resnet": {"Server": 15000000},
"ssd-resnext50": {"Server": 100000000},
"retinanet": {"Server": 100000000},
"rnnt": {"Server": 1000000000},
"bert-99": {"Server": 130000000},
"bert-99.9": {"Server": 130000000},
@@ -680,7 +678,7 @@
},
"min-queries": {
"resnet": {"SingleStream": 1024, "MultiStream": 270336, "Server": 270336, "Offline": 1},
"ssd-resnext50": {"SingleStream": 1024, "MultiStream": 270336, "Server": 270336, "Offline": 1},
"retinanet": {"SingleStream": 1024, "MultiStream": 270336, "Server": 270336, "Offline": 1},
"rnnt": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
"bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1},
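
In the checker, each `accuracy-target` entry pairs a metric name with 99% (or 99.9%) of the reference accuracy, so the renamed retinanet entry requires an mAP of at least 37.55 * 0.99 = 37.1745. A hypothetical sketch of that comparison, not the checker's actual code:

```python
accuracy_target = {"retinanet": ("mAP", 37.55 * 0.99)}  # from the v2.1 config above

def meets_target(model, measured):
    metric, threshold = accuracy_target[model]
    return measured >= threshold

print(meets_target("retinanet", 37.20))  # True  (37.20 >= 37.1745)
print(meets_target("retinanet", 36.90))  # False
```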
4 changes: 2 additions & 2 deletions vision/classification_and_detection/README.md
@@ -28,8 +28,8 @@ You can find a short tutorial how to use this benchmark [here](https://github.co
| ssd-resnet34 1200x1200 (removed since mlperf-v2.1) | pytorch | mAP 0.20 | coco resized to 1200x1200 | [from zenodo](https://zenodo.org/record/3236545/files/resnet34-ssd1200.pytorch) | [from mlperf](https://github.com/mlperf/inference/tree/master/others/cloud/single_stage_detector/pytorch) | fp32 | NCHW |
| ssd-resnet34 1200x1200 (removed since mlperf-v2.1) | onnx | mAP 0.20 | coco resized to 1200x1200 | from zenodo [opset-8](https://zenodo.org/record/3228411/files/resnet34-ssd1200.onnx) | [from mlperf](https://github.com/mlperf/inference/tree/master/others/cloud/single_stage_detector) converted using these [instructions](https://github.com/BowenBao/inference/tree/master/cloud/single_stage_detector/pytorch#6-onnx) | fp32 | Converted from pytorch model. |
| ssd-resnet34 1200x1200 (removed since mlperf-v2.1) | onnx | mAP 0.20 | coco resized to 1200x1200 | from zenodo [opset-11](https://zenodo.org/record/4735664/files/ssd_resnet34_mAP_20.2.onnx) | [from zenodo](https://zenodo.org/record/3345892/files/tf_ssd_resnet34_22.1.zip) converted using [this script](https://github.com/mlcommons/inference/blob/master/vision/classification_and_detection/tools/convert-to-onnx.sh) | fp32 | Converted from the tensorflow model and uses the same interface as the tensorflow model. |
| retinanet-resnext50 800x800 | pytorch | mAP 0.375 | OpenImages mlperf validation set resized to 800x800 | [from zenodo](https://zenodo.org/record/6617981/files/resnext50_32x4d_fpn.pth) | from mlperf. [Source Code](https://github.com/mlcommons/training/tree/master/single_stage_detector/ssd/model) and [Weights](https://zenodo.org/record/6605272) | fp32 | NCHW |
| retinanet-resnext50 800x800 | onnx | mAP 0.375 | OpenImages mlperf validation set resized to 800x800 | [from zenodo](https://zenodo.org/record/6617879/files/resnext50_32x4d_fpn.onnx) | from mlperf converted from the pytorch model. [Source Code](https://github.com/mlcommons/training/tree/master/single_stage_detector/ssd/model) and [Weights](https://zenodo.org/record/6605272) | fp32 | NCHW |
| retinanet 800x800 | pytorch | mAP 0.3755 | OpenImages mlperf validation set resized to 800x800 | [from zenodo](https://zenodo.org/record/6617981/files/resnext50_32x4d_fpn.pth) | from mlperf. [Source Code](https://github.com/mlcommons/training/tree/master/single_stage_detector/ssd/model) and [Weights](https://zenodo.org/record/6605272) | fp32 | NCHW |
| retinanet 800x800 | onnx | mAP 0.3757 | OpenImages mlperf validation set resized to 800x800 | [from zenodo](https://zenodo.org/record/6617879/files/resnext50_32x4d_fpn.onnx) | from mlperf converted from the pytorch model. [Source Code](https://github.com/mlcommons/training/tree/master/single_stage_detector/ssd/model) and [Weights](https://zenodo.org/record/6605272) | fp32 | NCHW |

## Disclaimer
This benchmark app is a reference implementation that is not meant to be the fastest implementation possible.
2 changes: 1 addition & 1 deletion vision/classification_and_detection/python/dataset.py
@@ -273,7 +273,7 @@ def pre_process_coco_resnet34_tf(img, dims=None, need_transpose=False):
return img


def pre_process_openimages_resnext50(img, dims=None, need_transpose=False):
def pre_process_openimages_retinanet(img, dims=None, need_transpose=False):
img = maybe_resize(img, dims)
img /= 255.
# transpose if needed
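
The renamed helper keeps the same behaviour: resize to the requested dims, scale pixel values into [0, 1], and optionally transpose HWC to CHW. A rough usage sketch; the image loading and file name are assumptions rather than part of `dataset.py`, and the real app handles image loading and color ordering through its dataset classes:

```python
import cv2
import numpy as np

img = cv2.imread("sample.jpg").astype(np.float32)  # HWC, values 0-255 (illustrative input)
img = pre_process_openimages_retinanet(img, dims=[800, 800, 3], need_transpose=True)
# img is now CHW with values in [0, 1], matching the 800x800 retinanet profiles
```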
30 changes: 15 additions & 15 deletions vision/classification_and_detection/python/main.py
@@ -50,17 +50,17 @@
"coco-300-pt":
(coco.Coco, dataset.pre_process_coco_pt_mobilenet, coco.PostProcessCocoPt(False,0.3),
{"image_size": [300, 300, 3]}),
"openimages-300-resnext":
(openimages.OpenImages, dataset.pre_process_openimages_resnext50, openimages.PostProcessOpenImagesResnext(False,0.05,300,300),
"openimages-300-retinanet":
(openimages.OpenImages, dataset.pre_process_openimages_retinanet, openimages.PostProcessOpenImagesRetinanet(False,0.05,300,300),
{"image_size": [300, 300, 3]}),
"openimages-800-resnext":
(openimages.OpenImages, dataset.pre_process_openimages_resnext50, openimages.PostProcessOpenImagesResnext(False,0.05,800,800),
"openimages-800-retinanet":
(openimages.OpenImages, dataset.pre_process_openimages_retinanet, openimages.PostProcessOpenImagesRetinanet(False,0.05,800,800),
{"image_size": [800, 800, 3]}),
"openimages-1200-resnext":
(openimages.OpenImages, dataset.pre_process_openimages_resnext50, openimages.PostProcessOpenImagesResnext(False,0.05,1200,1200),
"openimages-1200-retinanet":
(openimages.OpenImages, dataset.pre_process_openimages_retinanet, openimages.PostProcessOpenImagesRetinanet(False,0.05,1200,1200),
{"image_size": [1200, 1200, 3]}),
"openimages-800-resnext-onnx":
(openimages.OpenImages, dataset.pre_process_openimages_resnext50, openimages.PostProcessOpenImagesResnext(False,0.05,800,800,False),
"openimages-800-retinanet-onnx":
(openimages.OpenImages, dataset.pre_process_openimages_retinanet, openimages.PostProcessOpenImagesRetinanet(False,0.05,800,800,False),
{"image_size": [800, 800, 3]}),
"coco-1200":
(coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCoco(),
@@ -181,20 +181,20 @@
"model-name": "ssd-resnet34",
},

# ssd-resnext50
"ssd-resnext50-pytorch": {
# retinanet
"retinanet-pytorch": {
"inputs": "image",
"outputs": "boxes,labels,scores",
"dataset": "openimages-800-resnext",
"dataset": "openimages-800-retinanet",
"backend": "pytorch-native",
"model-name": "ssd-resnext50",
"model-name": "retinanet",
},
"ssd-resnext50-onnxruntime": {
"retinanet-onnxruntime": {
"inputs": "images",
"outputs": "boxes,labels,scores",
"dataset": "openimages-800-resnext-onnx",
"dataset": "openimages-800-retinanet-onnx",
"backend": "onnxruntime",
"model-name": "ssd-resnext50",
"model-name": "retinanet",
"max-batchsize": 1
},
}
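
The first hunk above renames the dataset entries (dataset class, pre-processing, and post-processing per resolution); the second renames the profiles that reference them. A profile is just a dict of defaults that the reference app applies when it is selected. A hypothetical sketch of that merge, assuming the profile dict shown above is named `SUPPORTED_PROFILES` as in the upstream `main.py`:

```python
selected = "retinanet-onnxruntime"

config = {"max-batchsize": 32}               # illustrative baseline defaults
config.update(SUPPORTED_PROFILES[selected])  # profile values override them
print(config["dataset"])                     # openimages-800-retinanet-onnx
print(config["backend"])                     # onnxruntime
```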
6 changes: 3 additions & 3 deletions vision/classification_and_detection/python/openimages.py
@@ -223,9 +223,9 @@ def finalize(self, result_dict, ds=None, output_dir=None):
result_dict["mAP"] = cocoEval.stats[0]


class PostProcessOpenImagesResnext(PostProcessOpenImages):
class PostProcessOpenImagesRetinanet(PostProcessOpenImages):
"""
Post processing required by ssd-resnext50 / pytorch & onnx
Post processing required by retinanet / pytorch & onnx
"""
def __init__(self, use_inv_map, score_threshold, height, width, dict_format=True):
"""
@@ -245,7 +245,7 @@ def __init__(self, use_inv_map, score_threshold, height, width, dict_format=True
def __call__(self, results, ids, expected=None, result_dict=None):
if self.dict_format:
# If the output of the model is in dictionary format. This happens
# for the model ssd-resnext50-pytorch
# for the model retinanet-pytorch
bboxes_ = [e['boxes'].cpu() for e in results]
labels_ = [e['labels'].cpu() for e in results]
scores_ = [e['scores'].cpu() for e in results]
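
The `dict_format` branch shown above handles the output layout produced by the PyTorch retinanet: a list with one dict per image holding `boxes`, `labels`, and `scores` tensors. An illustrative example of that layout with dummy values rather than real detections:

```python
import torch

results = [{
    "boxes": torch.tensor([[12.0, 30.5, 220.0, 310.0]]),  # one box per row
    "labels": torch.tensor([3]),
    "scores": torch.tensor([0.91]),
}]

bboxes_ = [e["boxes"].cpu() for e in results]  # same access pattern as in __call__ above
labels_ = [e["labels"].cpu() for e in results]
scores_ = [e["scores"].cpu() for e in results]
```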
12 changes: 6 additions & 6 deletions vision/classification_and_detection/run_common.sh
@@ -1,7 +1,7 @@
#!/bin/bash

if [ $# -lt 1 ]; then
echo "usage: $0 tf|onnxruntime|pytorch|tflite [resnet50|mobilenet|ssd-mobilenet|ssd-resnet34|ssd-resnext50] [cpu|gpu]"
echo "usage: $0 tf|onnxruntime|pytorch|tflite [resnet50|mobilenet|ssd-mobilenet|ssd-resnet34|retinanet] [cpu|gpu]"
exit 1
fi
if [ "x$DATA_DIR" == "x" ]; then
@@ -21,7 +21,7 @@ for i in $* ; do
tf|onnxruntime|tflite|pytorch) backend=$i; shift;;
cpu|gpu) device=$i; shift;;
gpu) device=gpu; shift;;
resnet50|mobilenet|ssd-mobilenet|ssd-resnet34|ssd-resnet34-tf|ssd-resnext50) model=$i; shift;;
resnet50|mobilenet|ssd-mobilenet|ssd-resnet34|ssd-resnet34-tf|retinanet) model=$i; shift;;
esac
done

@@ -77,9 +77,9 @@ if [ $name == "ssd-resnet34-tf-onnxruntime" ] ; then
model_path="$MODEL_DIR/ssd_resnet34_mAP_20.2.onnx"
profile=ssd-resnet34-onnxruntime-tf
fi
if [ $name == "ssd-resnext50-onnxruntime" ] ; then
if [ $name == "retinanet-onnxruntime" ] ; then
model_path="$MODEL_DIR/resnext50_32x4d_fpn.onnx"
profile=ssd-resnext50-onnxruntime
profile=retinanet-onnxruntime
fi

#
@@ -99,9 +99,9 @@ if [ $name == "ssd-resnet34-pytorch" ] ; then
model_path="$MODEL_DIR/resnet34-ssd1200.pytorch"
profile=ssd-resnet34-pytorch
fi
if [ $name == "ssd-resnext50-pytorch" ] ; then
if [ $name == "retinanet-pytorch" ] ; then
model_path="$MODEL_DIR/resnext50_32x4d_fpn.pth"
profile=ssd-resnext50-pytorch
profile=retinanet-pytorch
fi


@@ -50,7 +50,7 @@ MLPERF_CLASSES=('Airplane' 'Antelope' 'Apple' 'Backpack' 'Balloon' 'Banana'
'Whale' 'Wheel' 'Wheelchair' 'Whiteboard' 'Window' 'Wine' 'Wine glass' 'Woman'
'Zebra' 'Zucchini')

python openimages_calibration.py \
python3 openimages_calibration.py \
--dataset-dir=${DATASET_PATH} \
--output-labels="openimages-mlperf.json" \
--classes "${MLPERF_CLASSES[@]}"
--classes "${MLPERF_CLASSES[@]}"