diff --git a/test/regression_tests.sh b/test/regression_tests.sh deleted file mode 100644 index 8825ee6ad..000000000 --- a/test/regression_tests.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash - -set -x -set -e - -MMS_REPO="https://github.com/awslabs/multi-model-server.git" -BRANCH=${1:-master} -ROOT_DIR="/workspace/" -CODEBUILD_WD=$(pwd) -MODEL_STORE=$ROOT_DIR"/model_store" -MMS_LOG_FILE="/tmp/mms.log" -TEST_EXECUTION_LOG_FILE="/tmp/test_exec.log" - -install_mms_from_source() { - echo "Cloning & Building Multi Model Server Repo from " $1 - - sudo apt-get -y install nodejs-dev node-gyp libssl1.0-dev - sudo apt-get -y install npm - sudo npm install -g n - sudo n latest - export PATH="$PATH" - sudo npm install -g newman newman-reporter-html - pip install mxnet-mkl - # Clone & Build MMS - echo "Installing MMS from source" - git clone -b $2 $1 - cd multi-model-server - pip install . - cd - - echo "MMS Succesfully installed" - -} - - -start_mms() { - - # Start MMS with Model Store - multi-model-server --start --model-store $1 &>> $2 - sleep 10 - curl http://127.0.0.1:8081/models - -} - -stop_mms_serve() { - multi-model-server --stop -} - -start_secure_mms() { - - # Start MMS with Model Store - multi-model-server --start --mms-config test/resources/config.properties --model-store $1 &>> $2 - sleep 10 - curl --insecure -X GET https://127.0.0.1:8444/models -} - - -run_postman_test() { - # Run Postman Scripts - mkdir $ROOT_DIR/report/ - cd $CODEBUILD_WD/ - set +e - # Run Management API Tests - stop_mms_serve - start_mms $MODEL_STORE $MMS_LOG_FILE - newman run -e test/postman/environment.json --bail --verbose test/postman/management_api_test_collection.json \ - -r cli,html --reporter-html-export $ROOT_DIR/report/management_report.html >>$1 2>&1 - - # Run Inference API Tests after Restart - stop_mms_serve - start_mms $MODEL_STORE $MMS_LOG_FILE - newman run -e test/postman/environment.json --bail --verbose test/postman/inference_api_test_collection.json \ - -d test/postman/inference_data.json -r cli,html --reporter-html-export $ROOT_DIR/report/inference_report.html >>$1 2>&1 - - - # Run Https test cases - stop_mms_serve - start_secure_mms $MODEL_STORE $MMS_LOG_FILE - newman run --insecure -e test/postman/environment.json --bail --verbose test/postman/https_test_collection.json \ - -r cli,html --reporter-html-export $ROOT_DIR/report/MMS_https_test_report.html >>$1 2>&1 - - stop_mms_serve - set -e - cd - -} - - -sudo rm -rf $ROOT_DIR && sudo mkdir $ROOT_DIR -sudo chown -R $USER:$USER $ROOT_DIR -cd $ROOT_DIR -mkdir $MODEL_STORE - -sudo rm -f $TEST_EXECUTION_LOG_FILE $MMS_LOG_FILE - -echo "** Execuing MMS Regression Test Suite executon for " $MMS_REPO " **" - -install_mms_from_source $MMS_REPO $BRANCH -run_postman_test $TEST_EXECUTION_LOG_FILE - -echo "** Tests Complete ** " -exit 0 diff --git a/test/resources/kitten.jpg b/test/resources/kitten.jpg deleted file mode 100644 index ffcd2be2c..000000000 Binary files a/test/resources/kitten.jpg and /dev/null differ diff --git a/test/README.md b/tests/api/README.md similarity index 69% rename from test/README.md rename to tests/api/README.md index cc30f24cd..96518d054 100644 --- a/test/README.md +++ b/tests/api/README.md @@ -2,7 +2,7 @@ This folder contains regression tests executed against MMS master.These tests use [POSTMAN](https://www.postman.com/downloads/) for exercising all the Management & Inference APIs. -### Running the test manually. +### Running the test manually using docker. 
Pull multi-model-server pre build docker image ``` @@ -12,7 +12,7 @@ docker pull awsdeeplearningteam/multi-model-server This would build a docker Image with a awsdeeplearningteam/multi-model-server:latest in which we would run our Regression Tests. ``` -docker run -it --user root awsdeeplearningteam/multi-model-server:latest /bin/bash +docker run -it --user root -v /tmp:/tmp awsdeeplearningteam/multi-model-server:latest /bin/bash ``` In the Docker CLI execute the following cmds. @@ -21,23 +21,31 @@ In the Docker CLI execute the following cmds. apt-get update apt-get install -y git wget sudo git clone https://github.com/awslabs/multi-model-server.git -cd multi-model-server +cd multi-model-server/tests/api ``` To execute tests on master run: -`./test/regression_tests.sh ` +`./regression_tests.sh ` To execute tests on different run: -`./test/regression_tests.sh ` - - -You can view the logs for Test execution & the Multi-model-server in the /tmp dir. +`./regression_tests.sh ` +### Running the test manually in a local environment. ``` -cat /tmp/test_exec.log -cat /tmp/mms.log +git clone https://github.com/awslabs/multi-model-server.git +cd multi-model-server/tests/api ``` +To execute tests on master run: + +`./regression_tests.sh ` + +To execute tests on a different branch run: + +`./regression_tests.sh ` + +You can view the logs for Test execution & the Multi-model-server in the /tmp/MMS_regression folder. + ### Adding tests @@ -49,4 +57,4 @@ Specifically to test for inference against a new model ![POSTMAN UI](screenshot/postman.png) Afterwards, export the collection as a v2.1 collection and replace the existing exported collection. -To add a new suite of tests, add a new collection to /postman and update regression_tests.sh to run the new collection and buldsepc.yml to keep track of the report. +To add a new suite of tests, add a new collection to /postman and update regression_tests.sh to run the new collection and buildspec.yml to keep track of the report. 
\ No newline at end of file diff --git a/test/postman/environment.json b/tests/api/postman/environment.json similarity index 100% rename from test/postman/environment.json rename to tests/api/postman/environment.json diff --git a/test/postman/https_test_collection.json b/tests/api/postman/https_test_collection.json similarity index 99% rename from test/postman/https_test_collection.json rename to tests/api/postman/https_test_collection.json index 175f1bc42..5e21dc48d 100644 --- a/test/postman/https_test_collection.json +++ b/tests/api/postman/https_test_collection.json @@ -273,7 +273,7 @@ "body": { "mode": "file", "file": { - "src": "../examples/image_classifier/kitten.jpg" + "src": "resources/kitten.jpg" }, "options": { "raw": { diff --git a/test/postman/inference_api_test_collection.json b/tests/api/postman/inference_api_test_collection.json similarity index 100% rename from test/postman/inference_api_test_collection.json rename to tests/api/postman/inference_api_test_collection.json diff --git a/test/postman/inference_data.json b/tests/api/postman/inference_data.json similarity index 95% rename from test/postman/inference_data.json rename to tests/api/postman/inference_data.json index b4eb7fb8a..9cfd15ef5 100644 --- a/test/postman/inference_data.json +++ b/tests/api/postman/inference_data.json @@ -4,7 +4,7 @@ "model_name":"alexnet", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -36,7 +36,7 @@ "model_name":"caffenet", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -68,7 +68,7 @@ "model_name":"inception_v1", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -100,7 +100,7 @@ "model_name":"inception-bn", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -132,7 +132,7 @@ "model_name":"mobilenet", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -164,7 +164,7 @@ "model_name":"nin", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -196,7 +196,7 @@ "model_name":"resnet-152", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -228,7 +228,7 @@ "model_name":"resnet-18", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -260,7 +260,7 @@ "model_name":"resnext101", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -292,7 +292,7 @@ "model_name":"resnet18-v1", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + 
"file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -324,7 +324,7 @@ "model_name":"resnet34-v1", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -356,7 +356,7 @@ "model_name":"resnet50-v1", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -388,7 +388,7 @@ "model_name":"resnet101-v1", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -420,7 +420,7 @@ "model_name":"resnet152-v1", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -452,7 +452,7 @@ "model_name":"resnet18-v2", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -484,7 +484,7 @@ "model_name":"resnet34-v2", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -516,7 +516,7 @@ "model_name":"resnet50-v2", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -548,7 +548,7 @@ "model_name":"resnet101-v2", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -580,7 +580,7 @@ "model_name":"resnet152-v2", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -612,7 +612,7 @@ "model_name":"shufflenet", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -644,7 +644,7 @@ "model_name":"onnx-squeezenet", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -676,7 +676,7 @@ "model_name":"squeezenet_v1.1", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -708,7 +708,7 @@ "model_name":"vgg16", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -740,7 +740,7 @@ "model_name":"onnx-vgg16", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -772,7 +772,7 @@ "model_name":"onnx-vgg16_bn", "worker":1, "synchronous":"true", - 
"file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -804,7 +804,7 @@ "model_name":"vgg19", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -836,7 +836,7 @@ "model_name":"onnx-vgg19", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ @@ -868,7 +868,7 @@ "model_name":"onnx-vgg19_bn", "worker":1, "synchronous":"true", - "file":"test/resources/kitten.jpg", + "file":"resources/kitten.jpg", "content-type":"application/json", "validator":"image_classification", "expected":[ diff --git a/test/postman/management_api_test_collection.json b/tests/api/postman/management_api_test_collection.json similarity index 100% rename from test/postman/management_api_test_collection.json rename to tests/api/postman/management_api_test_collection.json diff --git a/tests/api/regression_tests.sh b/tests/api/regression_tests.sh new file mode 100644 index 000000000..b9f15383d --- /dev/null +++ b/tests/api/regression_tests.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +set -x +set -e + +MMS_REPO="https://github.com/awslabs/multi-model-server.git" +BRANCH=${1:-master} +ROOT_DIR="/workspace/" +CODEBUILD_WD=$(pwd) +MODEL_STORE=$ROOT_DIR"/model_store" +TEST_EXECUTION_LOG_FILE="/tmp/test_exec.log" +ARTIFACTS_DIR="tests/api/artifacts" +OUTPUT_DIR=/tmp/MMS_regression + +install_mms_from_source() { + echo "Cloning & Building Multi Model Server Repo from " $1 + sudo apt-get -y install nodejs-dev node-gyp libssl1.0-dev + sudo apt-get -y install npm + sudo npm install -g n + sudo n latest + export PATH="$PATH" + sudo npm install -g newman newman-reporter-html + pip install mxnet-mkl + # Clone & Build MMS + echo "Installing MMS from source" + git clone -b $2 $1 + cd multi-model-server + pip install . 
+ echo "MMS Branch : " "$(git rev-parse --abbrev-ref HEAD)" >> $3 + echo "MMS Branch Commit Id : " "$(git rev-parse HEAD)" >> $3 + echo "Build date : " "$(date)" >> $3 + echo "MMS Successfully installed" +} + +sudo rm -rf $ROOT_DIR $OUTPUT_DIR && sudo mkdir $ROOT_DIR +sudo chown -R $USER:$USER $ROOT_DIR +cd $ROOT_DIR +mkdir $MODEL_STORE + +sudo rm -f $TEST_EXECUTION_LOG_FILE + +echo "** Executing MMS Regression Test Suite for " $MMS_REPO " **" + +install_mms_from_source $MMS_REPO $BRANCH $TEST_EXECUTION_LOG_FILE +ci/scripts/linux_test_api.sh ALL >> $TEST_EXECUTION_LOG_FILE +mv $TEST_EXECUTION_LOG_FILE $ARTIFACTS_DIR +mv $ARTIFACTS_DIR $OUTPUT_DIR +echo "** Tests Complete ** " +exit 0 diff --git a/test/resources/certs.pem b/tests/api/resources/certs.pem similarity index 100% rename from test/resources/certs.pem rename to tests/api/resources/certs.pem diff --git a/test/resources/config.properties b/tests/api/resources/config.properties similarity index 50% rename from test/resources/config.properties rename to tests/api/resources/config.properties index ce6e2f3b9..ed79a84c2 100644 --- a/test/resources/config.properties +++ b/tests/api/resources/config.properties @@ -1,4 +1,4 @@ inference_address=https://127.0.0.1:8443 management_address=https://127.0.0.1:8444 -private_key_file=test/resources/key.pem -certificate_file=test/resources/certs.pem +private_key_file=resources/key.pem +certificate_file=resources/certs.pem diff --git a/test/resources/key.pem b/tests/api/resources/key.pem similarity index 100% rename from test/resources/key.pem rename to tests/api/resources/key.pem diff --git a/test/screenshot/postman.png b/tests/api/screenshot/postman.png similarity index 100% rename from test/screenshot/postman.png rename to tests/api/screenshot/postman.png diff --git a/tests/performance/README.md b/tests/performance/README.md index 32f3c59f9..b90125099 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -9,7 +9,7 @@ The salient features of the performance regression suite are * Non-intrusive - Does not need any code-changes or instrumentation on the server being monitored. * It can be used to monitor a wide variety of server metrics - memory, cpu, io - in addition to traditional API level metrics such as latency, throughput etc. -* It is easy to add custom metrics. For example, in MMS server, `the number of workers spawned` would be an interesting +* It is easy to add custom metrics. For example, in Model server, `the number of workers spawned` would be an interesting metric to track. The platform allows for easy addition of these metrics. * Test cases are specified in human readable yaml files. Every test case has a pass or fail status. This is determined by evaluating expressions specified in the test case. Every expression checks metrics against threshold values. For @@ -19,11 +19,12 @@ possible to specify multiple compute environments against which the test cases w environment, will have its own threshold values. * This suite leverages the open source [Taurus framework](https://gettaurus.org/). * This suite extends the Taurus framework in the following ways - * Adds resource monitoring service. This allows MMS specific metrics to be added. + * Adds resource monitoring service. This allows Model Server specific metrics to be added. * Environments as described earlier. * Specification of pass/fail criterion between two commits. For example, memory consumed by workers should not increase by more than 10% between two commits for the given test case. 
* Custom reporting of results. + * Apache Benchmark executor which supports GET, POST, PUT, OPTIONS, DELETE methods The building blocks of the performance regression suite and flow is captured in the following drawing ``` 2. Install performance regression suite dependencies. ```bash - export MMS_HOME= - pip install -r $MMS_HOME/tests/performance/requirements.txt + export MODEL_SERVER_HOME= + pip install -r $MODEL_SERVER_HOME/tests/performance/requirements.txt ``` -3. Make sure that `git` is installed and the test suites are run from the MMS working directory. +3. Make sure that `git` is installed and the test suites are run from the Model Server working directory. ### B. Running the test suite -1. Make sure parameters set in [tests/common/global_config.yaml](tests/performance/tests/global_config.yaml) are correct. +1. Make sure parameters set in [tests/global_config.yaml](tests/global_config.yaml) are correct. 2. To run the test suite execute [run_performance_suite.py](run_performance_suite.py) with the following parameters * `--artifacts-dir` or `-a` is a directory where the test case results will be stored. The default value is -`$MMS_HOME/tests/performance/run_artifacts`. +`$MODEL_SERVER_HOME/tests/performance/run_artifacts`. * `--test-dir` or `-t` is a directory containing the test cases. The default value is -`$MMS_HOME/tests/performance/tests`. +`$MODEL_SERVER_HOME/tests/performance/tests`. * `--pattern` or `-p` glob pattern picks up certain test cases for execution within the `test-dir`. The default value picks up all test cases. @@ -64,24 +65,35 @@ The default value excludes nothing. the file (minus the extension) found inside the environments folder in each test case. They encapsulate parameter values which are specific to the execution environment. This is a mandatory parameter. + * `--compare-local` or `--no-compare-local` specifies whether to compare against the run artifacts data available on the local machine + or the data available in the S3 bucket. + + * `--compare-with` or `-c` specifies the commit id to compare against. The default value is 'HEAD~1'. A branch name or tag + can also be specified. The comparison happens only if a run artifacts folder for the commit_id and env is available. + + + + The script does the following: 1. Starts the metrics monitoring server. - 2. Collects all the tests from test-dir satisfying the pattern - 3. Executes the tests + 2. Collects all the tests from test-dir satisfying the pattern, excluding the exclude pattern and tests starting with 'skip' + 3. Executes the collected tests 4. Generates artifacts in the artifacts-dir against each test case. + 5. Generates a Pass/Fail report for the test cases + 6. Generates a comparison report for the specified commit id -3. Check the console logs, $artifacts-dir$//performance_results.html report, comparison.csv, comparison.html +3. Check the console logs, $artifacts-dir$//performance_results.html report, comparison_result.csv, comparison_result.html and other artifacts. **Steps are provided below** ```bash -export MMS_HOME= -cd $MMS_HOME/tests/performance +export MODEL_SERVER_HOME= +cd $MODEL_SERVER_HOME/tests/performance -# Note that MMS server started and stopped by the individual test suite. +# Note that the Model Server is started and stopped by the individual test suite. 
+# check variables such as Model server PORT etc +# vi tests/global_config.yaml #all tests python -m run_performance_suite -e xlarge @@ -92,17 +104,93 @@ python -m run_performance_suite -e xlarge -p inference_single_worker ``` ### C. Understanding the test suite artifacts and reports -1. The $artifacts-dir$//performance_results.html is a summary report of the test run. +1. The $artifacts-dir//performance_results.html is a summary report of the test run. 2. Each test yaml is treated as a test suite. Each criteria in the test suite is treated as a test case. If the test suite does not specify any criteria, then the test suite is reported as skipped with 0 test cases. 3. For each test suite, a sub-directory is created containing relevant run artifacts. Important files in this directory are * metrics.csv -- contains the values of the various system-monitored metrics over time + * metrics_agg.csv -- contains percentile values for columns in metrics.csv * finals_stats.csv -- contains the values of the various api metrics over time -4. The $artifacts-dir$//comparison_results.html is a summary report which shows performance difference between +4. The $artifacts-dir//comparison_results.html is a summary report which shows performance difference between the last two commits. 5. The run completes with a console summary of the performance and comparision suites which have failed ![](assets/console.png) ### D. Understanding the test case components +A Test Case consists of the test.yaml, test.jmx, environments/*.yaml files and a global_config.yaml. +Below is the sample folder structure for the 'api_description' test case: +```bash +tests + -- api_description + --- environments + ---- xlarge.yaml + ---- mac_xlarge.yaml + --- api_description.jmx + --- api_description.yaml + -- global_config.yaml +``` + +1. global_config.yaml + - It is a master store for common items across all the tests. + - It contains the common sections, criteria, monitoring metrics etc. + - It also contains variables in the format ${variable} for metric thresholds and other test-specific attributes. + +2. environments/*.yaml + - It stores values specific to an environment. An environment reflects the underlying compute characteristics, e.g. macos_xlarge, ubuntu_xlarge etc. + - A test case can have multiple environments. + - The environment file can override variable values defined in global_config.yaml and test.yaml. + +3. test_name.yaml + - The central file for a test case. Note that the name of the yaml should be the same as the test folder. + - It contains the scenario, specific pre-processing commands (optional) and special criteria (optional) relevant for the test case. + - It inherits the settings defined in global_config.yaml. global_config.yaml's top-level sections can be overridden, merged, or appended based on the following rules + 1. By default, a test case's configuration gets merged with the global configuration. + 2. If the dictionary key is pre-pended with '~', it will get overridden. + 3. Lists in a yaml section get appended. + - Below are sample yamls demonstrating the merging of the global_config and test_name yamls. The list in the "services" section of global_config will + get appended with the list in the 'services' section of the test yaml. The 'reporting' section will get replaced by the '~reporting' section from the test yaml. 
+ Refer to the test case [tests/scale_down_workers/scale_down_workers.yaml](tests/scale_down_workers/scale_down_workers.yaml) and [global_config.yaml](tests/global_config.yaml) for more details + + ```yaml + #global_config.yaml + + services: + - module: shellexec + prepare: + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir /tmp/ts_model_store" + + reporting: + - module: passfail + criteria: + # API requests KPI criteria + - success of ${API_LABEL}<${API_SUCCESS} for 10s, stop as failed + - avg-rt of ${API_LABEL}>${API_AVG_RT}, ${STOP_ALIAS} as failed + + ``` + + ```yaml + #test.yaml + + services: + - module: shellexec + prepare: + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" + + + ~reporting: + - module: passfail + criteria: + # Inbuilt Criteria + - success of ScaleDown<${SCL_DWN_SUCC} for 10s, ${STOP_ALIAS} as failed + - avg-rt of ScaleDown>${SCL_DWN_RT}, ${STOP_ALIAS} as failed + ``` + +4. test.jmx + - The JMeter test scenario file. The test.yaml runs the scenario mentioned in the .jmx file. + + ## Add a new test Follow these three steps to add a new test case to the test suite. @@ -110,18 +198,20 @@ Follow these three steps to add a new test case to the test suite. 1. Add scenario (a.k.a test suite) 2. Add metrics to monitor 3. Add pass/fail criteria (a.k.a test case) +4. Add compare criteria (a.k.a compare test cases) #### 1. Add scenario (a.k.a test suite) +> By default, all scenarios are triggered using _jmeter_ as the underlying executor. + Create a folder for the test under `test_dir` location. A test generally comprises of a jmeter file - containing the load scenario and a yaml file which contains test scenarios specifying the conditions for failure or success. The file-names should be identical to the folder name with their respective extensions. -An example [jmeter script](tests/examples_starter/examples_starter.jmx) -and a [scenario](tests/examples_starter/examples_starter.yaml) is provided as a template to get started. +An example [jmeter script](tests/examples_starter/examples_starter.jmx) and [scenario](tests/examples_starter/examples_starter.yaml) are provided as templates to get started. Please note that various global configuration settings used by examples_starter.jmx script are specified in -[tests/global_config.yaml](tests/performance/tests/global_config.yaml) file. +[tests/global_config.yaml](tests/global_config.yaml) file. ```tests/examples_starter/examples_starter.yaml execution: @@ -135,18 +225,38 @@ Please note that various global configuration settings used by examples_starter. script: examples_starter.jmx ``` - To execute this test suite, run the following command ```bash - export MMS_HOME= - cd $MMS_HOME/tests/performance + export MODEL_SERVER_HOME= + cd $MODEL_SERVER_HOME/tests/performance python -m run_performance_suite -p examples_starter -e xlarge ``` -**Note**: -Taurus provides support for different executors such as JMeter. Supported executor types can be found [here](https://gettaurus.org/docs/ExecutionSettings/). -Details about how to use an existing JMeter script are provided [here](https://gettaurus.org/docs/JMeter/). +**Using Apache Benchmark** + +To execute a scenario using _apache benchmark_ as the executor, in the yaml: +1. Override the `execution` section and explicitly specify "apache_bench" as the value of `executor` +2. 
Override the `scenarios` section and specify the request details under the `requests` section + +``` +~execution: + - executor: apache_bench + concurrency: 10 + hold-for: 300s +~scenarios: + demo: + requests: + - url: http://127.0.0.1:8080/predictions/squeezenet1_1 + label: MyInference + method: POST + file-path: /Users/johndoe/demo/kitten.jpg +``` +Refer to [examples_apache_bench](tests/examples_apache_bench/examples_apache_bench.yaml) for the complete scenario. + +> **Note**: +> Taurus provides support for different executors such as JMeter, Apache Benchmark, etc. Supported executor types can be found [here](https://gettaurus.org/docs/ExecutionSettings/). +> Details about how to use an existing JMeter script are provided [here](https://gettaurus.org/docs/JMeter/). #### 2. Add metrics to monitor Specify the metrics of interest in the services/monitoring section of the yaml. 1. Standalone monitoring server - Use this technique if MMS and the tests execute on different machines. Before running the test cases, - please start the [metrics_monitoring_server.py](metrics_monitoring_server.py) script. It will communicate server + Use this technique if Model Server and the tests execute on different machines. Before running the test cases, + please start the [metrics_monitoring_server.py](agents/metrics_monitoring_server.py) script. It will communicate server metric data with the test client over sockets. The monitoring server runs on port 9009 by default. - To start the monitoring server, run the following commands on the MMS host: + To start the monitoring server, run the following commands on the Model Server host: ```bash - export MMS_HOME= - pip install -r $MMS_HOME/tests/performance/requirements.txt - python $MMS_HOME/tests/performance/metrics_monitoring_server.py --start + export MODEL_SERVER_HOME= + pip install -r $MODEL_SERVER_HOME/tests/performance/requirements.txt + python $MODEL_SERVER_HOME/tests/performance/metrics_monitoring_server.py --start ``` The monitoring section configuration is shown below. ```yaml services: - - module: monitoring - server-agent: - - address: :9009 # metric monitoring service address - label: mms-inference-server # Specified label will be used in reports instead of ip:port + - module: server_monitoring + ServerRemoteClient: + - address: :9009 # metric monitoring service address + label: Model-Server-inference-server # Specified label will be used in reports instead of ip:port interval: 1s # polling interval logging: True # those logs will be saved to "SAlogs_192.168.0.1_9009.csv" in the artifacts dir metrics: # metrics should be supported by monitoring service - - sum_cpu_percent # cpu percent used by all the mms server processes and workers + - sum_cpu_percent # cpu percent used by all the Model server processes and workers - sum_memory_percent - sum_num_handles - - server_workers # no of mms workers + - server_workers # no of Model Server workers ``` The complete yaml can be found [here](tests/examples_remote_monitoring/examples_remote_monitoring.yaml) Use the command below to run the test suite. ```bash - export MMS_HOME= - cd $MMS_HOME/tests/performance + export MODEL_SERVER_HOME= + cd $MODEL_SERVER_HOME/tests/performance python -m run_performance_suite -p examples_remote_monitoring -e xlarge ``` 2. Local monitoring plugin - Use this technique if both MMS and the tests run on the same host. + Use this technique if both Model Server and the tests run on the same host. The monitoring section configuration is shown below. 
```yaml modules: - server_local_monitoring: + server_monitoring: # metrics_monitoring_taurus and dependencies should be in python path class : metrics_monitoring_taurus.Monitor # monitoring class. services: - - module: server_local_monitoring # should be added in modules section + - module: server_monitoring # should be added in modules section ServerLocalClient: # keyword from metrics_monitoring_taurus.Monitor - interval: 1s metrics: @@ -218,8 +328,8 @@ Specify the metrics of interest in the services/monitoring section of the yaml. Use the command below to run the test suite. ```bash - export MMS_HOME= - cd $MMS_HOME/tests/performance + export MODEL_SERVER_HOME= + cd $MODEL_SERVER_HOME/tests/performance python -m run_performance_suite -p examples_local_monitoring -e xlarge ``` @@ -235,7 +345,7 @@ pass-fail module from Taurus to achieve this functionality. More details can be - module: passfail criteria: - class: bzt.modules.monitoring.MonitoringCriteria - subject: mms-inference-server/sum_num_handles + subject: model-server/sum_num_handles condition: '>' threshold: 180 timeframe: 1s @@ -255,19 +365,18 @@ specified in the pass/fail criterion are used for comparison with the previous r - module: passfail criteria: - class: bzt.modules.monitoring.MonitoringCriteria - subject: mms-inference-server/sum_num_handles + subject: model-server/sum_num_handles condition: '>' threshold: 180 timeframe: 1s fail: true stop: true - diff_percent : 30 ``` Note that 1. At least one test suite run on the same environment should have happened in order to do the comparison. 2. The $artifacts-dir$//comparison_results.html is a summary report which shows performance difference - between the last two commits. + between the current run and the run of the user-specified compare_with commit_id. 3. The test case fails if the diff_percent is greater than the specified value across runs. 3. Metrics available for pass-fail criteria @@ -307,19 +416,33 @@ specified in the pass/fail criterion are used for comparison with the previous r * total_workers - Total number of workers spawned * orphans - Total number of orphan processes +4. Add compare criteria: +There are two types of compare criteria you can add for metrics: + 1. diff_percent_run + This criterion checks the percent difference between the first and last values of a metric within a run. + In other words, it verifies that the metric values are the same before and after the scenario run. + 2. diff_percent_previous + This criterion compares against the previous run of the compare_with commit_id. The first and last metric values of the current run + and the previous run are compared, checking that the percentage difference is not greater than diff_percent_previous. + +Note: the formula for percentage difference is abs(value1 - value2)/((value1 + value2)/2) * 100 + +## Guidelines for writing good test cases: +1. The 'timeframe' duration to check values for threshold criteria should be sufficiently large, at least 5 seconds. +2. The duration specified using the 'hold-for' property should also be sufficiently large, at least 5 minutes. +3. When you use diff_percent_run, make sure that the scenario (JMX script) results in a deterministic state across different runs. + ## Test Strategy & Cases More details about our testing strategy and test cases can be found [here](TESTS.md) ## FAQ -Q1. Is it possible to use the performance regression framework to test MMS on Python2.7? +Q1. Is it possible to use the performance regression framework to test Model Server on Python2.7? Yes. 
Even though, the performance regression framework needs Python 3.7+ (as Taurus requires Python 3.7+), there are two possible ways to achieve this -* Please create a Python 2.7 virtual env which runs MMS and a Python 3.7 virtual env which runs +* Please create a Python 2.7 virtual env which runs Model Server and a Python 3.7 virtual env which runs the test framework and test cases. -* Alternatively, deploy the standalone monitoring agent on the MMS instance and run the test cases against the remote +* Alternatively, deploy the standalone monitoring agent on the Model Server instance and run the test cases against the remote server. Note that the standalone monitoring agent works on both Python 2/3. - - diff --git a/tests/performance/agents/config.ini b/tests/performance/agents/config.ini index aacbe97e1..28827140a 100644 --- a/tests/performance/agents/config.ini +++ b/tests/performance/agents/config.ini @@ -6,4 +6,5 @@ HOST = PORT = 9009 [suite] -s3_bucket = mms-performance-regression-reports \ No newline at end of file +s3_bucket = shivam-codebuild-test +comparison_artifacts_dir = perf_comparison_artifacts \ No newline at end of file diff --git a/tests/performance/agents/metrics/__init__.py b/tests/performance/agents/metrics/__init__.py index 9d7bf7eb2..4348a2484 100644 --- a/tests/performance/agents/metrics/__init__.py +++ b/tests/performance/agents/metrics/__init__.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -""" Customised system and mms process metrics for monitoring and pass-fail criteria in taurus""" +""" Customised system and Model Server process metrics for monitoring and pass-fail criteria in taurus""" # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"). @@ -10,6 +10,7 @@ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the License for the specific language governing # permissions and limitations under the License. 
+# pylint: disable=redefined-builtin, redefined-outer-name, broad-except, unused-variable from enum import Enum from statistics import mean @@ -19,7 +20,7 @@ class ProcessType(Enum): - """ Type of MMS processes to compute metrics on """ + """ Type of Server processes to compute metrics on """ FRONTEND = 1 WORKER = 2 ALL = 3 @@ -64,7 +65,8 @@ class ProcessType(Enum): misc_metrics = { 'total_processes': None, 'total_workers': None, - 'orphans': None + 'orphans': None, + 'zombies': None } AVAILABLE_METRICS = list(system_metrics) + list(misc_metrics) @@ -85,6 +87,7 @@ class ProcessType(Enum): AVAILABLE_METRICS.append('{}_{}_{}'.format(op, PNAME, metric)) children = set() +zombie_children = set() def get_metrics(server_process, child_processes, logger): @@ -92,7 +95,7 @@ def get_metrics(server_process, child_processes, logger): """ result = {} children.update(child_processes) - logger.debug("children : {0}".format(",".join([str(c.pid) for c in children]))) + logger.info("children : {0}".format(",".join([str(c.pid) for c in children]))) def update_metric(metric_name, proc_type, stats): stats = list(filter(lambda x: isinstance(x, (float, int)), stats)) @@ -116,22 +119,28 @@ def update_metric(metric_name, proc_type, stats): try: # as_dict() gets all stats in one shot processes_stats.append({'type': ProcessType.FRONTEND, 'stats': server_process.as_dict()}) - except: + except Exception as e: pass - for child in children: + + for child in children | zombie_children: try: child_cmdline = child.cmdline() if psutil.pid_exists(child.pid) and len(child_cmdline) >= 2 and WORKER_NAME in child_cmdline[1]: processes_stats.append({'type': ProcessType.WORKER, 'stats': child.as_dict()}) else: reclaimed_pids.append(child) - logger.debug('child {0} no longer available'.format(child.pid)) - except (NoSuchProcess, ZombieProcess): + logger.info('child {0} no longer available'.format(child.pid)) + except ZombieProcess: + zombie_children.add(child) + except NoSuchProcess: reclaimed_pids.append(child) - logger.debug('child {0} no longer available'.format(child.pid)) + logger.info('child {0} no longer available'.format(child.pid)) for p in reclaimed_pids: - children.remove(p) + if p in children: + children.remove(p) + if p in zombie_children: + zombie_children.remove(p) ### PROCESS METRICS ### worker_stats = list(map(lambda x: x['stats'], \ @@ -147,10 +156,11 @@ def update_metric(metric_name, proc_type, stats): # Total processes result['total_processes'] = len(worker_stats) + 1 - result['total_workers'] = max(len(worker_stats) - 1, 0) + result['total_workers'] = len(worker_stats) result['orphans'] = len(list(filter(lambda p: p['ppid'] == 1, worker_stats))) + result['zombies'] = len(zombie_children) - ### SYSTEM METRICS ### + # ###SYSTEM METRICS ### result['system_disk_used'] = psutil.disk_usage('/').used result['system_memory_percent'] = psutil.virtual_memory().percent system_disk_io_counters = psutil.disk_io_counters() @@ -160,3 +170,22 @@ def update_metric(metric_name, proc_type, stats): result['system_write_bytes'] = system_disk_io_counters.write_bytes return result + + +if __name__ == "__main__": + import logging + import sys + from agents.utils.process import * + from agents import configuration + + logger = logging.getLogger(__name__) + logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) + + PID_FILE = configuration.get('server', 'pid_file', 'model_server.pid') + server_pid = get_process_pid_from_file(get_server_pidfile(PID_FILE)) + server_process = get_server_processes(server_pid) + 
children = get_child_processes(server_process) + + metrics = get_metrics(server_process, children, logger) + + print(metrics) diff --git a/tests/performance/agents/utils/process.py b/tests/performance/agents/utils/process.py index 8bdfb5078..02cf5d528 100644 --- a/tests/performance/agents/utils/process.py +++ b/tests/performance/agents/utils/process.py @@ -17,7 +17,6 @@ import os import tempfile - import psutil @@ -56,9 +55,7 @@ def get_child_processes(process): def get_server_processes(server_process_pid): - """ It caches the main server and child processes at module level. - Ensure that you call this process so that MMS process - """ + """get psutil Process object from process id """ try: server_process = psutil.Process(server_process_pid) except Exception as e: @@ -68,4 +65,5 @@ def get_server_processes(server_process_pid): def get_server_pidfile(file): + """get temp server pid file""" return os.path.join(tempfile.gettempdir(), ".{}".format(file)) diff --git a/tests/performance/requirements.txt b/tests/performance/requirements.txt index 2fa19c26a..55a4486fe 100644 --- a/tests/performance/requirements.txt +++ b/tests/performance/requirements.txt @@ -8,4 +8,5 @@ awscli==1.18.80 click==7.1.2 tabulate==0.8.7 pandas==1.0.3 -termcolor==1.1.0 \ No newline at end of file +termcolor==1.1.0 +bzt== 1.14.2 \ No newline at end of file diff --git a/tests/performance/run_performance_suite.py b/tests/performance/run_performance_suite.py index 45948dff4..aa4d391e6 100755 --- a/tests/performance/run_performance_suite.py +++ b/tests/performance/run_performance_suite.py @@ -1,4 +1,4 @@ - +#!/usr/bin/env python # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"). @@ -13,20 +13,20 @@ """ Run Performance Regression Test Cases and Generate Reports """ -# pylint: disable=redefined-builtin, no-value-for-parameter +# pylint: disable=redefined-builtin, no-value-for-parameter, unused-argument import logging import os import subprocess import sys import time +import pathlib import click -import pathlib -from runs.context import ExecutionEnv -from runs.taurus import get_taurus_options, x2junit, update_taurus_metric_files from tqdm import tqdm +from runs.context import ExecutionEnv +from runs.taurus import get_taurus_options, x2junit, update_taurus_metric_files from utils import run_process, Timer, get_sub_dirs logger = logging.getLogger(__name__) @@ -71,8 +71,9 @@ def validate_env(ctx, param, value): @click.option('--monit/--no-monit', help='Start Monitoring server', default=True) @click.option('--compare-local/--no-compare-local', help='Compare with previous run with files stored' ' in artifacts directory', default=True) +@click.option('-c', '--compare-with', help='Compare with commit id, branch, tag, HEAD~N.', default="HEAD~1") def run_test_suite(artifacts_dir, test_dir, pattern, exclude_pattern, - jmeter_path, env_name, monit, compare_local): + jmeter_path, env_name, monit, compare_local, compare_with): """Collect test suites, run them and generate reports""" logger.info("Artifacts will be stored in directory %s", artifacts_dir) @@ -84,8 +85,8 @@ def run_test_suite(artifacts_dir, test_dir, pattern, exclude_pattern, else: logger.info("Collected tests %s", test_dirs) - with ExecutionEnv(MONITORING_AGENT, artifacts_dir, env_name, compare_local, monit) as prt: - pre_command = 'export PYTHONPATH={}:$PYTHONPATH;'.format(os.path.join(str(ROOT_PATH), "agents")) + with ExecutionEnv(MONITORING_AGENT, artifacts_dir, env_name, 
compare_local, compare_with, monit) as prt: + pre_command = 'export PYTHONPATH={}:$PYTHONPATH;'.format(os.path.join(str(ROOT_PATH), "runs", "taurus", "override")) for suite_name in tqdm(test_dirs, desc="Test Suites"): with Timer("Test suite {} execution time".format(suite_name)) as t: suite_artifacts_dir = os.path.join(artifacts_dir, suite_name) @@ -95,10 +96,13 @@ def run_test_suite(artifacts_dir, test_dir, pattern, exclude_pattern, test_file = os.path.join(test_dir, suite_name, "{}.yaml".format(suite_name)) with x2junit.X2Junit(suite_name, suite_artifacts_dir, prt.reporter, t, env_name) as s: s.code, s.err = run_process("{} bzt {} {} {} {}".format(pre_command, options_str, - test_file, env_yaml_path, - GLOBAL_CONFIG_PATH)) + GLOBAL_CONFIG_PATH, test_file, + env_yaml_path)) + + update_taurus_metric_files(suite_artifacts_dir) + + sys.exit(prt.exit_code) - update_taurus_metric_files(suite_artifacts_dir, test_file) if __name__ == "__main__": run_test_suite() diff --git a/tests/performance/runs/compare.py b/tests/performance/runs/compare.py index 8ebbb4b67..28f9068c6 100644 --- a/tests/performance/runs/compare.py +++ b/tests/performance/runs/compare.py @@ -15,9 +15,6 @@ """ # pylint: disable=redefined-builtin, self-assigning-variable, broad-except - -import csv -import glob import logging import sys import os @@ -34,16 +31,18 @@ class CompareReportGenerator(): + """Wrapper class to generate the compare report""" - def __init__(self, path, env_name, local_run): + def __init__(self, path, env_name, local_run, compare_with): self.artifacts_dir = path self.current_run_name = os.path.basename(path) self.env_name = env_name + self.compare_with = compare_with storage_class = LocalStorage if local_run else S3Storage - self.storage = storage_class(self.artifacts_dir, self.env_name) + self.storage = storage_class(self.artifacts_dir, self.env_name, compare_with) self.junit_reporter = None self.pandas_result = None - self.pass_fail = True + self.pass_fail = True def gen(self): """Driver method to get comparison directory, do the comparison of it with current run directory @@ -52,10 +51,11 @@ def gen(self): compare_dir, compare_run_name = self.storage.get_dir_to_compare() if compare_run_name: self.junit_reporter, self.pandas_result = compare_artifacts(self.storage.artifacts_dir, compare_dir, - self.storage.current_run_name, compare_run_name) + self.storage.current_run_name, compare_run_name) self.pandas_result.to_csv(os.path.join(self.artifacts_dir, "comparison_result.csv")) else: - logger.warning("The latest run not found for env.") + logger.info("The latest run for comparison was not found for env='%s' and commit_id='%s'.", + self.env_name, self.compare_with) self.storage.store_results() return self.junit_reporter @@ -87,7 +87,7 @@ def add_test_case(self, name, msg, type): def get_log_file(dir, sub_dir): """Get metric monitoring log files""" - metrics_file = os.path.join(dir, sub_dir, "metrics.csv") + metrics_file = os.path.join(dir, sub_dir, "metrics_agg.csv") return metrics_file if os.path.exists(metrics_file) else None @@ -102,11 +102,21 @@ def get_aggregate_val(df, agg_func, col): return val +def get_centile_val(df, agg_func, col): + """Get the aggregate value of a pandas dataframe column for a given aggregate function""" + + val = None + if "metric_name" in df and agg_func in df: + val = df[df["metric_name"] == col][agg_func] + val = val.iloc[0] if len(val) >= 1 else None + return val + + def compare_values(val1, val2, diff_percent, run_name1, run_name2): """ Compare percentage diff values of val1 and 
val2 """ if pd.isna(val1) or pd.isna(val2): - msg = "Either of the value can not be determined. The run1 value is '{}' and " \ - "run2 value is {}.".format(val1, val2) + msg = "One of the values could not be determined. run1_value='{}' and " \ + "run2_value='{}'.".format(val1, val2) pass_fail, diff, msg = "error", "NA", msg else: try: @@ -116,15 +126,15 @@ def compare_values(val1, val2, diff_percent, run_name1, run_name2): if diff < float(diff_percent): pass_fail, diff, msg = "pass", diff, "passed" else: - msg = "The diff_percent criteria has failed. The expected diff_percent is '{}' and actual " \ - "diff percent is '{}' and the '{}' run value is '{}' and '{}' run value is '{}'. ". \ + msg = "The diff_percent criteria has failed. Expected='{}', actual='{}' " \ + "run1='{}', run1_value='{}', run2='{}', run2_value='{}' ". \ format(diff_percent, diff, run_name1, val1, run_name2, val2) pass_fail, diff, msg = "fail", diff, msg else: # special case of 0 pass_fail, diff, msg = "pass", 0, "" except Exception as e: - msg = "error while calculating the diff for val1={} and val2={}." \ + msg = "error while calculating the diff for val1='{}' and val2='{}'." \ "Error is: {}".format(val1, val2, str(e)) logger.info(msg) pass_fail, diff, msg = "pass", "NA", msg @@ -139,7 +149,7 @@ def compare_artifacts(dir1, dir2, run_name1, run_name2): sub_dirs_1 = get_sub_dirs(dir1) over_all_pass = True - aggregates = ["mean", "max", "min"] + aggregates = ["first_value", "last_value"] header = ["run_name1", "run_name2", "test_suite", "metric", "run1", "run2", "percentage_diff", "expected_diff", "result", "message"] rows = [header] @@ -161,14 +171,18 @@ def compare_artifacts(dir1, dir2, run_name1, run_name2): metrics_from_file1 = pd.read_csv(metrics_file1) metrics_from_file2 = pd.read_csv(metrics_file2) - metrics, diff_percents = taurus_reader.get_compare_metric_list(dir1, sub_dir1) + metrics = taurus_reader.get_compare_metric_list(dir1, sub_dir1) - for col, diff_percent in zip(metrics, diff_percents): + for metric_values in metrics: + col = metric_values[0] + diff_percent = metric_values[1] + if diff_percent is None: + continue for agg_func in aggregates: name = "{}_{}".format(agg_func, str(col)) - val1 = get_aggregate_val(metrics_from_file1, agg_func, col) - val2 = get_aggregate_val(metrics_from_file2, agg_func, col) + val1 = get_centile_val(metrics_from_file1, agg_func, col) + val2 = get_centile_val(metrics_from_file2, agg_func, col) diff, pass_fail, msg = compare_values(val1, val2, diff_percent, run_name1, run_name2) @@ -188,3 +202,10 @@ def compare_artifacts(dir1, dir2, run_name1, run_name2): dataframe = pd.DataFrame(rows[1:], columns=rows[0]) return reporter, dataframe + +if __name__ == "__main__": + compare_artifacts( + "./run_artifacts/xlarge__5c35d98__1594819866", + "./run_artifacts/xlarge__f386038__1594819700", + "xlarge__5c35d98__1594819866", "xlarge__f386038__1594819700" + ) diff --git a/tests/performance/runs/context.py b/tests/performance/runs/context.py index a204c67bc..35905ba9b 100644 --- a/tests/performance/runs/context.py +++ b/tests/performance/runs/context.py @@ -13,12 +13,13 @@ """ Start and stop monitoring server """ -# pylint: disable=redefined-builtin +# pylint: disable=redefined-builtin, broad-except import logging import os import sys import time +import subprocess import webbrowser from termcolor import colored @@ -32,20 +33,32 @@ logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) +def get_git_commit_id(compare_with): + """Get short commit id for compare_with 
commit, branch, tag""" + cmd = 'git rev-parse --short {}'.format(compare_with) + logger.info("Running command: %s", cmd) + commit_id = subprocess.check_output(cmd.split()).decode("utf-8")[:-1] + logger.info("Commit id for compare_with='%s' is '%s'", compare_with, commit_id) + return commit_id + + class ExecutionEnv(object): """ Context Manager class to run the performance regression suites """ - def __init__(self, agent, artifacts_dir, env, local_run, use=True, check_mms_server_status=False): + def __init__(self, agent, artifacts_dir, env, local_run, compare_with, use=True, check_model_server_status=False): self.monitoring_agent = agent self.artifacts_dir = artifacts_dir self.use = use self.env = env self.local_run = local_run - self.check_mms_server_status = check_mms_server_status + self.compare_with = get_git_commit_id(compare_with) + self.check_model_server_status = check_model_server_status self.reporter = JUnitXml() - self.compare_reporter_generator = CompareReportGenerator(self.artifacts_dir, self.env, self.local_run) + self.compare_reporter_generator = CompareReportGenerator(self.artifacts_dir, self.env, self.local_run, + self.compare_with) + self.exit_code = 1 def __enter__(self): if self.use: @@ -56,14 +69,16 @@ def __enter__(self): @staticmethod def open_report(file_path): + """Open html report in browser """ if os.path.exists(file_path): return webbrowser.open_new_tab('file://' + os.path.realpath(file_path)) return False @staticmethod def report_summary(reporter, suite_name): + """Create a report summary """ if reporter and os.path.exists(reporter.junit_html_path): - status = reporter.junit_xml.errors or reporter.junit_xml.failures or reporter.junit_xml.skipped + status = reporter.junit_xml.errors or reporter.junit_xml.failures status, code, color = ("failed", 3, "red") if status else ("passed", 0, "green") msg = "{} run has {}.".format(suite_name, status) @@ -86,13 +101,20 @@ def __exit__(self, type, value, traceback): junit_reporter = JunitConverter(self.reporter, self.artifacts_dir, 'performance_results') junit_reporter.generate_junit_report() - junit_compare = self.compare_reporter_generator.gen() junit_compare_reporter = None - if junit_compare: - junit_compare_reporter = JunitConverter(junit_compare, self.artifacts_dir, 'comparison_results') - junit_compare_reporter.generate_junit_report() + try: + junit_compare = self.compare_reporter_generator.gen() + if junit_compare: + junit_compare_reporter = JunitConverter(junit_compare, self.artifacts_dir, 'comparison_results') + junit_compare_reporter.generate_junit_report() + except Exception as e: + logger.info("An exception occurred while comparing results", exc_info=1) compare_exit_code = ExecutionEnv.report_summary(junit_compare_reporter, "Comparison Test suite") exit_code = ExecutionEnv.report_summary(junit_reporter, "Performance Regression Test suite") - sys.exit(0 if 0 == exit_code == compare_exit_code else 3) + self.exit_code = 0 if 0 == exit_code == compare_exit_code else 3 + + # Returning False ensures that the __exit__ method does not suppress an exception; + # otherwise exceptions would not be reported + return False diff --git a/tests/performance/runs/junit.py b/tests/performance/runs/junit.py index 8ff8f1951..fb3c41c41 100644 --- a/tests/performance/runs/junit.py +++ b/tests/performance/runs/junit.py @@ -19,13 +19,16 @@ import html import textwrap import tabulate -from utils import run_process from junitparser import JUnitXml +from utils import run_process + + header = ["suite_name", "test_case", "result", "message"] class 
JunitConverter(): + """Convert JUnit XML object to XML and HTML report""" def __init__(self, junit_xml, out_dir, report_name): self.junit_xml = junit_xml @@ -50,7 +53,7 @@ def pretty_text(data): def junit2array(junit_xml): """convert junit xml junitparser.JUnitXml object to 2d array """ rows = [header] - for i, suite in enumerate(junit_xml): + for _, suite in enumerate(junit_xml): if len(suite) == 0: rows.append([suite.name, "", "skipped", "No criteria specified or there is an error."]) diff --git a/tests/performance/runs/storage.py b/tests/performance/runs/storage.py index 6db69716c..5e6f04ac6 100644 --- a/tests/performance/runs/storage.py +++ b/tests/performance/runs/storage.py @@ -20,25 +20,27 @@ import os import sys import shutil +import pathlib import boto3 -import pathlib -from agents import configuration +from agents import configuration from utils import run_process logger = logging.getLogger(__name__) logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) S3_BUCKET = configuration.get('suite', 's3_bucket') +S3_COMPARE_DIR = configuration.get('suite', 'comparison_artifacts_dir') class Storage(): """Class to store and retrieve artifacts""" - def __init__(self, path, env_name): + def __init__(self, path, env_name, compare_with): self.artifacts_dir = path self.current_run_name = os.path.basename(path) self.env_name = env_name + self.compare_with = compare_with def get_dir_to_compare(self): """get the artifacts dir to compare to""" @@ -47,7 +49,7 @@ def store_results(self): """Store the results""" @staticmethod - def get_latest(names, env_name, exclude_name): + def get_latest(names, env_name, exclude_name, compare_with): """ Get latest directory for same env_name name given a list of them. :param names: list of folder names in the format env_name___commitid__timestamp @@ -59,7 +61,8 @@ def get_latest(names, env_name, exclude_name): latest_run = '' for run_name in names: run_name_list = run_name.split('__') - if env_name == run_name_list[0] and run_name != exclude_name: + if env_name == run_name_list[0] and compare_with == run_name_list[1] \ + and run_name != exclude_name: if int(run_name_list[2]) > max_ts: max_ts = int(run_name_list[2]) latest_run = run_name @@ -76,7 +79,7 @@ def get_dir_to_compare(self): """Get latest run directory name to be compared with""" parent_dir = pathlib.Path(self.artifacts_dir).parent names = [di for di in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, di))] - latest_run = self.get_latest(names, self.env_name, self.current_run_name) + latest_run = self.get_latest(names, self.env_name, self.current_run_name, self.compare_with) return os.path.join(parent_dir, latest_run), latest_run @@ -90,22 +93,26 @@ def get_dir_to_compare(self): comp_data_path = os.path.join(self.artifacts_dir, "comp_data") s3 = boto3.resource('s3') bucket = s3.Bucket(S3_BUCKET) + prefix = S3_COMPARE_DIR+"/" result = bucket.meta.client.list_objects(Bucket=bucket.name, + Prefix=prefix, Delimiter='/') run_names = [] - for o in result.get('CommonPrefixes'): - run_names.append(o.get('Prefix')[:-1]) + if result.get('CommonPrefixes') is not None: + for o in result.get('CommonPrefixes'): + prefix_list = o.get('Prefix').split('/') + run_names.append(prefix_list[len(prefix_list) - 2]) - latest_run = self.get_latest(run_names, self.env_name, self.current_run_name) + latest_run = self.get_latest(run_names, self.env_name, self.current_run_name, self.compare_with) if not latest_run: - logger.info("No run found for env_id %s", self.env_name) + logger.info("No 
run artifacts folder found for env_id %s", self.env_name) return '', '' if not os.path.exists(comp_data_path): os.makedirs(comp_data_path) tgt_path = os.path.join(comp_data_path, latest_run) - run_process("aws s3 cp s3://{}/{} {} --recursive".format(bucket.name, latest_run, tgt_path)) + run_process("aws s3 cp s3://{}/{}/{} {} --recursive".format(bucket.name, S3_COMPARE_DIR, latest_run, tgt_path)) return tgt_path, latest_run @@ -115,5 +122,5 @@ def store_results(self): if os.path.exists(comp_data_path): shutil.rmtree(comp_data_path) - run_process("aws s3 cp {} s3://{}/{} --recursive".format(self.artifacts_dir, S3_BUCKET, - self.current_run_name)) + run_process("aws s3 cp {} s3://{}/{}/{} --recursive".format(self.artifacts_dir, S3_BUCKET, S3_COMPARE_DIR, + self.current_run_name)) diff --git a/tests/performance/runs/taurus/__init__.py b/tests/performance/runs/taurus/__init__.py index 4a07717ec..cadc5af82 100644 --- a/tests/performance/runs/taurus/__init__.py +++ b/tests/performance/runs/taurus/__init__.py @@ -18,10 +18,17 @@ import glob import shutil import os +import sys +import logging +from utils.pyshell import run_process from .reader import get_mon_metrics_list +logger = logging.getLogger(__name__) +logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) + + def get_taurus_options(artifacts_dir, jmeter_path=None): """The options for Taurus BZT command""" options = [] @@ -29,19 +36,20 @@ def get_taurus_options(artifacts_dir, jmeter_path=None): options.append('-o modules.jmeter.path={}'.format(jmeter_path)) options.append('-o settings.artifacts-dir={}'.format(artifacts_dir)) options.append('-o modules.console.disable=true') - options.append('-o settings.env.BASEDIR={}'.format(artifacts_dir)) + options.append('-o settings.env.ARTIFACTS_DIR={}'.format(artifacts_dir)) options_str = ' '.join(options) return options_str -def update_taurus_metric_files(suite_artifacts_dir, test_file): +def update_taurus_metric_files(suite_artifacts_dir): """ It renames the server and local metric monitoring log files to metrics.csv. The order of the columns in the header of the server metric monitoring SAlogs file generated by Taurus is not in line with the data. As a workaround, this function rewrites the header based on the order defined in the test yaml.
""" + test_file = os.path.join(suite_artifacts_dir, "effective.yml") metrics_new_file = os.path.join(suite_artifacts_dir, "metrics.csv") server_metric_file_pattern = os.path.join(suite_artifacts_dir, "SAlogs_*") @@ -58,6 +66,18 @@ def update_taurus_metric_files(suite_artifacts_dir, test_file): os.rename(metrics_log_file[0], metrics_new_file) else: - metrics_log_file = os.path.join(suite_artifacts_dir, "local_monitoring_logs.csv") - if os.path.exists(metrics_log_file): - os.rename(metrics_log_file, metrics_new_file) + metrics_log_file = os.path.join(suite_artifacts_dir, "local_monitoring_logs.csv") + if os.path.exists(metrics_log_file): + os.rename(metrics_log_file, metrics_new_file) + + KEEP_LINES = 10000 + + def handle_big_files(name): + report_file = os.path.join(suite_artifacts_dir, name) + report_tmp_file = os.path.join(suite_artifacts_dir, "{}_tmp".format(name)) + if os.path.exists(report_file) and os.stat(report_file).st_size > 1e+7: # 10MB + logger.info("Keeping first %s records from file %s as it is >10MB", KEEP_LINES, report_file) + run_process("head -{0} {1} > {2}; mv {2} {1};".format(KEEP_LINES, report_file, report_tmp_file)) + + handle_big_files("error.jtl") + handle_big_files("kpi.jtl") diff --git a/tests/performance/runs/taurus/override/__init__.py b/tests/performance/runs/taurus/override/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/performance/runs/taurus/override/apache_bench.py b/tests/performance/runs/taurus/override/apache_bench.py new file mode 100644 index 000000000..026cbb119 --- /dev/null +++ b/tests/performance/runs/taurus/override/apache_bench.py @@ -0,0 +1,129 @@ +""" +Module add support for POST, PUT, OPTIONS and DELETE methods to Apache Benchmark +""" +import mimetypes +import os + +from math import ceil +from distutils.version import LooseVersion + +from bzt import TaurusConfigError +from bzt.modules.aggregator import ConsolidatingAggregator +from bzt.six import iteritems +from bzt.utils import dehumanize_time +from bzt.modules.ab import ApacheBenchmarkExecutor, TSVDataReader + + +class ApacheBenchmarkExecutor(ApacheBenchmarkExecutor): + """ + Apache Benchmark executor module + """ + + def prepare(self): + super(ApacheBenchmarkExecutor, self).prepare() + self.scenario = self.get_scenario() + self.install_required_tools() + + self._tsv_file = self.engine.create_artifact("ab", ".tsv") + + self.stdout = open(self.engine.create_artifact("ab", ".out"), 'w') + self.stderr = open(self.engine.create_artifact("ab", ".err"), 'w') + + self.reader = TSVDataReader(self._tsv_file, self.log) + if isinstance(self.engine.aggregator, ConsolidatingAggregator): + self.engine.aggregator.add_underling(self.reader) + + def startup(self): + args = [self.tool.tool_path] + load = self.get_load() + load_iterations = load.iterations or 1 + load_concurrency = load.concurrency or 1 + + if load.hold: + hold = int(ceil(dehumanize_time(load.hold))) + args += ['-t', str(hold)] + else: + args += ['-n', str(load_iterations * load_concurrency)] # ab waits for total number of iterations + + timeout = self.get_scenario().get("timeout", None) + if timeout: + args += ['-s', str(ceil(dehumanize_time(timeout)))] + + args += ['-c', str(load_concurrency)] + args += ['-d'] # do not print 'Processed *00 requests' every 100 requests or so + args += ['-r'] # do not crash on socket level errors + + if self.tool.version and LooseVersion(self.tool.version) >= LooseVersion("2.4.7"): + args += ['-l'] # accept variable-len responses + + args += ['-g', str(self._tsv_file)] # dump 
stats to TSV file + + # add global scenario headers + for key, val in iteritems(self.scenario.get_headers()): + args += ['-H', "%s: %s" % (key, val)] + + requests = self.scenario.get_requests() + if not requests: + raise TaurusConfigError("You must specify at least one request for ab") + if len(requests) > 1: + self.log.warning("ab doesn't support multiple requests. Only the first one will be used.") + request = self.__first_http_request() + if request is None: + raise TaurusConfigError("ab supports only HTTP requests, while scenario doesn't have any") + + # add request-specific headers + for key, val in iteritems(request.headers): + args += ['-H', "%s: %s" % (key, val)] + + # if request.method != 'GET': + # raise TaurusConfigError("ab supports only GET requests, but '%s' is found" % request.method) + + if request.method == 'HEAD': + args += ['-i'] + elif request.method in ['POST', 'PUT']: + options = {'POST': '-p', 'PUT': '-u'} + file_path = request.config['file-path'] + if not file_path: + file_path = os.devnull + self.log.warning("No file path specified, dev null will be used instead") + args += [options[request.method], file_path] + content_type = request.config['content-type'] or mimetypes.guess_type(file_path)[0] + if content_type: + args += ['-T', content_type] + else: # 'GET', 'OPTIONS', 'DELETE', etc + args += ['-m', request.method] + + if request.priority_option('keepalive', default=True): + args += ['-k'] + + args += [request.url] + + self.reader.setup(load_concurrency, request.label) + + self.log.info('Executing command: ' + ' '.join(arg for arg in args)) + self.process = self._execute(args) + + +class TSVDataReader(TSVDataReader): + def _read(self, last_pass=False): + lines = self.file.get_lines(size=1024 * 1024, last_pass=last_pass) + + for line in lines: + if not self.skipped_header: + self.skipped_header = True + continue + log_vals = [val.strip() for val in line.split('\t')] + + _error = None + # _rstatus = None + _rstatus = '' # Hack to trick Taurus into computing aggregated stats + + _url = self.url_label + _concur = self.concurrency + _tstamp = int(log_vals[1]) # timestamp - moment of request sending + _con_time = float(log_vals[2]) / 1000.0 # connection time + _etime = float(log_vals[4]) / 1000.0 # elapsed time + _latency = float(log_vals[5]) / 1000.0 # latency (aka waittime) + _bytes = None + + yield _tstamp, _url, _concur, _etime, _con_time, _latency, _rstatus, _error, '', _bytes \ No newline at end of file diff --git a/tests/performance/agents/metrics_monitoring_inproc.py b/tests/performance/runs/taurus/override/metrics_monitoring.py similarity index 80% rename from tests/performance/agents/metrics_monitoring_inproc.py rename to tests/performance/runs/taurus/override/metrics_monitoring.py index ed5788187..483ef7317 100644 --- a/tests/performance/agents/metrics_monitoring_inproc.py +++ b/tests/performance/runs/taurus/override/metrics_monitoring.py @@ -24,9 +24,9 @@ from bzt.modules import monitoring from bzt.utils import dehumanize_time -import configuration -from metrics import get_metrics, AVAILABLE_METRICS as AVAILABLE_SERVER_METRICS -from utils.process import get_process_pid_from_file, get_server_processes, \ +from agents import configuration +from agents.metrics import get_metrics, AVAILABLE_METRICS as AVAILABLE_SERVER_METRICS +from agents.utils.process import get_process_pid_from_file, get_server_processes, \ get_child_processes, get_server_pidfile @@ -42,6 +42,7 @@ class Monitor(monitoring.Monitoring): def __init__(self): super(Monitor, self).__init__()
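For reference, the apache_bench override above ultimately shells out to `ab` with an argument list assembled in `startup()`. Below is a minimal sketch of that assembly for a POST request with a file body; the URL, file name, and counts are illustrative, not values taken from the suite:

```python
# Sketch of the ab argument list the override builds for a POST request.
# '-p' posts the file body ('-u' would be PUT), '-T' sets the Content-Type,
# '-g' dumps per-request stats to the TSV file that TSVDataReader parses.
import mimetypes

def build_ab_args(url, file_path, concurrency=10, iterations=100):
    args = ['ab', '-n', str(iterations * concurrency), '-c', str(concurrency),
            '-d', '-r', '-l',   # quiet output, survive socket errors, allow variable-length responses
            '-g', 'ab.tsv',
            '-p', file_path]
    content_type = mimetypes.guess_type(file_path)[0]
    if content_type:
        args += ['-T', content_type]
    return args + ['-k', url]

print(' '.join(build_ab_args('http://127.0.0.1:8080/predictions/squeezenet1_1', 'kitten.jpg')))
```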
self.client_classes.update({'ServerLocalClient': ServerLocalClient}) + self.client_classes.update({'ServerRemoteClient': ServerRemoteClient}) class ServerLocalClient(monitoring.LocalClient): @@ -58,6 +59,11 @@ def __init__(self, parent_log, label, config, engine=None): else: self.label = 'ServerLocalClient' + def disconnect(self): + self.log.info("Last metric values before shutdown") + self.interval = 0 + self.get_data() + def connect(self): exc = TaurusConfigError('Metric is required in Local monitoring client') metric_names = self.config.get('metrics', exc) @@ -86,6 +92,20 @@ def connect(self): logs_writer.writerow(metrics) +class ServerRemoteClient(monitoring.ServerAgentClient): + """Custom server remote client """ + def get_data(self): + result = super().get_data() + # Logging for custom metric values + msg = [] + for res in result: + for metric_name in self.config.get("metrics"): + metric_value = res[metric_name] + msg.append("{0} : {1}".format(metric_name, metric_value)) + self.log.info("{0}".format(" -- ".join(msg))) + return result + + class ServerLocalMonitor(monitoring.LocalMonitor): """Custom server local monitor""" diff --git a/tests/performance/runs/taurus/reader.py b/tests/performance/runs/taurus/reader.py index 2222afdbe..700039f04 100644 --- a/tests/performance/runs/taurus/reader.py +++ b/tests/performance/runs/taurus/reader.py @@ -21,22 +21,31 @@ def get_mon_metrics_list(test_yaml_path): - """Utility method to get list of server-agent metrics which are being monitored from a test yaml file""" + """Utility method to get list of ServerRemoteClient metrics which are being monitored from a test yaml file""" metrics = [] with open(test_yaml_path) as test_yaml: test_yaml = yaml.safe_load(test_yaml) for rep_section in test_yaml.get('services', []): - if rep_section.get('module', None) == 'monitoring' and "server-agent" in rep_section: - for mon_section in rep_section.get('server-agent', []): + if rep_section.get('module', None) == 'server_monitoring' and "ServerRemoteClient" in rep_section: + for mon_section in rep_section.get('ServerRemoteClient', []): if isinstance(mon_section, dict): metrics.extend(mon_section.get('metrics', [])) return metrics -def get_compare_metric_list(dir, sub_dir): +def parse_criterion_sec(criterion): + subject = criterion["subject"] + metric = subject.rsplit('/', 1) + metric = metric[1] if len(metric) == 2 else metric[0] + diff_percent_prev = criterion.get("diff_percent_previous", None) + diff_percent_run = criterion.get("diff_percent_run", None) + + return [metric, diff_percent_prev, diff_percent_run] + + +def get_compare_metric_list_taurus(dir, sub_dir): """Utility method to get list of compare monitoring metrics identified by diff_percent property""" - diff_percents = [] metrics = [] test_yaml = os.path.join(dir, sub_dir, "effective.yml") with open(test_yaml) as test_yaml: @@ -45,13 +54,21 @@ def get_compare_metric_list(dir, sub_dir): if rep_section.get('module', None) == 'passfail': for criterion in rep_section.get('criteria', []): if isinstance(criterion, dict) and 'monitoring' in criterion.get('class', ''): - subject = criterion["subject"] - metric = subject.rsplit('/', 1) - metric = metric[1] if len(metric) == 2 else metric[0] - diff_percent = criterion.get("diff_percent", None) + metrics.append(parse_criterion_sec(criterion)) + + return metrics + - if diff_percent: - metrics.append(metric) - diff_percents.append(diff_percent) +def get_compare_metric_list(dir, sub_dir): + """Utility method to get list of compare monitoring metrics identified by 
diff_percent property""" + metrics = [] + test_yaml = os.path.join(dir, sub_dir, "effective.yml") + with open(test_yaml) as test_yaml: + test_yaml = yaml.safe_load(test_yaml) + sec = test_yaml.get('compare_criteria', []) + if sec: + for criterion in sec: + if criterion: + metrics.append(parse_criterion_sec(criterion)) - return metrics, diff_percents + return metrics diff --git a/tests/performance/runs/taurus/x2junit.py b/tests/performance/runs/taurus/x2junit.py index 0219ef76d..71320a46d 100644 --- a/tests/performance/runs/taurus/x2junit.py +++ b/tests/performance/runs/taurus/x2junit.py @@ -13,19 +13,26 @@ """ Convert the Taurus Test suite XML to Junit XML """ -# pylint: disable=redefined-builtin +# pylint: disable=redefined-builtin, unused-variable, broad-except import os +import html +import pandas as pd +import tabulate +from bzt.modules.passfail import DataCriterion from junitparser import TestCase, TestSuite, JUnitXml, Skipped, Error, Failure +from runs.taurus.reader import get_compare_metric_list + class X2Junit(object): """ Context Manager class to convert a Taurus test suite XML report, which follows the Xunit specification, into a JUnit XML report. """ + def __init__(self, name, artifacts_dir, junit_xml, timer, env_name): self.ts = TestSuite(name) self.name = name @@ -33,44 +40,190 @@ def __init__(self, name, artifacts_dir, junit_xml, timer, env_name): self.timer = timer self.artifacts_dir = artifacts_dir self.env_name = env_name + self.metrics = None + self.metrics_agg_dict = {} + + self.code = 0 + self.err = "" + + self.ts.tests, self.ts.failures, self.ts.skipped, self.ts.errors = 0, 0, 0, 0 def __enter__(self): return self + def add_compare_tests(self): + """Add compare tests for a run. + Compare the actual percentage difference between the first and last values against the provided difference.""" + + compare_list = get_compare_metric_list(self.artifacts_dir, "") + for metric_values in compare_list: + col = metric_values[0] + diff_percent = metric_values[2] + try: + diff_percent = float(diff_percent) + except Exception as e: + diff_percent = None + tc = TestCase("{}_diff_run > {}".format(col, diff_percent)) + if diff_percent is None: + tc.result = Skipped("diff_percent_run value is not specified") + self.ts.skipped += 1 + elif self.metrics is None: + tc.result = Skipped("Metrics are not captured") + self.ts.skipped += 1 + else: + col_metric_values = getattr(self.metrics, col, None) + if col_metric_values is None: + tc.result = Error("Metric {} is not captured".format(col)) + self.ts.errors += 1 + elif len(col_metric_values) < 2: + tc.result = Skipped("Not enough values were captured") + self.ts.skipped += 1 + else: + first_value = col_metric_values.iloc[0] + last_value = col_metric_values.iloc[-1] + + try: + if last_value == first_value == 0: + diff_actual = 0 + else: + diff_actual = (abs(last_value - first_value) / ((last_value + first_value) / 2)) * 100 + + if float(diff_actual) <= float(diff_percent): + self.ts.tests += 1 + else: + tc.result = Failure("The first and last values of the run are {}, {} " + "with percent diff {}".format(first_value, last_value, diff_actual)) + + except Exception as e: + tc.result = Error("Error while comparing values {}".format(str(e))) + self.ts.errors += 1 + + self.ts.add_testcase(tc) + + @staticmethod + def casename_to_criteria(test_name): + """Extract metric from Taurus pass/fail criteria string""" + metric = None + if ' of ' not in test_name: + test_name = "label of {}".format(test_name) + try: + test_name = html.unescape(html.unescape(test_name)) + criteria =
DataCriterion.string_to_config(test_name) + except Exception as e: + return None + + label = criteria["label"].split('/') + if len(label) == 2: + metric = label[1] + return metric + + def percentile_values(self, metric_name): + """Calculate percentile values for metric_name column in self.metrics pandas df""" + values = {} + if self.metrics is not None and metric_name is not None: + metric_vals = getattr(self.metrics, metric_name, None) + if metric_vals is not None: + centile_values = [0, 0.5, 0.9, 0.95, 0.99, 0.999, 1] + for centile in centile_values: + val = getattr(metric_vals, 'quantile')(centile) + values.update({str(centile * 100) + "%": val}) + + return values + + def update_metrics(self): + """ Update self.metrics and self.metrics_agg_dict""" + metrics_file = os.path.join(self.artifacts_dir, "metrics.csv") + rows = [] + agg_dict = {} + if os.path.exists(metrics_file): + self.metrics = pd.read_csv(metrics_file) + centile_values = [0, 0.5, 0.9, 0.95, 0.99, 0.999, 1] + header_names = ['test_name', 'metric_name'] + header_names.extend([str(colname * 100) + "%" for colname in centile_values]) + header_names.extend(['first_value', 'last_value']) + if self.metrics.size: + for col in self.metrics.columns: + row = [self.name, str(col)] + metric_vals = getattr(self.metrics, str(col), None) + for centile in centile_values: + row.append(getattr(metric_vals, 'quantile')(centile)) + row.extend([metric_vals.iloc[0], metric_vals.iloc[-1]]) + agg_dict.update({row[1]: dict(zip(header_names[2:], row[2:]))}) + rows.append(row) + + dataframe = pd.DataFrame(rows, columns=header_names) + print("Metric percentile values:\n") + print(tabulate.tabulate(rows, headers=header_names, tablefmt="grid")) + dataframe.to_csv(os.path.join(self.artifacts_dir, "metrics_agg.csv"), index=False) + + self.metrics_agg_dict = agg_dict + def __exit__(self, type, value, traceback): + print("error code is " + str(self.code)) + + self.update_metrics() xunit_file = os.path.join(self.artifacts_dir, "xunit.xml") - tests, failures, skipped, errors = 0, 0, 0, 0 - if os.path.exists(xunit_file): + if self.code not in [0, 3]: # 0-no error, 3-pass/fail + tc = TestCase(self.name) + tc.result = Error(self.err) + self.ts.add_testcase(tc) + elif os.path.exists(xunit_file): xml = JUnitXml.fromfile(xunit_file) for i, suite in enumerate(xml): for case in suite: name = "scenario_{}: {}".format(i, case.name) result = case.result + + metric_name = X2Junit.casename_to_criteria(case.name) + values = self.metrics_agg_dict.get(metric_name, None) + msg = result.message if result else "" + if values: + val_msg = "Actual percentile values are {}".format(values) + msg = "{}.
{}".format(msg, val_msg) + if isinstance(result, Error): - failures += 1 - result = Failure(result.message, result.type) + self.ts.failures += 1 + result = Failure(msg, result.type) elif isinstance(result, Failure): - errors += 1 - result = Error(result.message, result.type) + self.ts.errors += 1 + result = Error(msg, result.type) elif isinstance(result, Skipped): - skipped += 1 + self.ts.skipped += 1 + result = Skipped(msg, result.type) else: - tests += 1 + self.ts.tests += 1 tc = TestCase(name) tc.result = result self.ts.add_testcase(tc) else: tc = TestCase(self.name) - tc.result = Skipped() + tc.result = Skipped("Skipped criteria test cases because the Taurus XUnit file was not generated.") self.ts.add_testcase(tc) + self.add_compare_tests() + self.ts.hostname = self.env_name self.ts.timestamp = self.timer.start self.ts.time = self.timer.diff() - self.ts.tests = tests - self.ts.failures = failures - self.ts.skipped = skipped - self.ts.errors = errors self.ts.update_statistics() self.junit_xml.add_testsuite(self.ts) + + # Return False so that the __exit__ method does not swallow the exception; + # otherwise exceptions would never be reported + return False + + +if __name__ == "__main__": + from utils.timer import Timer + + with Timer("ads") as t: + test_folder = './run_artifacts/xlarge__7bc1982__1594795786/scale_up_workers' + x = X2Junit("test", test_folder, JUnitXml(), t, "xlarge") + + # x.update_metrics() + # x.add_compare_tests() + + x.__exit__(None, None, None) + print(x.ts) + print("a") diff --git a/tests/performance/tests/api_description/api_description.jmx b/tests/performance/tests/api_description/api_description.jmx index a026cb312..69a31b5cf 100644 --- a/tests/performance/tests/api_description/api_description.jmx +++ b/tests/performance/tests/api_description/api_description.jmx @@ -1,7 +1,7 @@ - + false true diff --git a/tests/performance/tests/api_description/api_description.yaml b/tests/performance/tests/api_description/api_description.yaml index da3316228..88546be71 100644 --- a/tests/performance/tests/api_description/api_description.yaml +++ b/tests/performance/tests/api_description/api_description.yaml @@ -1,68 +1,19 @@ ---- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: api_description - -scenarios: - api_description: - script: api_description.jmx - -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - -services: - - module: shellexec - prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss reporting: - module: passfail criteria: # Inbuilt Criteria - - success of ManagementAPIDescription<${MGMT_DESC_SUCC}, stop as failed - - success of InferenceAPIDescription<${INFR_DESC_SUCC}, stop as failed - - avg-rt of ManagementAPIDescription>${MGMT_DESC_RT}, stop as failed - - avg-rt of InferenceAPIDescription>${INFR_DESC_RT}, stop as failed # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '<' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject:
ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 5s - stop : true - fail : true + - success of ManagementAPIDescription<${MGMT_DESC_SUCC} + - avg-rt of ManagementAPIDescription>${MGMT_DESC_AVG_RT} +# # Custom Criteria # - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file +# subject: ServerLocalClient/total_processes +# condition: '<' +# threshold: ${TOTAL_PROCS} +# timeframe: 1s +# stop : ${STOP} +# fail : true + +scenarios: + ~scenario_0: + script: api_description.jmx \ No newline at end of file diff --git a/tests/performance/tests/api_description/environments/xlarge.yaml b/tests/performance/tests/api_description/environments/xlarge.yaml index f7bc5561d..be4e38930 100644 --- a/tests/performance/tests/api_description/environments/xlarge.yaml +++ b/tests/performance/tests/api_description/environments/xlarge.yaml @@ -1,10 +1,52 @@ --- settings: env: - MGMT_DESC_SUCC: 100% - INFR_DESC_SUCC: 100% - MGMT_DESC_RT : 10ms - INFR_DESC_RT : 10ms + MGMT_DESC_SUCC: 80% + MGMT_DESC_AVG_RT: 30ms + + API_LABEL : ManagementAPIDescription + API_SUCCESS : 80% + API_AVG_RT : 30ms + + TOTAL_WORKERS: 0 + TOTAL_WORKERS_MEM: 0 + TOTAL_WORKERS_FDS: 0 + + TOTAL_MEM : 1500098304 TOTAL_PROCS : 1 - TOTAL_FDS : 73 - TOTAL_MEM: 100000000 #100MB + TOTAL_FDS : 10 + + FRNTEND_MEM: 1500098304 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 0 + TOTAL_WORKERS_FDS_RUN_DIFF: 0 + TOTAL_MEM_RUN_DIFF: 185 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 185 + FRNTEND_MEM_RUN_DIFF: 30 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 30s + SCRIPT : api_description.jmx + + STOP : '' + STOP_ALIAS: continue \ No newline at end of file diff --git a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.jmx b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.jmx index d119e9b7f..de4e2a00d 100644 --- a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.jmx +++ b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,12 +16,12 @@ model1 - ${__P(model_name1,resnet-152)} + ${__P(RESNET_152_BATCH_NAME,resnet-152-batch)} = model2 - ${__P(model_name2,squeezenet_v1.1)} + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml index 73e9ab957..62f4af8ad 100644 --- a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml +++ b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml @@ -1,96 +1,22 @@ --- -execution: -- concurrency: 10 - ramp-up: 5s - hold-for: 20s - scenario: Inference - scenarios: - Inference: + scenario_0: script: batch_and_single_inference.jmx -modules: - server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in 
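The *_RUN_DIFF values above feed the add_compare_tests step in x2junit.py, which compares the first and last captured values of a metric within a single run using a symmetric percent difference. A small worked example with made-up memory readings:

```python
# Symmetric percent difference used by add_compare_tests in x2junit.py:
#   |last - first| / ((last + first) / 2) * 100
def run_diff_percent(first_value, last_value):
    if first_value == last_value == 0:
        return 0.0
    return abs(last_value - first_value) / ((last_value + first_value) / 2) * 100

# Worker RSS drifting from 900 to 1000 (units arbitrary) over a run:
diff = run_diff_percent(900.0, 1000.0)
print(round(diff, 2))   # 10.53
print(diff <= 30)       # True: within a TOTAL_WORKERS_MEM_RUN_DIFF of 30
```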
python path - class : metrics_monitoring_inproc.Monitor # monitoring class. services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/examples/resnet-152-batching/resnet-152.mar&batch_size=8&max_batch_delay=50" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_152_BATCH_URL}&batch_size=8&max_batch_delay=50" # uncomment below and comment prev and use downloaded model with model-store - #- curl -s -X POST "http://localhost:8081/models?url=resnet-152.mar&batch_size=8&max_batch_delay=60&initial_workers=1" - - "curl -s -X PUT http://localhost:8081/models/resnet-152?min_worker=2&synchronous=true" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=2&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring # should be added in modules section - ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - - interval: 1s - logging : True - metrics: - - sum_workers_memory_rss - - sum_workers_file_descriptors - - total_workers - - orphans + #- curl -s -X POST "http://localhost:8081/models?url=${RESNET_152_BATCH_NAME}.mar&batch_size=8&max_batch_delay=60&initial_workers=1" + - "curl -s -X PUT http://localhost:8081/models/${RESNET_152_BATCH_NAME}?min_worker=2&synchronous=true" + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=2&synchronous=true" reporting: - module: passfail criteria: - - subject: avg-rt # required - label: 'Inference1' # optional, default is '' - condition: '>' # required - threshold: ${INFR1_RT} # required - logic: for # optional, logic to aggregate values within timeframe. - # Default 'for' means take latest, - # 'within' and 'over' means take sum/avg of all values within interval - timeframe: 1s # optional, default is none - stop: true # optional, default is true. false for nonstop testing until the end - fail: true # optional, default is true - - subject: avg-rt # required - label: 'Inference2' # optional, default is '' - condition: '>' # required - threshold: ${INFR2_RT} # required - logic: for # optional, logic to aggregate values within timeframe. - # Default 'for' means take latest, - # 'within' and 'over' means take sum/avg of all values within interval - timeframe: 1s # optional, default is none - stop: true # optional, default is true. 
false for nonstop testing until the end - fail: true # optional, default is true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss - condition: '>' - threshold: ${TOTAL_WORKERS_MEM} - timeframe: 1s - stop : true - fail : true - diff_percent : 30 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/orphans - condition: '>' - threshold: ${TOTAL_ORPHANS} - timeframe: 1s - stop : true - fail : true - diff_percent : 0 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_workers - condition: '>' - threshold: ${TOTAL_WORKERS} - timeframe: 1s - stop: true - fail: true - diff_percent: 0 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_file_descriptors - condition: '>' - threshold: ${TOTAL_WORKERS_FDS} - timeframe: 1s - stop: true - fail: true - diff_percent: 30 + # Inbuilt Criteria + - success of ManagementAPIDescription<${INF2_SUCC} + - avg-rt of ManagementAPIDescription>${INF2_AVG_RT} \ No newline at end of file diff --git a/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml b/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml index 97307b690..e4ab23643 100644 --- a/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml +++ b/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml @@ -1,9 +1,55 @@ --- settings: env: - INFR1_RT : 6s - INFR2_RT : 0.08s - TOTAL_WORKERS_MEM : 4000000000 #4GB - TOTAL_WORKERS : 9 + API_LABEL : Inference1 + API_SUCCESS : 80% + API_AVG_RT : 2.5s + + INF2_SUCC: 80% + INF2_AVG_RT: 50ms + + TOTAL_WORKERS: 6 + TOTAL_WORKERS_MEM: 999686400 + TOTAL_WORKERS_FDS: 60 + + TOTAL_MEM : 1292481024 + TOTAL_PROCS : 7 + TOTAL_FDS : 230 + + FRNTEND_MEM: 435241216 + TOTAL_ORPHANS : 0 - TOTAL_WORKERS_FDS : 78 + TOTAL_ZOMBIES : 0 + + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 45 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 45 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 45 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : batch_and_single_inference.jmx + + STOP : '' + STOP_ALIAS: continue + + diff --git a/tests/performance/tests/batch_inference/batch_inference.jmx b/tests/performance/tests/batch_inference/batch_inference.jmx index 885fac295..111799a57 100644 --- a/tests/performance/tests/batch_inference/batch_inference.jmx +++ b/tests/performance/tests/batch_inference/batch_inference.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - ${__P(model_name,resnet-152)} + ${__P(RESNET_152_BATCH_NAME,resnet-152-batch)} = diff --git a/tests/performance/tests/batch_inference/batch_inference.yaml b/tests/performance/tests/batch_inference/batch_inference.yaml index 7c4485c06..ef8c4c700 100644 --- a/tests/performance/tests/batch_inference/batch_inference.yaml +++ b/tests/performance/tests/batch_inference/batch_inference.yaml @@ -1,84 +1,14 @@ --- -execution: -- concurrency: 10 - ramp-up: 5s - hold-for: 20s - scenario: Inference - scenarios: - Inference: + scenario_0: script: batch_inference.jmx -modules: 
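The *_PREV_DIFF thresholds, by contrast, are checked against the most recent earlier run. runs/storage.py (earlier in this diff) locates that run by parsing artifact folder names of the form env_name__commitid__timestamp and matching both the environment and the --compare-with commit id. A minimal sketch of that selection; the folder names are hypothetical:

```python
# Pick the latest prior run for the same env and compare-with commit,
# mirroring Storage.get_latest in runs/storage.py.
def get_latest(names, env_name, exclude_name, compare_with):
    max_ts, latest_run = 0, ''
    for run_name in names:
        env, commit, ts = run_name.split('__')
        if env == env_name and commit == compare_with and run_name != exclude_name:
            if int(ts) > max_ts:
                max_ts, latest_run = int(ts), run_name
    return latest_run

runs = ['xlarge__7bc1982__1594795786', 'xlarge__7bc1982__1594800000', 'c5__7bc1982__1594810000']
print(get_latest(runs, 'xlarge', 'xlarge__7bc1982__1594900000', '7bc1982'))
# xlarge__7bc1982__1594800000
```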
- server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in python path - class : metrics_monitoring_inproc.Monitor # monitoring class. services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/examples/resnet-152-batching/resnet-152.mar&batch_size=8&max_batch_delay=50" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_152_BATCH_URL}&batch_size=8&max_batch_delay=50" # uncomment below and comment prev and use downloaded model with model-store - #- "curl -s -X POST http://localhost:8081/models?url=resnet-152.mar&batch_size=8&max_batch_delay=60&initial_workers=1" - - "curl -s -X PUT http://localhost:8081/models/resnet-152?min_worker=2&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring # should be added in modules section - ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - - interval: 1s - logging : True - metrics: - - sum_workers_memory_rss - - sum_workers_file_descriptors - - total_workers - - orphans + #- "curl -s -X POST http://localhost:8081/models?url=${RESNET_152_BATCH_NAME}.mar&batch_size=8&max_batch_delay=60&initial_workers=1" + - "curl -s -X PUT http://localhost:8081/models/${RESNET_152_BATCH_NAME}?min_worker=2&synchronous=true" -reporting: -- module: passfail - criteria: - - subject: avg-rt # required - label: 'Inference' # optional, default is '' - condition: '>' # required - threshold: ${INFR_RT} # required - logic: for # optional, logic to aggregate values within timeframe. - # Default 'for' means take latest, - # 'within' and 'over' means take sum/avg of all values within interval - timeframe: 1s # optional, default is none - stop: true # optional, default is true. 
false for nonstop testing until the end - fail: true # optional, default is true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss - condition: '>' - threshold: ${TOTAL_WORKERS_MEM} - timeframe: 1s - stop : true - fail : true - diff_percent : 30 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/orphans - condition: '>' - threshold: ${TOTAL_ORPHANS} - timeframe: 1s - stop : true - fail : true - diff_percent : 0 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_workers - condition: '>' - threshold: ${TOTAL_WORKERS} - timeframe: 1s - stop: true - fail: true - diff_percent: 0 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_file_descriptors - condition: '>' - threshold: ${TOTAL_WORKERS_FDS} - timeframe: 1s - stop: true - fail: true - diff_percent: 30 diff --git a/tests/performance/tests/batch_inference/environments/xlarge.yaml b/tests/performance/tests/batch_inference/environments/xlarge.yaml index 23a443aaf..73b2d8379 100644 --- a/tests/performance/tests/batch_inference/environments/xlarge.yaml +++ b/tests/performance/tests/batch_inference/environments/xlarge.yaml @@ -1,8 +1,51 @@ --- settings: env: - INFR_RT : 1.5s - TOTAL_WORKERS_MEM : 3000000000 #3GB - TOTAL_WORKERS : 4 + API_LABEL : Inference + API_SUCCESS : 80% + API_AVG_RT : 2.5s + + TOTAL_WORKERS: 3 + TOTAL_WORKERS_MEM: 3000000000 + TOTAL_WORKERS_FDS: 400 + + TOTAL_MEM : 4000000000 + TOTAL_PROCS : 4 + TOTAL_FDS : 200 + + FRNTEND_MEM: 1000000000 + TOTAL_ORPHANS : 0 - TOTAL_WORKERS_FDS : 38 \ No newline at end of file + TOTAL_ZOMBIES : 0 + + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 50 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 45 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 80 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : batch_inference.jmx + + STOP : '' + STOP_ALIAS: continue + + diff --git a/tests/performance/tests/examples_apache_bench/environments/xlarge.yaml b/tests/performance/tests/examples_apache_bench/environments/xlarge.yaml new file mode 100644 index 000000000..f06a0540b --- /dev/null +++ b/tests/performance/tests/examples_apache_bench/environments/xlarge.yaml @@ -0,0 +1,48 @@ + +--- +settings: + env: + API_LABEL : Inference + API_SUCCESS : 80% + API_AVG_RT : 140ms + + TOTAL_WORKERS: 1 + TOTAL_WORKERS_MEM: 300000000 + TOTAL_WORKERS_FDS: 150 + + TOTAL_MEM : 1000000000 + TOTAL_PROCS : 3 + TOTAL_FDS : 150 + + FRNTEND_MEM: 600000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 60 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 90 + TOTAL_ORPHANS_RUN_DIFF: 0 + 
TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/examples_apache_bench/examples_apache_bench.yaml b/tests/performance/tests/examples_apache_bench/examples_apache_bench.yaml new file mode 100644 index 000000000..87af61a32 --- /dev/null +++ b/tests/performance/tests/examples_apache_bench/examples_apache_bench.yaml @@ -0,0 +1,43 @@ +~execution: + - executor: apache_bench + concurrency: ${CONCURRENCY} + ramp-up: ${RAMP-UP} + hold-for: ${HOLD-FOR} + scenario: scenario_0 + +~scenarios: + scenario_0: + requests: + - url: http://127.0.0.1:8080/predictions/${SQZNET_NAME} + label: ${API_LABEL} + method: POST + file-path: ${INPUT_IMG_PATH} + +services: + - module: shellexec + prepare: + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" + + +reporting: +- module: passfail + criteria: + # Inbuilt Criteria - cannot be used with Apache Benchmark + # - success of MyLabel<${INFR_SUCC}, stop as failed + # - avg-rt of MyLabel>${INFR_RT}, stop as failed + # Custom Criteria + - class: bzt.modules.monitoring.MonitoringCriteria + subject: ServerLocalClient/total_processes + condition: '>' + threshold: ${TOTAL_PROCS} + timeframe: 1s + stop : true + fail : true + - class: bzt.modules.monitoring.MonitoringCriteria + subject: ServerLocalClient/total_processes + condition: '<' + threshold: ${TOTAL_PROCS} + timeframe: 1s + stop : true + fail : true \ No newline at end of file diff --git a/tests/performance/tests/examples_local_criteria/environments/xlarge.yaml b/tests/performance/tests/examples_local_criteria/environments/xlarge.yaml index 6c3835292..6cbbedc7d 100644 --- a/tests/performance/tests/examples_local_criteria/environments/xlarge.yaml +++ b/tests/performance/tests/examples_local_criteria/environments/xlarge.yaml @@ -4,5 +4,9 @@ settings: FAIL : 100% P90 : 290ms AVG_RT : 1s - TOTAL_WORKERS_MEM : 132000000 - PERCENT_DIFF_TOTAL_WORKERS_MEM : 5 + TOTAL_WORKERS_MEM : 135000000 + TOTAL_WORKERS_MEM_DIFF : 5 + + STOP : false + +~compare_criteria: diff --git a/tests/performance/tests/examples_local_criteria/examples_local_criteria.jmx b/tests/performance/tests/examples_local_criteria/examples_local_criteria.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_local_criteria/examples_local_criteria.jmx +++ b/tests/performance/tests/examples_local_criteria/examples_local_criteria.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml index 9d8b87907..72a539d26 100644 --- a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml +++ b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml @@ -1,30 +1,35 @@ --- -execution: +~execution: - concurrency: 1 ramp-up: 5s hold-for: 20s scenario: Inference -scenarios: +~scenarios: Inference: script: examples_local_criteria.jmx 
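Several sections in the per-test YAMLs are now written with a leading tilde (~execution, ~scenarios, ~reporting, and so on). In Taurus, a ~ prefix tells the config merger to overwrite the inherited value instead of merging into it, so a test file can replace a whole section from global_config.yaml. A simplified sketch of that merge rule; the real logic lives inside bzt:

```python
# Simplified illustration of Taurus' merge rule for '~'-prefixed keys.
def merge(base, override):
    result = dict(base)
    for key, value in override.items():
        if key.startswith('~'):
            result[key[1:]] = value               # '~' means replace, not merge
        elif isinstance(result.get(key), list) and isinstance(value, list):
            result[key] = result[key] + value     # plain list keys are appended
        else:
            result[key] = value
    return result

base = {'reporting': [{'module': 'passfail'}, {'module': 'junit-xml'}]}
print(merge(base, {'~reporting': [{'module': 'passfail'}]}))
# {'reporting': [{'module': 'passfail'}]}
```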
modules: - server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in python path - class : metrics_monitoring_inproc.Monitor # monitoring class. + server_monitoring: + # metrics_monitoring and dependencies should be in python path + class : metrics_monitoring.Monitor # monitoring class. -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir -p /tmp/ts_model_store" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring # should be added in modules section - ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + - module: server_monitoring # should be added in modules section + ServerLocalClient: # keyword from metrics_monitoring.Monitor - interval: 1s logging : True metrics: @@ -33,7 +38,14 @@ services: - mem - sum_workers_memory_rss -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: - fail >${FAIL}, stop as failed @@ -44,6 +56,9 @@ reporting: condition: '>' threshold: ${TOTAL_WORKERS_MEM} timeframe: 1s - stop : true + stop : ${STOP} fail : true - diff_percent : ${PERCENT_DIFF_TOTAL_WORKERS_MEM} + diff_percent_previous : ${TOTAL_WORKERS_MEM_DIFF} + +~compare_criteria: + - \ No newline at end of file diff --git a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.jmx b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.jmx +++ b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml index d00226470..b48ea0d81 100644 --- a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml +++ b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml @@ -1,32 +1,44 @@ --- -execution: +~execution: - concurrency: 1 ramp-up: 5s hold-for: 20s scenario: Inference -scenarios: + +~scenarios: Inference: script: examples_local_monitoring.jmx modules: - server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in python path - class : metrics_monitoring_inproc.Monitor # monitoring class. 
+ server_monitoring: + # metrics_monitoring and dependencies should be in python path + class : metrics_monitoring.Monitor # monitoring class. -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir -p /tmp/ts_model_store" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring # should be added in modules section - ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + - module: server_monitoring # should be added in modules section + ServerLocalClient: # keyword from metrics_monitoring.Monitor - interval: 1s metrics: - cpu - disk-space - mem - - sum_workers_memory_percent \ No newline at end of file + - sum_workers_memory_percent + +~reporting: + - module: passfail + +~compare_criteria: + - diff --git a/tests/performance/tests/examples_remote_criteria/environments/xlarge.yaml b/tests/performance/tests/examples_remote_criteria/environments/xlarge.yaml index 674a6c1ff..6ad41860d 100644 --- a/tests/performance/tests/examples_remote_criteria/environments/xlarge.yaml +++ b/tests/performance/tests/examples_remote_criteria/environments/xlarge.yaml @@ -5,3 +5,6 @@ settings: P90 : 250ms AVG_RT : 1s TOTAL_WORKERS_FDS : 80 + TOTAL_WORKERS_FDS_DIFF : 35 + +~compare_criteria: \ No newline at end of file diff --git a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.jmx b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.jmx +++ b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml index 0c3c206d1..487c825ae 100644 --- a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml +++ b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml @@ -1,47 +1,60 @@ -execution: +~execution: - concurrency: 4 ramp-up: 1s hold-for: 20s scenario: Inference -scenarios: +~scenarios: Inference: script: examples_remote_criteria.jmx -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir -p /tmp/ts_model_store" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" 
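The prepare steps above now kill any stale server before starting a fresh one. The pipeline works because SERVER_PROCESS_NAME (set in global_config.yaml later in this diff) brackets its first character, e.g. "[c]om.amazonaws.ml.mms.ModelServer", so the grep never matches its own command line. An equivalent sketch in Python:

```python
# Kill stale model-server processes before a run; mirrors the shellexec prepare step.
# The [c] bracket keeps the grep pattern from matching the grep process itself.
import subprocess

PATTERN = "[c]om.amazonaws.ml.mms.ModelServer"
cmd = ("ps aux | grep '{}' | awk '{{print $2}}' "
       "| xargs kill -9 2> /dev/null || true").format(PATTERN)
subprocess.run(cmd, shell=True, check=False)  # '|| true' keeps the step from failing when nothing is running
```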
post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: monitoring - server-agent: + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + - module: server_monitoring + ServerRemoteClient: - address: localhost:9009 # metric monitoring service address - label: mms-inference-server # if you specify label, it will be used in reports instead of ip:port + label: model-server # if you specify label, it will be used in reports instead of ip:port interval: 1s # polling interval logging: True # those logs will be saved to "SAlogs_192.168.0.1_9009.csv" in the artifacts dir metrics: # metrics should be supported by monitoring service - - sum_workers_cpu_percent # cpu percent used by all the mms server processes and workers + - sum_workers_cpu_percent # cpu percent used by all the Model Server processes and workers - sum_workers_memory_percent - sum_workers_file_descriptors - - total_workers # no of mms workers + - total_workers # no of Model Server workers -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: - fail >${FAIL}, stop as failed - p90 >${P90} , stop as failed - avg-rt >${AVG_RT} , stop as failed - class: bzt.modules.monitoring.MonitoringCriteria - subject: mms-inference-server/sum_workers_file_descriptors + subject: model-server/sum_workers_file_descriptors condition: '>' threshold: ${TOTAL_WORKERS_FDS} timeframe: 1s fail: true stop: true - diff_percent : 35 \ No newline at end of file + diff_percent_previous : ${TOTAL_WORKERS_FDS_DIFF} + diff --git a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.jmx b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.jmx +++ b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml index 235c3b803..bf531d571 100644 --- a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml +++ b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml @@ -1,34 +1,43 @@ -execution: +~execution: - concurrency: 4 ramp-up: 1s hold-for: 20s scenario: Inference -scenarios: +~scenarios: Inference: script: examples_remote_monitoring.jmx -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir -p /tmp/ts_model_store" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store
/tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: monitoring - server-agent: + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + - module: server_monitoring + ServerRemoteClient: - address: localhost:9009 # metric monitoring service address - label: mms-inference-server # if you specify label, it will be used in reports instead of ip:port + label: model-server # if you specify label, it will be used in reports instead of ip:port interval: 1s # polling interval logging: True # those logs will be saved to "SAlogs_192.168.0.1_9009.csv" in the artifacts dir metrics: # metrics should be supported by monitoring service - - sum_all_cpu_percent # cpu percent used by all the mms server processes and workers + - sum_all_cpu_percent # cpu percent used by all the Model server processes and workers - sum_workers_memory_percent - frontend_file_descriptors - - total_workers # no of mms workers + - total_workers # no of Model Server workers + +~reporting: + - module: passfail +~compare_criteria: diff --git a/tests/performance/tests/examples_starter/examples_starter.jmx b/tests/performance/tests/examples_starter/examples_starter.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_starter/examples_starter.jmx +++ b/tests/performance/tests/examples_starter/examples_starter.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_starter/examples_starter.yaml b/tests/performance/tests/examples_starter/examples_starter.yaml index ac6aaa50b..a94ccc5c8 100644 --- a/tests/performance/tests/examples_starter/examples_starter.yaml +++ b/tests/performance/tests/examples_starter/examples_starter.yaml @@ -1,20 +1,30 @@ --- -execution: -- concurrency: 1 - ramp-up: 1s - hold-for: 40s - scenario: Inference -scenarios: +~execution: + - concurrency: 1 + ramp-up: 1s + hold-for: 40s + scenario: Inference + +~scenarios: Inference: script: examples_starter.jmx -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir -p /tmp/ts_model_store" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + +~reporting: + - module: passfail +~compare_criteria: + - diff --git a/tests/performance/tests/global_config.yaml b/tests/performance/tests/global_config.yaml index 94731f4a3..51de8a03d 100644 --- a/tests/performance/tests/global_config.yaml +++ b/tests/performance/tests/global_config.yaml @@ -1,17 +1,69 @@ +--- +execution: +- concurrency: ${CONCURRENCY} + ramp-up: ${RAMP-UP} + hold-for: ${HOLD-FOR} + scenario: scenario_0 + +scenarios: + 
scenario_0: + script: ${SCRIPT} + modules: jmeter: # These are JMeter test case properties. These variables are used in jmx files. # Change the values as per your setup properties: - hostname : 127.0.0.1 # MMS properties - port : 8080 - management_port : 8081 - protocol : http - input_filepath : kitten.jpg # make sure jpg is available at this path + hostname: 127.0.0.1 # Model Server properties + port: 8080 + management_port: 8081 + protocol: http + input_filepath: kitten.jpg # make sure jpg is available at this path # if relative path is provided this will be relative to current working directory -# DO-NOT change properties below unless you know what you are doing. -# They are needed for performance test suite runner script. + RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} + RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} + SQZNET_URL: ${SQZNET_URL} + SQZNET_NAME: ${SQZNET_NAME} + RESNET_URL: ${RESNET_URL} + RESNET_NAME: ${RESNET_NAME} + + server_monitoring: + # metrics_monitoring and dependencies should be in python path + class : metrics_monitoring.Monitor # monitoring class. + + apache_bench: + class: apache_bench.ApacheBenchmarkExecutor + +services: + - module: shellexec + prepare: + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir -p /tmp/ts_model_store" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" + post-process: + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + - module: server_monitoring # should be added in modules section + ServerLocalClient: # keyword from metrics_monitoring.Monitor + - interval: 1s + logging : True + metrics: + - sum_workers_memory_rss + - sum_workers_file_descriptors + - total_workers + - orphans + - zombies + - frontend_memory_rss + - sum_all_memory_rss + - total_processes + - sum_all_file_descriptors + reporting: - module: passfail # this is to enable passfail module - module: junit-xml @@ -19,8 +71,130 @@ reporting: - module: junit-xml data-source: sample-labels - module: final-stats - dump-csv : ${BASEDIR}/final_stats.csv + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv + +- module: passfail + criteria: + # API requests KPI criteria + - success of ${API_LABEL}<${API_SUCCESS} + - avg-rt of ${API_LABEL}>${API_AVG_RT} +# +# # Monitoring metrics criteria +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/total_workers +# condition: '>' +# threshold: ${TOTAL_WORKERS} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${TOTAL_WORKERS_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_memory_rss +# condition: '>' +# threshold: ${TOTAL_WORKERS_MEM} +# timeframe: 5s +# stop : ${STOP} +# fail : true +# diff_percent_previous : ${TOTAL_WORKERS_MEM_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_file_descriptors +# condition: '>' +# threshold: ${TOTAL_WORKERS_FDS} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${TOTAL_WORKERS_FDS_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_all_memory_rss +# condition: '>' +# threshold: ${TOTAL_MEM} +# timeframe: 5s +# stop : ${STOP} +# fail : true +# diff_percent_previous: ${TOTAL_MEM_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject:
ServerLocalClient/total_processes +# condition: '>' +# threshold: ${TOTAL_PROCS} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_all_file_descriptors +# condition: '>' +# threshold: ${TOTAL_FDS} +# timeframe: 1s +# stop: ${STOP} +# fail: true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/frontend_memory_rss +# condition: '>' +# threshold: ${FRNTEND_MEM} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${FRNTEND_MEM_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/orphans +# condition: '>' +# threshold: ${TOTAL_ORPHANS} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${TOTAL_ORPHANS_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/zombies +# condition: '>' +# threshold: ${TOTAL_ZOMBIES} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${TOTAL_ZOMBIES_DIFF} + +compare_criteria: + # Monitoring metrics criteria + - subject: ServerLocalClient/total_workers + diff_percent_previous: ${TOTAL_WORKERS_PREV_DIFF} + diff_percent_run: ${TOTAL_WORKERS_RUN_DIFF} + - subject: ServerLocalClient/sum_workers_memory_rss + diff_percent_previous: ${TOTAL_WORKERS_MEM_PREV_DIFF} + diff_percent_run : ${TOTAL_WORKERS_MEM_RUN_DIFF} + - subject: ServerLocalClient/sum_workers_file_descriptors + diff_percent_previous: ${TOTAL_WORKERS_FDS_PREV_DIFF} + diff_percent_run: ${TOTAL_WORKERS_FDS_RUN_DIFF} +# - subject: ServerLocalClient/sum_all_memory_rss +# diff_percent_previous: ${TOTAL_MEM_PREV_DIFF} +# diff_percent_run: ${TOTAL_MEM_RUN_DIFF} + - subject: ServerLocalClient/total_processes + diff_percent_previous: ${TOTAL_PROCS_PREV_DIFF} + diff_percent_run: ${TOTAL_PROCS_RUN_DIFF} + - subject: ServerLocalClient/sum_all_file_descriptors + diff_percent_previous : ${TOTAL_FDS_PREV_DIFF} + diff_percent_run : ${TOTAL_FDS_RUN_DIFF} +# - subject: ServerLocalClient/frontend_memory_rss +# diff_percent_previous: ${FRNTEND_MEM_PREV_DIFF} +# diff_percent_run: ${FRNTEND_MEM_RUN_DIFF} + - subject: ServerLocalClient/orphans + diff_percent_previous: ${TOTAL_ORPHANS_PREV_DIFF} + diff_percent_run: ${TOTAL_ORPHANS_RUN_DIFF} + - subject: ServerLocalClient/zombies + diff_percent_previous: ${TOTAL_ZOMBIES_PREV_DIFF} + diff_percent_run: ${TOTAL_ZOMBIES_RUN_DIFF} + settings: env: - BASEDIR : '.' + ARTIFACTS_DIR : '.' 
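Note: `compare_criteria` above is not a stock bzt/Taurus reporting module, so it is presumably consumed by this suite's own runner, which flags a metric when its percent difference versus the previous run (`diff_percent_previous`) or versus other runs in the same session (`diff_percent_run`) exceeds the configured threshold. A minimal Python sketch of that check under that assumption; the function names are illustrative, not the suite's actual API:

```python
# Hypothetical sketch of the percent-difference check implied by the
# compare_criteria thresholds above; not the suite's real implementation.
def percent_diff(current, previous):
    """Absolute percent change of `current` relative to `previous`."""
    if previous == 0:
        return 0.0 if current == 0 else float("inf")
    return abs(current - previous) / abs(previous) * 100.0

def within_threshold(current, previous, max_diff_percent):
    """True if the metric moved at most max_diff_percent between runs."""
    return percent_diff(current, previous) <= max_diff_percent

# e.g. sum_workers_memory_rss growing from 4 to 5 units is a 25% change,
# so a threshold of 30 (as in the *_PREV_DIFF: 30 values below) passes.
assert within_threshold(5, 4, 30)
```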
+ SERVER_START_CMD : "multi-model-server --start " + SERVER_STOP_CMD : "multi-model-server --stop " + SERVER_PROCESS_NAME : "[c]om.amazonaws.ml.mms.ModelServer" + INPUT_IMG_URL: "https://s3.amazonaws.com/model-server/inputs/kitten.jpg" + INPUT_IMG_PATH: "kitten.jpg" + + RESNET_152_BATCH_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/examples/resnet-152-batching/resnet-152.mar" + RESNET_152_BATCH_NAME : "resnet-152" + SQZNET_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" + SQZNET_NAME : "squeezenet_v1.1" + RESNET_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" + RESNET_NAME : "resnet-18" + diff --git a/tests/performance/tests/health_check/environments/xlarge.yaml b/tests/performance/tests/health_check/environments/xlarge.yaml index 689a5d66b..2a8e2332d 100644 --- a/tests/performance/tests/health_check/environments/xlarge.yaml +++ b/tests/performance/tests/health_check/environments/xlarge.yaml @@ -1,8 +1,49 @@ + --- settings: env: - HLTH_CHK_SUCC : 100% - HLTH_CHK_RT : 14ms + API_LABEL : HealthCheck + API_SUCCESS : 80% + API_AVG_RT : 30ms + + TOTAL_WORKERS: 0 + TOTAL_WORKERS_MEM: 0 + TOTAL_WORKERS_FDS: 0 + + TOTAL_MEM : 1500098304 TOTAL_PROCS : 1 - TOTAL_FDS : 67 - TOTAL_MEM : 750000000 #750MB \ No newline at end of file + TOTAL_FDS : 73 + + FRNTEND_MEM: 1500098304 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 0 + TOTAL_WORKERS_FDS_RUN_DIFF: 0 + TOTAL_MEM_RUN_DIFF: 200 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 200 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : health_check.jmx + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/health_check/health_check.jmx b/tests/performance/tests/health_check/health_check.jmx index 422c45cf9..dc699a6be 100644 --- a/tests/performance/tests/health_check/health_check.jmx +++ b/tests/performance/tests/health_check/health_check.jmx @@ -1,7 +1,7 @@ - + false true diff --git a/tests/performance/tests/health_check/health_check.yaml b/tests/performance/tests/health_check/health_check.yaml index 2c8785e3c..3f2d38636 100644 --- a/tests/performance/tests/health_check/health_check.yaml +++ b/tests/performance/tests/health_check/health_check.yaml @@ -1,66 +1,18 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: health_check - -scenarios: - health_check: - script: health_check.jmx - -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - -services: - - module: shellexec - prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss - reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of HealthCheck<${HLTH_CHK_SUCC}, stop as failed - - avg-rt of HealthCheck>${HLTH_CHK_RT}, stop as failed # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} timeframe: 5s - stop : true + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file + +scenarios: + ~scenario_0: + script: health_check.jmx + + diff --git a/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml b/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml index 36b7dc0ad..38e33a727 100644 --- a/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml +++ b/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml @@ -1,10 +1,51 @@ + --- settings: env: - INFR1_SUCC : 100% + API_LABEL : Inference1 + API_SUCCESS : 80% + API_AVG_RT : 200ms + INFR2_SUCC: 100% - INFR1_RT : 290ms - INFR2_RT: 450ms + INFR2_RT: 550ms + + TOTAL_WORKERS: 4 + TOTAL_WORKERS_MEM: 600000000 + TOTAL_WORKERS_FDS: 150 + + TOTAL_MEM : 1400000000 TOTAL_PROCS : 5 - TOTAL_FDS : 107 - TOTAL_MEM : 600000000 #600MB \ No newline at end of file + TOTAL_FDS : 150 + + FRNTEND_MEM: 800000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + TOTAL_WORKERS_PREV_DIFF: 30 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 30 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + 
TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 30 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 40 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : inference_multiple_models.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/inference_multiple_models/inference_multiple_models.jmx b/tests/performance/tests/inference_multiple_models/inference_multiple_models.jmx index 1ceeaf2c2..67a5d689a 100644 --- a/tests/performance/tests/inference_multiple_models/inference_multiple_models.jmx +++ b/tests/performance/tests/inference_multiple_models/inference_multiple_models.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,13 +16,13 @@ model1 - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model1 Name model2 - resnet-18 + ${__P(RESNET_NAME,resnet-18)} Model2 Name = diff --git a/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml b/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml index 3244c4d8f..87da67e2c 100644 --- a/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml +++ b/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml @@ -1,74 +1,27 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: inference_multiple_models - scenarios: - inference_multiple_models: + scenario_0: script: inference_multiple_models.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - - "curl -s -X PUT http://localhost:8081/models/resnet-18?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" + - "curl -s -X PUT http://localhost:8081/models/${RESNET_NAME}?min_worker=1&synchronous=true" reporting: - module: passfail criteria: # Inbuilt Criteria - - success of Inference1<${INFR1_SUCC}, stop as failed - - success of Inference2<${INFR2_SUCC}, stop as failed - - avg-rt of Inference1>${INFR1_RT}, stop as failed - - avg-rt of Inference2>${INFR2_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true + - success of Inference2<${INFR2_SUCC} + - avg-rt of Inference2>${INFR2_RT} - class: bzt.modules.monitoring.MonitoringCriteria subject: 
ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} timeframe: 5s - stop : true + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true diff --git a/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml b/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml index 5b9fd6c0a..2fa0f5b6d 100644 --- a/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml +++ b/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml @@ -1,8 +1,49 @@ + --- settings: env: - INFR_SUCC : 100% - INFR_RT : 140ms + API_LABEL : Inference + API_SUCCESS : 80% + API_AVG_RT : 140ms + + TOTAL_WORKERS: 5 + TOTAL_WORKERS_MEM: 600000000 + TOTAL_WORKERS_FDS: 40 + + TOTAL_MEM : 1400000000 TOTAL_PROCS : 6 - TOTAL_FDS : 126 - TOTAL_MEM : 750000000 #750MB \ No newline at end of file + TOTAL_FDS : 150 + + FRNTEND_MEM: 800000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 35 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 60 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : inference_multiple_worker.jmx + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.jmx b/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.jmx index 1251a56b8..5d0816ae2 100644 --- a/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.jmx +++ b/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model Name diff --git a/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.yaml b/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.yaml index 5d73624a6..5f98bbd2c 100644 --- a/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.yaml +++ b/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.yaml @@ -1,71 +1,22 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 1m - iterations: 100 - scenario: inference_multiple_worker - scenarios: inference_multiple_worker: script: inference_multiple_worker.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=4&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=4&synchronous=true" reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of Inference<${INFR_SUCC}, stop as failed - - avg-rt of Inference>${INFR_RT}, stop as failed # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 1s - stop : true + timeframe: 5s + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file diff --git a/tests/performance/tests/inference_single_worker/environments/xlarge.yaml b/tests/performance/tests/inference_single_worker/environments/xlarge.yaml index c945e1f91..1dbeafb0f 100644 --- a/tests/performance/tests/inference_single_worker/environments/xlarge.yaml +++ b/tests/performance/tests/inference_single_worker/environments/xlarge.yaml @@ -1,8 +1,49 @@ + --- settings: env: - INFR_SUCC : 100% - INFR_RT : 290ms 
+ API_LABEL : Inference + API_SUCCESS : 80% + API_AVG_RT : 330ms + + TOTAL_WORKERS: 2 + TOTAL_WORKERS_MEM: 300000000 + TOTAL_WORKERS_FDS: 150 + + TOTAL_MEM : 1000000000 TOTAL_PROCS : 3 - TOTAL_FDS : 90 - TOTAL_MEM : 290000000 #290MB \ No newline at end of file + TOTAL_FDS : 150 + + FRNTEND_MEM: 600000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 60 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 90 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : inference_single_worker.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/inference_single_worker/inference_single_worker.jmx b/tests/performance/tests/inference_single_worker/inference_single_worker.jmx index ea05cc1ef..5124dbc6e 100644 --- a/tests/performance/tests/inference_single_worker/inference_single_worker.jmx +++ b/tests/performance/tests/inference_single_worker/inference_single_worker.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model Name diff --git a/tests/performance/tests/inference_single_worker/inference_single_worker.yaml b/tests/performance/tests/inference_single_worker/inference_single_worker.yaml index ece9e5b74..fdba969e0 100644 --- a/tests/performance/tests/inference_single_worker/inference_single_worker.yaml +++ b/tests/performance/tests/inference_single_worker/inference_single_worker.yaml @@ -1,71 +1,21 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 1m - iterations: 100 - scenario: inference_single_worker - scenarios: - inference_single_worker: + scenario_0: script: inference_single_worker.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of Inference<${INFR_SUCC}, stop as failed - - avg-rt of Inference>${INFR_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: 
bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 1s - stop : true + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true diff --git a/tests/performance/tests/list_models/environments/xlarge.yaml b/tests/performance/tests/list_models/environments/xlarge.yaml index 611624824..bed934bf9 100644 --- a/tests/performance/tests/list_models/environments/xlarge.yaml +++ b/tests/performance/tests/list_models/environments/xlarge.yaml @@ -1,8 +1,48 @@ --- settings: env: - LST_MODLS_SUCC : 100% - LST_MODLS_RT : 14ms + API_LABEL : ListModels + API_SUCCESS : 80% + API_AVG_RT : 14ms + + TOTAL_WORKERS: 2 + TOTAL_WORKERS_MEM: 600000000 + TOTAL_WORKERS_FDS: 40 + + TOTAL_MEM : 1400000000 TOTAL_PROCS : 3 - TOTAL_FDS : 86 - TOTAL_MEM : 185000000 #185MB \ No newline at end of file + TOTAL_FDS : 150 + + FRNTEND_MEM: 800000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 30 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 30 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : list_models.jmx + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/list_models/list_models.jmx b/tests/performance/tests/list_models/list_models.jmx index cd5490dc4..0323fcee8 100644 --- a/tests/performance/tests/list_models/list_models.jmx +++ b/tests/performance/tests/list_models/list_models.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name diff --git a/tests/performance/tests/list_models/list_models.yaml b/tests/performance/tests/list_models/list_models.yaml index 81dd8ada7..60d29551c 100644 --- a/tests/performance/tests/list_models/list_models.yaml +++ b/tests/performance/tests/list_models/list_models.yaml @@ -1,68 +1,21 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: list_models - scenarios: - list_models: + scenario_0: script: list_models.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/shufflenet.mar" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_URL}" reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of ListModels<${LST_MODLS_SUCC}, stop as failed - - avg-rt of ListModels>${LST_MODLS_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 1s - stop : true + timeframe: 5s + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file diff --git a/tests/performance/tests/model_description/environments/xlarge.yaml b/tests/performance/tests/model_description/environments/xlarge.yaml index 00e0aac87..370ba7324 100644 --- a/tests/performance/tests/model_description/environments/xlarge.yaml +++ b/tests/performance/tests/model_description/environments/xlarge.yaml @@ -1,8 +1,49 @@ + --- settings: env: - MODL_DESC_SUCC : 100% - MODL_DESC_RT : 14ms + API_LABEL : ModelDescription + API_SUCCESS : 80% + API_AVG_RT : 14ms + + TOTAL_WORKERS: 2 + TOTAL_WORKERS_MEM: 150205952 + TOTAL_WORKERS_FDS: 40 + + TOTAL_MEM : 1400000000 TOTAL_PROCS : 3 - TOTAL_FDS : 90 - TOTAL_MEM : 300000000 #300MB \ No newline at end of file + TOTAL_FDS : 150 + + FRNTEND_MEM: 800000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + 
## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 50 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 30 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 30 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : model_description.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/model_description/model_description.jmx b/tests/performance/tests/model_description/model_description.jmx index 4d8898adb..5cb68f9ac 100644 --- a/tests/performance/tests/model_description/model_description.jmx +++ b/tests/performance/tests/model_description/model_description.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name diff --git a/tests/performance/tests/model_description/model_description.yaml b/tests/performance/tests/model_description/model_description.yaml index 05358a53c..ef429dd5e 100644 --- a/tests/performance/tests/model_description/model_description.yaml +++ b/tests/performance/tests/model_description/model_description.yaml @@ -1,68 +1,21 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: model_description - scenarios: - model_description: + scenario_0: script: model_description.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of ModelDescription<${MODL_DESC_SUCC}, stop as failed - - avg-rt of ModelDescription>${MODL_DESC_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 1s - stop : true + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail 
: true \ No newline at end of file diff --git a/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml b/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml index 1671213d1..21eaaee4b 100644 --- a/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml +++ b/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml @@ -1,12 +1,56 @@ + --- settings: env: - INFR1_SUCC : 100% - INFR2_SUCC: 100% - INFR1_RT : 290ms + API_LABEL : Inference1 + API_SUCCESS : 80% + API_AVG_RT : 290ms + + INFR2_SUCC: 80% INFR2_RT: 450ms - TOTAL_PROCS : 14 + SCALEUP1_RT : 500ms + SCALEUP2_RT : 500ms + SCALEDOWN1_RT : 100ms + SCALEDOWN2_RT : 100ms + + TOTAL_WORKERS: 8 + TOTAL_WORKERS_MEM: 2668554752 + TOTAL_WORKERS_FDS: 100 + + TOTAL_MEM : 2000000000 + TOTAL_PROCS : 9 TOTAL_FDS : 300 - TOTAL_MEM : 2000000000 #~2GB + + FRNTEND_MEM: 1000000000 + TOTAL_ORPHANS : 0 - FRNTEND_MEM : 1000000000 #~1GB \ No newline at end of file + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 120 + TOTAL_WORKERS_MEM_RUN_DIFF: 135 + TOTAL_WORKERS_FDS_RUN_DIFF: 130 + TOTAL_MEM_RUN_DIFF: 130 + TOTAL_PROCS_RUN_DIFF: 100 + TOTAL_FDS_RUN_DIFF: 40 + FRNTEND_MEM_RUN_DIFF: 130 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : multiple_inference_and_scaling.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.jmx b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.jmx index cbff8debc..cbff660c4 100644 --- a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.jmx +++ b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,13 +16,13 @@ model1 - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model1 Name model2 - resnet-18 + ${__P(RESNET_NAME,resnet-18)} Model2 Name = @@ -117,7 +117,7 @@ - + @@ -148,7 +148,35 @@ - + + + + + ${__P(input_filepath)} + data + image/jpeg + + + + + + + + + + + /predictions/${model1} + POST + true + false + true + true + + + + + + @@ -234,7 +262,7 @@ - + @@ -265,7 +293,35 @@ - + + + + + ${__P(input_filepath)} + data + image/jpeg + + + + + + + + + + + /predictions/${model2} + POST + true + false + true + true + + + + + + diff --git a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml index 8f3324f2a..6b7e4891a 100644 --- a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml +++ b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml @@ -1,83 +1,33 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 300s - scenario: inference_multiple_models - scenarios: - inference_multiple_models: + scenario_0: script: multiple_inference_and_scaling.jmx -modules: - server_local_monitoring: - class : 
metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - - "curl -s -X PUT http://localhost:8081/models/resnet-18?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss - - frontend_memory_rss - - orphans + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" + - "curl -s -X PUT http://localhost:8081/models/${RESNET_NAME}?min_worker=1&synchronous=true" + reporting: - module: passfail criteria: # Inbuilt Criteria - - success of Inference1<${INFR1_SUCC}, stop as failed - - success of Inference2<${INFR2_SUCC}, stop as failed - - avg-rt of Inference1>${INFR1_RT}, stop as failed - - avg-rt of Inference2>${INFR2_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 10s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_memory_rss - condition: '>' - threshold: ${TOTAL_MEM} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/orphans - condition: '>' - threshold: ${TOTAL_ORPHANS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/frontend_memory_rss - condition: '>' - threshold: ${FRNTEND_MEM} - timeframe: 5s - stop : true - fail : true + - success of Inference2<${INFR2_SUCC} + - avg-rt of Inference2>${INFR2_RT} + - success of Inference11<${API_SUCCESS} + - success of Inference21<${INFR2_SUCC} + - avg-rt of Inference11>${API_AVG_RT} + - avg-rt of Inference21>${INFR2_RT} + - success of ScaleUp1<${INFR2_SUCC} + - avg-rt of ScaleUp1>${SCALEUP1_RT} + - success of ScaleUp2<${INFR2_SUCC} + - avg-rt of ScaleUp2>${SCALEUP2_RT} + - success of ScaleDown1<${INFR2_SUCC} + - avg-rt of ScaleDown1>${SCALEDOWN1_RT} + - success of ScaleDown2<${INFR2_SUCC} + - avg-rt of ScaleDown2>${SCALEDOWN2_RT} + diff --git a/tests/performance/tests/register_unregister/environments/xlarge.yaml b/tests/performance/tests/register_unregister/environments/xlarge.yaml index 0e099afed..87137002b 100644 --- a/tests/performance/tests/register_unregister/environments/xlarge.yaml +++ b/tests/performance/tests/register_unregister/environments/xlarge.yaml @@ -1,11 +1,51 @@ --- settings: env: - REG_SUCC : 100% - UNREG_SUCC: 100% - REG_RT : 15s - UNREG_RT: 10ms + API_LABEL : RegisterModel + 
API_SUCCESS : 80% + API_AVG_RT : 290ms + + UNREG_SUCC: 80% + UNREG_RT: 290ms + + TOTAL_WORKERS: 0 + TOTAL_WORKERS_MEM: 14054528 + TOTAL_WORKERS_FDS: 50 + + TOTAL_MEM : 1400000000 TOTAL_PROCS : 1 - TOTAL_FDS : 66 + TOTAL_FDS : 100 + + FRNTEND_MEM: 1200000000 + TOTAL_ORPHANS : 0 - FRNTEND_MEM : 75000000 #75MB \ No newline at end of file + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 220 + TOTAL_WORKERS_FDS_RUN_DIFF: 220 + TOTAL_MEM_RUN_DIFF: 150 + TOTAL_PROCS_RUN_DIFF: 70 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 140 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 1 + RAMP-UP : 1s + ITERATIONS : 1400 #approximately runs for 5 mins + SCRIPT : register_unregister.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/register_unregister/register_unregister.jmx b/tests/performance/tests/register_unregister/register_unregister.jmx index 504c3ae8c..aa420f88e 100644 --- a/tests/performance/tests/register_unregister/register_unregister.jmx +++ b/tests/performance/tests/register_unregister/register_unregister.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,13 +16,13 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name model_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} URL to model store on s3 = diff --git a/tests/performance/tests/register_unregister/register_unregister.yaml b/tests/performance/tests/register_unregister/register_unregister.yaml index 1feb22487..296870996 100644 --- a/tests/performance/tests/register_unregister/register_unregister.yaml +++ b/tests/performance/tests/register_unregister/register_unregister.yaml @@ -1,72 +1,17 @@ --- -execution: -- concurrency: 1 - ramp-up: 0s -# hold-for: 5h - iterations: 5 - scenario: register_unregister +~execution: +- concurrency: ${CONCURRENCY} + ramp-up: ${RAMP-UP} + iterations: ${ITERATIONS} + scenario: scenario_0 scenarios: - register_unregister: + scenario_0: script: register_unregister.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - -services: - - module: shellexec - prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - frontend_memory_rss - - orphans - reporting: - module: passfail criteria: # Inbuilt Criteria - - success of RegisterModel<${REG_SUCC}, stop as failed - - success of UnregisterModel<${UNREG_SUCC}, stop as failed - - avg-rt of RegisterModel>${REG_RT}, stop as failed - - avg-rt of UnregisterModel>${UNREG_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 5s - stop : 
true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/orphans - condition: '>' - threshold: ${TOTAL_ORPHANS} - timeframe: 1s - stop : true - fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/frontend_memory_rss -# condition: '>' -# threshold: ${FRNTEND_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file + - success of UnregisterModel<${UNREG_SUCC} + - avg-rt of UnregisterModel>${UNREG_RT} diff --git a/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml b/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml index 24c07f5cf..1793349e5 100644 --- a/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml +++ b/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml @@ -1,12 +1,55 @@ --- settings: env: - REG_SUCC : 100% - SCL_UP_SUCC: 100% - UNREG_SUCC: 100% - REG_RT : 15s + API_LABEL : RegisterModel + API_SUCCESS : 80% + API_AVG_RT : 15s + + SCL_UP_SUCC: 80% + UNREG_SUCC: 80% SCL_UP_RT: 1.5s UNREG_RT: 18ms + + TOTAL_WORKERS: 1 + TOTAL_WORKERS_MEM: 100000000 + TOTAL_WORKERS_FDS: 200 + + + TOTAL_MEM : 2000000000 TOTAL_PROCS : 2 - TOTAL_FDS : 73 - FRNTEND_MEM : 120000000 #120MB \ No newline at end of file + TOTAL_FDS : 200 + + FRNTEND_MEM: 100000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 200 + TOTAL_WORKERS_FDS_RUN_DIFF: 200 + TOTAL_MEM_RUN_DIFF: 200 + TOTAL_PROCS_RUN_DIFF: 150 + TOTAL_FDS_RUN_DIFF: 200 + FRNTEND_MEM_RUN_DIFF: 200 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + + CONCURRENCY : 1 + RAMP-UP : 1s + ITERATIONS : 360 #approximately runs for 5 mins + SCRIPT : register_unregister_multiple.jmx + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.jmx b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.jmx index 1dac0d5fe..876e51513 100644 --- a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.jmx +++ b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,13 +16,13 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name model_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} URL to model store on s3 = diff --git a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml index 5c8fcb85e..e7c30a660 100644 --- a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml +++ b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml @@ -1,66 +1,34 @@ --- -execution: -- concurrency: 1 - ramp-up: 0s - iterations: 5 - scenario: register_unregister_multiple +~execution: +- concurrency: ${CONCURRENCY} + ramp-up: ${RAMP-UP} + iterations: ${ITERATIONS} + scenario: scenario_0 scenarios: - register_unregister_multiple: + scenario_0: script: register_unregister_multiple.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - frontend_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_URL}" + -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: # Inbuilt Criteria - - success of RegisterModel<${REG_SUCC}, stop as failed - - success of ScaleUp<${SCL_UP_SUCC}, stop as failed - - success of UnregisterModel<${UNREG_SUCC}, stop as failed - - avg-rt of RegisterModel>${REG_RT}, stop as failed - - avg-rt of ScaleUp>${SCL_UP_RT}, stop as failed - - avg-rt of UnregisterModel>${UNREG_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 5s - stop : true - fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/frontend_memory_rss -# condition: '>' -# threshold: ${FRNTEND_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end 
of file + - success of ${API_LABEL}<${API_SUCCESS} + - avg-rt of ${API_LABEL}>${API_AVG_RT} + - success of ScaleUp<${SCL_UP_SUCC} + - success of UnregisterModel<${UNREG_SUCC} + - avg-rt of ScaleUp>${SCL_UP_RT} + - avg-rt of UnregisterModel>${UNREG_RT} diff --git a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml index 6d43899b3..913da23ee 100644 --- a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml @@ -1,15 +1,25 @@ --- settings: env: - SCL_DWN_SUCC : 100% + SCL_DWN_SUCC : 80% SCL_DWN_RT : 10ms TOTAL_PROCS_B4_SCL_DWN : 6 TOTAL_PROCS_AFTR_SCL_DWN : 4 - TOTAL_WRKRS_B4_SCL_DWN : 4 - TOTAL_WRKRS_AFTR_SCL_DWN : 2 + TOTAL_WRKRS_B4_SCL_DWN : 5 + TOTAL_WRKRS_AFTR_SCL_DWN : 3 FRNTEND_FDS : 78 TOTAL_WRKRS_FDS_B4_SCL_DWN: 38 - TOTAL_WRKRS_FDS_AFTR_SCL_DWN: 23 - FRNTEND_MEM : 290000000 #290MB - TOTAL_WRKRS_MEM_B4_SCL_DWN : 450000000 #450MB - TOTAL_WRKRS_MEM_AFTR_SCL_DWN : 210000000 #210MB \ No newline at end of file + FRNTEND_MEM : 1000000000 + TOTAL_WRKRS_MEM_B4_SCL_DWN : 650000000 + TOTAL_WRKRS_MEM_AFTR_SCL_DWN : 200000000 + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + + CONCURRENCY: 10 + RAMP-UP: 1s + HOLD-FOR: 30s + SCRIPT: scale_down_workers.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/scale_down_workers/scale_down_workers.jmx b/tests/performance/tests/scale_down_workers/scale_down_workers.jmx index 512444b07..c995a47fd 100644 --- a/tests/performance/tests/scale_down_workers/scale_down_workers.jmx +++ b/tests/performance/tests/scale_down_workers/scale_down_workers.jmx @@ -1,7 +1,7 @@ - + false true @@ -22,7 +22,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name diff --git a/tests/performance/tests/scale_down_workers/scale_down_workers.yaml b/tests/performance/tests/scale_down_workers/scale_down_workers.yaml index dc8cc1382..c5f21b61e 100644 --- a/tests/performance/tests/scale_down_workers/scale_down_workers.yaml +++ b/tests/performance/tests/scale_down_workers/scale_down_workers.yaml @@ -1,113 +1,109 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: scaledown scenarios: - scaledown: + scenario_0: script: scale_down_workers.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor services: - module: shellexec prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=4&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - total_workers - - frontend_file_descriptors - - sum_workers_file_descriptors - - frontend_memory_rss - - sum_workers_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=4&synchronous=true" + - "sleep 10s" -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: 
final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: # Inbuilt Criteria - - success of ScaleDown<${SCL_DWN_SUCC}, stop as failed - - avg-rt of ScaleDown>${SCL_DWN_RT}, stop as failed + - success of ScaleDown<${SCL_DWN_SUCC} + - avg-rt of ScaleDown>${SCL_DWN_RT} # Custom Criteria - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '>' threshold: ${TOTAL_PROCS_B4_SCL_DWN} timeframe: 1s - stop : true + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS_AFTR_SCL_DWN} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_workers condition: '>' threshold: ${TOTAL_WRKRS_B4_SCL_DWN} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_workers condition: '<' threshold: ${TOTAL_WRKRS_AFTR_SCL_DWN} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/frontend_file_descriptors condition: '>' threshold: ${FRNTEND_FDS} - timeframe: 5s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/sum_workers_file_descriptors condition: '>' threshold: ${TOTAL_WRKRS_FDS_B4_SCL_DWN} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_file_descriptors - condition: '<' - threshold: ${TOTAL_WRKRS_FDS_AFTR_SCL_DWN} - timeframe: 5s - stop : true + timeframe: 10s + stop : ${STOP} fail : true # - class: bzt.modules.monitoring.MonitoringCriteria # subject: ServerLocalClient/frontend_memory_rss # condition: '>' # threshold: ${FRNTEND_MEM} -# timeframe: 5s -# stop : true +# timeframe: 10s +# stop : ${STOP} +# fail : true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_memory_rss +# condition: '>' +# threshold: ${TOTAL_WRKRS_MEM_B4_SCL_DWN} +# timeframe: 10s +# stop : ${STOP} +# fail : true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_memory_rss +# condition: '<' +# threshold: ${TOTAL_WRKRS_MEM_AFTR_SCL_DWN} +# timeframe: 10s +# stop : ${STOP} # fail : true - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss + subject: ServerLocalClient/orphans condition: '>' - threshold: ${TOTAL_WRKRS_MEM_B4_SCL_DWN} - timeframe: 5s - stop : true - fail : true + threshold: ${TOTAL_ORPHANS} + timeframe: 10s + stop: ${STOP} + fail: true + diff_percent_previous: ${TOTAL_ORPHANS_DIFF} - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss - condition: '<' - threshold: ${TOTAL_WRKRS_MEM_AFTR_SCL_DWN} - timeframe: 5s - stop : true - fail : true \ No newline at end of file + subject: ServerLocalClient/zombies + condition: '>' + threshold: ${TOTAL_ZOMBIES} + timeframe: 10s + stop: ${STOP} + fail: true + +~compare_criteria: + - \ No newline at end of file diff --git a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml index 9e3182c3e..0b9082b0c 100644 --- 
a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml @@ -1,15 +1,26 @@ --- settings: env: - SCL_UP_SUCC : 100% + SCL_UP_SUCC : 80% SCL_UP_RT : 10ms TOTAL_PROCS_AFTR_SCL_UP : 6 - TOTAL_PROCS_B4_SCL_UP : 3 - TOTAL_WRKRS_AFTR_SCL_UP : 4 - TOTAL_WRKRS_B4_SCL_UP : 1 + TOTAL_PROCS_B4_SCL_UP : 2 + TOTAL_WRKRS_AFTR_SCL_UP : 5 + TOTAL_WRKRS_B4_SCL_UP : 3 FRNTEND_FDS : 88 - TOTAL_WRKRS_FDS_AFTR_SCL_UP : 38 - TOTAL_WRKRS_FDS_B4_SCL_UP : 11 - FRNTEND_MEM : 290000000 #290MB - TOTAL_WRKRS_MEM_AFTR_SCL_UP : 450000000 #450MB - TOTAL_WRKRS_MEM_B4_SCL_UP : 115000000 #115MB \ No newline at end of file + TOTAL_WRKRS_FDS_AFTR_SCL_UP : 43 + FRNTEND_MEM : 1000000000 + TOTAL_WRKRS_MEM_AFTR_SCL_UP : 796492032 + TOTAL_WRKRS_MEM_B4_SCL_UP : 115000000 #115MB + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + TOTAL_WRKRS_FDS_B4_SCL_UP : 0 + + + CONCURRENCY: 10 + RAMP-UP: 1s + HOLD-FOR: 30s + SCRIPT: scale_up_workers.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/scale_up_workers/scale_up_workers.jmx b/tests/performance/tests/scale_up_workers/scale_up_workers.jmx index 997547d66..d875872e8 100644 --- a/tests/performance/tests/scale_up_workers/scale_up_workers.jmx +++ b/tests/performance/tests/scale_up_workers/scale_up_workers.jmx @@ -1,7 +1,7 @@ - + false true @@ -22,7 +22,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name diff --git a/tests/performance/tests/scale_up_workers/scale_up_workers.yaml b/tests/performance/tests/scale_up_workers/scale_up_workers.yaml index 125aff830..2404a7f82 100644 --- a/tests/performance/tests/scale_up_workers/scale_up_workers.yaml +++ b/tests/performance/tests/scale_up_workers/scale_up_workers.yaml @@ -1,113 +1,109 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: scaleup - scenarios: - scaleup: + scenario_0: script: scale_up_workers.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor services: - module: shellexec prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - total_workers - - frontend_file_descriptors - - sum_workers_file_descriptors - - frontend_memory_rss - - sum_workers_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" + - "sleep 10s" + -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: # Inbuilt Criteria - - success of ScaleUp<${SCL_UP_SUCC}, stop as failed - - avg-rt of ScaleUp>${SCL_UP_RT}, stop as failed + - success of ScaleUp<${SCL_UP_SUCC} + - avg-rt of ScaleUp>${SCL_UP_RT} # Custom Criteria - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes 
diff --git a/tests/performance/utils/fs.py b/tests/performance/utils/fs.py
index 2a2a73178..eb0cce149 100644
--- a/tests/performance/utils/fs.py
+++ b/tests/performance/utils/fs.py
@@ -23,7 +23,7 @@
 logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO)
 
 
-def get_sub_dirs(dir, exclude_list=['comp_data'], include_pattern='*', exclude_pattern=None):
+def get_sub_dirs(dir, exclude_list=[], include_pattern='*', exclude_pattern=None):
     """Utility method to get list of folders in a directory"""
     dir = dir.strip()
     if not os.path.exists(dir):
@@ -32,8 +32,16 @@ def get_sub_dirs(dir, exclude_list=['comp_data'], include_pattern='*', exclude_p
         raise Exception(msg)
 
     pattern_list = glob.glob(dir + "/" + include_pattern)
-    exclude_pattern_list = glob.glob(dir + "/" + exclude_pattern) if exclude_pattern is not None else []
-    return list([x for x in os.listdir(dir) if os.path.isdir(dir + "/" + x)
-                 and x not in exclude_list
-                 and dir + "/" + x in pattern_list
-                 and dir + "/" + x not in exclude_pattern_list])
+    exclude_pattern_list, exclude_pattern = (glob.glob(dir + "/" + exclude_pattern), exclude_pattern) \
+        if exclude_pattern is not None else ([], '')
+    skip_pattern = "/skip*"
+    skip_list = glob.glob(dir + skip_pattern)
+
+    # copy before extending so the caller's (mutable default) list is not modified
+    exclude_patterns = list(exclude_list)
+    exclude_patterns.extend([skip_pattern, exclude_pattern])
+    logger.info("Excluding the tests with name patterns '{}'.".format("','".join(exclude_patterns)))
+    return sorted(list([x for x in os.listdir(dir) if os.path.isdir(dir + "/" + x)
+                        and x not in exclude_list
+                        and dir + "/" + x in pattern_list
+                        and dir + "/" + x not in exclude_pattern_list
+                        and dir + "/" + x not in skip_list]))
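The reworked `get_sub_dirs` adds two behaviours worth calling out: any sub-directory whose name matches `skip*` is filtered out via the built-in `/skip*` glob, and the result is now sorted for deterministic test ordering. A usage sketch under assumptions (the import path and directory names are invented for illustration):

```
import os
import tempfile

from utils.fs import get_sub_dirs  # import path assumed from the file's location

# Hypothetical test layout: two real test dirs plus one marked to be skipped.
root = tempfile.mkdtemp()
for name in ("scale_up_workers", "api_latency", "skip_old_case"):
    os.makedirs(os.path.join(root, name))

# 'skip_old_case' matches the built-in '/skip*' glob and is dropped;
# the survivors come back in sorted order.
print(get_sub_dirs(root))  # expected: ['api_latency', 'scale_up_workers']
```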
diff --git a/tests/performance/utils/pyshell.py b/tests/performance/utils/pyshell.py
index 108cb9253..6178fb2a9 100644
--- a/tests/performance/utils/pyshell.py
+++ b/tests/performance/utils/pyshell.py
@@ -38,9 +38,20 @@ def run_process(cmd, wait=True):
             if not line:
                 break
             lines.append(line)
+            # keep only a rolling window of the last 20 log lines
+            if len(lines) > 20:
+                lines = lines[1:]
             logger.info(line)
-        return process.returncode, '\n'.join(lines)
+        process.communicate()
+        code = process.returncode
+        error_msg = ""
+        if code:
+            error_msg = "Error (error_code={}) while executing command : {}. ".format(code, cmd)
+            logger.info(error_msg)
+            error_msg += "\n\n$$$$Here are the last 20 lines of the logs." \
+                         " For more details refer to the log file.$$$$\n\n"
+            error_msg += '\n'.join(lines)
+        return code, error_msg
     else:
         process = subprocess.Popen(cmd, shell=True)
         return process.returncode, ''
diff --git a/tests/performance/utils/timer.py b/tests/performance/utils/timer.py
index 1fa47d086..8beb03c29 100644
--- a/tests/performance/utils/timer.py
+++ b/tests/performance/utils/timer.py
@@ -37,5 +37,9 @@ def __enter__(self):
 
     def __exit__(self, type, value, traceback):
         logger.info("%s: %ss", self.description, self.diff())
+        # Returning False ensures __exit__ does not suppress an exception
+        # raised inside the with-block; otherwise failures would go unreported.
+        return False
+
     def diff(self):
         return int(time.time()) - self.start
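The explicit `return False` in `Timer.__exit__` makes the propagation behaviour visible: a falsy return from `__exit__` tells Python to re-raise whatever exception the with-block raised, while a truthy return would swallow it. A self-contained toy context manager (not the repo's actual Timer) demonstrating the same semantics:

```
import time


class ToyTimer:
    """Minimal stand-in for Timer, used only to illustrate __exit__ semantics."""

    def __init__(self, description):
        self.description = description

    def __enter__(self):
        self.start = int(time.time())
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        print("{}: {}s".format(self.description, int(time.time()) - self.start))
        # Falsy return: any exception from the block is re-raised by Python.
        # Returning True here would silently suppress it.
        return False


try:
    with ToyTimer("failing step"):
        raise RuntimeError("boom")
except RuntimeError:
    print("exception propagated as expected")
```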