diff --git a/inference/python_api_test/test_case/infer_test.py b/inference/python_api_test/test_case/infer_test.py
index 86de44c77..8890bc48f 100644
--- a/inference/python_api_test/test_case/infer_test.py
+++ b/inference/python_api_test/test_case/infer_test.py
@@ -61,7 +61,7 @@ def load_config(self, **kwargs):
         else:
             raise Exception(f"model file path is not exist, [{model_path}] or [{model_file}] invalid!")
 
-    def get_truth_val(self, input_data_dict: dict, device: str) -> dict:
+    def get_truth_val(self, input_data_dict: dict, device: str, gpu_mem=1000) -> dict:
         """
         get truth value calculated by target device kernel
         Args:
@@ -72,7 +72,7 @@ def get_truth_val(self, input_data_dict: dict, device: str) -> dict:
         if device == "cpu":
             self.pd_config.disable_gpu()
         elif device == "gpu":
-            self.pd_config.enable_use_gpu(1000, 0)
+            self.pd_config.enable_use_gpu(gpu_mem, 0)
         else:
             raise Exception(f"{device} not support in current test codes")
         self.pd_config.switch_ir_optim(False)
@@ -194,7 +194,7 @@ def mkldnn_test(self, input_data_dict: dict, output_data_dict: dict, mkldnn_cach
                     abs(out_data - output_data_truth_val[j]) <= delta
                 ), f"{out_data} - {output_data_truth_val[j]} > {delta}"
 
-    def trt_fp32_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5):
+    def trt_fp32_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5, gpu_mem=1000):
         """
         test enable_tensorrt_engine()
         batch_size = 1
@@ -208,7 +208,7 @@ def trt_fp32_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repea
         Returns:
             None
         """
-        self.pd_config.enable_use_gpu(1000, 0)
+        self.pd_config.enable_use_gpu(gpu_mem, 0)
         self.pd_config.enable_tensorrt_engine(
             workspace_size=1 << 30,
             max_batch_size=1,
@@ -238,7 +238,7 @@ def trt_fp32_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repea
                     abs(out_data - output_data_truth_val[j]) <= delta
                 ), f"{out_data} - {output_data_truth_val[j]} > {delta}"
 
-    def trt_fp32_more_bz_test(self, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5):
+    def trt_fp32_more_bz_test(self, input_data_dict: dict, output_data_dict: dict, repeat=1, delta=1e-5, gpu_mem=1000):
         """
         test enable_tensorrt_engine()
         batch_size = 10
@@ -252,7 +252,7 @@ def trt_fp32_more_bz_test(self, input_data_dict: dict, output_data_dict: dict, r
         Returns:
             None
         """
-        self.pd_config.enable_use_gpu(1000, 0)
+        self.pd_config.enable_use_gpu(gpu_mem, 0)
         self.pd_config.enable_tensorrt_engine(
             workspace_size=1 << 30,
             max_batch_size=10,
@@ -283,7 +283,7 @@ def trt_fp32_more_bz_test(self, input_data_dict: dict, output_data_dict: dict, r
                 ), f"{out_data} - {output_data_truth_val[j]} > {delta}"
 
     def trt_fp32_bz1_multi_thread_test(
-        self, input_data_dict: dict, output_data_dict: dict, repeat=2, thread_num=5, delta=1e-5
+        self, input_data_dict: dict, output_data_dict: dict, repeat=1, thread_num=2, delta=1e-5, gpu_mem=1000
     ):
         """
         test enable_tensorrt_engine()
@@ -301,7 +301,7 @@ def trt_fp32_bz1_multi_thread_test(
         Returns:
             None
         """
-        self.pd_config.enable_use_gpu(1000, 0)
+        self.pd_config.enable_use_gpu(gpu_mem, 0)
         self.pd_config.enable_tensorrt_engine(
             workspace_size=1 << 30,
             max_batch_size=1,
@@ -320,7 +320,7 @@ def trt_fp32_bz1_multi_thread_test(
             record_thread.start()
             record_thread.join()
 
-    def trt_fp16_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5):
+    def trt_fp16_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5, gpu_mem=1000):
         """
         test enable_tensorrt_engine()
         batch_size = 1
@@ -334,7 +334,7 @@ def trt_fp16_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repea
         Returns:
             None
         """
-        self.pd_config.enable_use_gpu(1000, 0)
+        self.pd_config.enable_use_gpu(gpu_mem, 0)
         self.pd_config.enable_tensorrt_engine(
             workspace_size=1 << 30,
             max_batch_size=1,
@@ -364,7 +364,7 @@ def trt_fp16_bz1_test(self, input_data_dict: dict, output_data_dict: dict, repea
                     abs(out_data - output_data_truth_val[j]) <= delta
                 ), f"{out_data} - {output_data_truth_val[j]} > {delta}"
 
-    def trt_fp16_more_bz_test(self, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5):
+    def trt_fp16_more_bz_test(self, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5, gpu_mem=1000):
         """
         test enable_tensorrt_engine()
         batch_size = 10
@@ -378,7 +378,7 @@ def trt_fp16_more_bz_test(self, input_data_dict: dict, output_data_dict: dict, r
         Returns:
             None
         """
-        self.pd_config.enable_use_gpu(1000, 0)
+        self.pd_config.enable_use_gpu(gpu_mem, 0)
        self.pd_config.enable_tensorrt_engine(
             workspace_size=1 << 30,
             max_batch_size=10,
@@ -409,7 +409,7 @@ def trt_fp16_more_bz_test(self, input_data_dict: dict, output_data_dict: dict, r
                 ), f"{out_data} - {output_data_truth_val[j]} > {delta}"
 
     def trt_fp16_bz1_multi_thread_test(
-        self, input_data_dict: dict, output_data_dict: dict, repeat=2, thread_num=5, delta=1e-5
+        self, input_data_dict: dict, output_data_dict: dict, repeat=1, thread_num=2, delta=1e-5, gpu_mem=1000
    ):
         """
         test enable_tensorrt_engine()
@@ -427,7 +427,7 @@ def trt_fp16_bz1_multi_thread_test(
         Returns:
             None
         """
-        self.pd_config.enable_use_gpu(1000, 0)
+        self.pd_config.enable_use_gpu(gpu_mem, 0)
         self.pd_config.enable_tensorrt_engine(
             workspace_size=1 << 30,
             max_batch_size=1,
@@ -447,7 +447,7 @@ def trt_fp16_bz1_multi_thread_test(
             record_thread.join()
 
     def run_multi_thread_test_predictor(
-        self, predictor, input_data_dict: dict, output_data_dict: dict, repeat=5, delta=1e-5
+        self, predictor, input_data_dict: dict, output_data_dict: dict, repeat=1, delta=1e-5
     ):
         """
         test paddle predictor in multithreaded task
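Taken together, the infer_test.py hunks above thread one new keyword, gpu_mem, through every GPU-backed helper (Paddle's enable_use_gpu(mem_mb, device_id) sizes the initial GPU memory pool in MB on the given device), and lighten the multi-thread defaults to repeat=1 / thread_num=2. A minimal sketch of where the knob lands, assuming the public paddle.inference API; build_trt_config is a hypothetical helper, not part of this suite, and the TensorRT arguments mirror the hunks above:

# Hypothetical helper sketching where gpu_mem lands (fp32 path shown).
from paddle.inference import Config, PrecisionType, create_predictor


def build_trt_config(model_file, params_file, gpu_mem=1000, max_batch_size=1):
    config = Config(model_file, params_file)
    config.enable_use_gpu(gpu_mem, 0)  # initial pool of gpu_mem MB on GPU 0
    config.enable_tensorrt_engine(
        workspace_size=1 << 30,
        max_batch_size=max_batch_size,
        precision_mode=PrecisionType.Float32,  # PrecisionType.Half for fp16
    )
    return create_predictor(config)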
-name "test*.py" | sort` -ignore="" +ignore="test_vgg11.py" bug=0 echo "============ failed cases =============" >> result.txt diff --git a/inference/python_api_test/test_class_model/test_resnet50.py b/inference/python_api_test/test_class_model/test_resnet50.py index fbcc4fdb7..3b7c9ddf9 100644 --- a/inference/python_api_test/test_class_model/test_resnet50.py +++ b/inference/python_api_test/test_class_model/test_resnet50.py @@ -96,8 +96,8 @@ def test_trtfp32_more_bz(): file_path = "./resnet50" images_size = 224 - batch_size_pool = 10 - for batch_size in range(1, batch_size_pool + 1): + batch_size_pool = [1, 5, 10] + for batch_size in batch_size_pool: test_suite = InferenceTest() test_suite.load_config(model_file="./resnet50/inference.pdmodel", params_file="./resnet50/inference.pdiparams") images_list, npy_list = test_suite.get_images_npy(file_path, images_size) @@ -178,8 +178,8 @@ def test_trtfp16_more_bz(): file_path = "./resnet50" images_size = 224 - batch_size_pool = 10 - for batch_size in range(1, batch_size_pool + 1): + batch_size_pool = [1, 5, 10] + for batch_size in batch_size_pool: test_suite = InferenceTest() test_suite.load_config(model_file="./resnet50/inference.pdmodel", params_file="./resnet50/inference.pdiparams") images_list, npy_list = test_suite.get_images_npy(file_path, images_size) diff --git a/inference/python_api_test/test_class_model/test_vgg11.py b/inference/python_api_test/test_class_model/test_vgg11.py new file mode 100644 index 000000000..22bcaea49 --- /dev/null +++ b/inference/python_api_test/test_class_model/test_vgg11.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- +# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python +""" +test vgg11 model +""" + +import os +import sys +import logging +import tarfile +import six +import wget +import pytest +import numpy as np + +# pylint: disable=wrong-import-position +sys.path.append("..") +from test_case import InferenceTest + +# pylint: enable=wrong-import-position + + +def check_model_exist(): + """ + check model exist + """ + vgg11_url = "https://paddle-qa.bj.bcebos.com/inference_model/2.1/class/vgg11.tgz" + if not os.path.exists("./vgg11/inference.pdiparams"): + wget.download(vgg11_url, out="./") + tar = tarfile.open("vgg11.tgz") + tar.extractall() + tar.close() + + +@pytest.mark.p0 +@pytest.mark.config_init_combined_model +def test_config(): + """ + test combined model config + """ + check_model_exist() + test_suite = InferenceTest() + test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams") + test_suite.config_test() + + +@pytest.mark.p0 +@pytest.mark.config_disablegpu_memory +def test_disable_gpu(): + """ + test no gpu resources occupied after disable gpu + """ + check_model_exist() + test_suite = InferenceTest() + test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams") + batch_size = 1 + fake_input = np.random.randn(batch_size, 3, 224, 224).astype("float32") + print(fake_input.shape) + input_data_dict = {"x": fake_input} + test_suite.disable_gpu_test(input_data_dict) + + +@pytest.mark.p1 +@pytest.mark.trt_fp32_more_bz_precision +def test_trtfp32_more_bz(): + """ + compared trt fp32 batch_size=1-10 vgg11 outputs with true val + """ + check_model_exist() + + file_path = "./vgg11" + images_size = 224 + batch_size_pool = [1, 10] + for batch_size in batch_size_pool: + test_suite = InferenceTest() + test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams") + images_list, npy_list = 
diff --git a/inference/python_api_test/test_class_model/test_vgg11.py b/inference/python_api_test/test_class_model/test_vgg11.py
new file mode 100644
index 000000000..22bcaea49
--- /dev/null
+++ b/inference/python_api_test/test_class_model/test_vgg11.py
@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
+"""
+test vgg11 model
+"""
+
+import os
+import sys
+import logging
+import tarfile
+import six
+import wget
+import pytest
+import numpy as np
+
+# pylint: disable=wrong-import-position
+sys.path.append("..")
+from test_case import InferenceTest
+
+# pylint: enable=wrong-import-position
+
+
+def check_model_exist():
+    """
+    check model exist
+    """
+    vgg11_url = "https://paddle-qa.bj.bcebos.com/inference_model/2.1/class/vgg11.tgz"
+    if not os.path.exists("./vgg11/inference.pdiparams"):
+        wget.download(vgg11_url, out="./")
+        tar = tarfile.open("vgg11.tgz")
+        tar.extractall()
+        tar.close()
+
+
+@pytest.mark.p0
+@pytest.mark.config_init_combined_model
+def test_config():
+    """
+    test combined model config
+    """
+    check_model_exist()
+    test_suite = InferenceTest()
+    test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+    test_suite.config_test()
+
+
+@pytest.mark.p0
+@pytest.mark.config_disablegpu_memory
+def test_disable_gpu():
+    """
+    test no gpu resources occupied after disable gpu
+    """
+    check_model_exist()
+    test_suite = InferenceTest()
+    test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+    batch_size = 1
+    fake_input = np.random.randn(batch_size, 3, 224, 224).astype("float32")
+    print(fake_input.shape)
+    input_data_dict = {"x": fake_input}
+    test_suite.disable_gpu_test(input_data_dict)
+
+
+@pytest.mark.p1
+@pytest.mark.trt_fp32_more_bz_precision
+def test_trtfp32_more_bz():
+    """
+    compared trt fp32 batch_size=1-10 vgg11 outputs with true val
+    """
+    check_model_exist()
+
+    file_path = "./vgg11"
+    images_size = 224
+    batch_size_pool = [1, 10]
+    for batch_size in batch_size_pool:
+        test_suite = InferenceTest()
+        test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+        images_list, npy_list = test_suite.get_images_npy(file_path, images_size)
+        fake_input = np.array(images_list[0:batch_size]).astype("float32")
+        input_data_dict = {"x": fake_input}
+        output_data_dict = test_suite.get_truth_val(input_data_dict, device="gpu")
+
+        del test_suite  # destroy class to save memory
+
+        test_suite2 = InferenceTest()
+        test_suite2.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+        test_suite2.trt_fp32_more_bz_test(input_data_dict, output_data_dict)
+
+        del test_suite2  # destroy class to save memory
+
+
+@pytest.mark.p1
+@pytest.mark.trt_fp32_multi_thread_bz1_precision
+def test_trtfp32_bz1_multi_thread():
+    """
+    compared trt fp32 batch_size=1 vgg11 multi_thread outputs with true val
+    """
+    check_model_exist()
+
+    file_path = "./vgg11"
+    images_size = 224
+    batch_size = 1
+    test_suite = InferenceTest()
+    test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+    images_list, npy_list = test_suite.get_images_npy(file_path, images_size)
+    fake_input = np.array(images_list[0:batch_size]).astype("float32")
+    input_data_dict = {"x": fake_input}
+    output_data_dict = test_suite.get_truth_val(input_data_dict, device="gpu")
+
+    del test_suite  # destroy class to save memory
+
+    test_suite2 = InferenceTest()
+    test_suite2.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+    test_suite2.trt_fp32_bz1_multi_thread_test(input_data_dict, output_data_dict)
+
+    del test_suite2  # destroy class to save memory
+
+
+@pytest.mark.p1
+@pytest.mark.trt_fp16_more_bz_precision
+def test_trtfp16_more_bz():
+    """
+    compared trt fp16 batch_size=1-10 vgg11 outputs with true val
+    """
+    check_model_exist()
+
+    file_path = "./vgg11"
+    images_size = 224
+    batch_size_pool = [1, 10]
+    for batch_size in batch_size_pool:
+        test_suite = InferenceTest()
+        test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+        images_list, npy_list = test_suite.get_images_npy(file_path, images_size)
+        fake_input = np.array(images_list[0:batch_size]).astype("float32")
+        input_data_dict = {"x": fake_input}
+        output_data_dict = test_suite.get_truth_val(input_data_dict, device="gpu")
+
+        del test_suite  # destroy class to save memory
+
+        test_suite2 = InferenceTest()
+        test_suite2.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+        test_suite2.trt_fp16_more_bz_test(input_data_dict, output_data_dict, repeat=1, delta=1e-3, gpu_mem=3000)
+
+        del test_suite2  # destroy class to save memory
+
+
+@pytest.mark.p1
+@pytest.mark.trt_fp16_multi_thread_bz1_precision
+def test_trtfp16_bz1_multi_thread():
+    """
+    compared trt fp16 batch_size=1 vgg11 multi_thread outputs with true val
+    """
+    check_model_exist()
+
+    file_path = "./vgg11"
+    images_size = 224
+    batch_size = 1
+    test_suite = InferenceTest()
+    test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+    images_list, npy_list = test_suite.get_images_npy(file_path, images_size)
+    fake_input = np.array(images_list[0:batch_size]).astype("float32")
+    input_data_dict = {"x": fake_input}
+    output_data_dict = test_suite.get_truth_val(input_data_dict, device="gpu")
+
+    del test_suite  # destroy class to save memory
+
+    test_suite2 = InferenceTest()
+    test_suite2.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+    test_suite2.trt_fp16_bz1_multi_thread_test(input_data_dict, output_data_dict, gpu_mem=3000)
+
+    del test_suite2  # destroy class to save memory
+
+
+@pytest.mark.p1
+@pytest.mark.mkldnn_bz1_precision
+def test_mkldnn():
+    """
+    compared mkldnn vgg11 outputs with true val
+    """
+    check_model_exist()
+
+    file_path = "./vgg11"
+    images_size = 224
+    batch_size = 1
+    test_suite = InferenceTest()
+    test_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+    images_list, npy_list = test_suite.get_images_npy(file_path, images_size)
+    fake_input = np.array(images_list[0:batch_size]).astype("float32")
+    input_data_dict = {"x": fake_input}
+    output_data_dict = test_suite.get_truth_val(input_data_dict, device="gpu")
+
+    del test_suite  # destroy class to save memory
+
+    test_suite2 = InferenceTest()
+    test_suite2.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
+    test_suite2.mkldnn_test(input_data_dict, output_data_dict)
+
+    del test_suite2  # destroy class to save memory
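Every test in the new file follows the same two-predictor pattern: a plain GPU run with IR optimization disabled produces the reference outputs (get_truth_val), then a fresh suite replays the same inputs under the accelerated config and asserts element-wise agreement within delta. A condensed sketch of that pattern, assuming the InferenceTest helpers from infer_test.py above:

# Condensed sketch of the two-predictor pattern used by every test above.
truth_suite = InferenceTest()
truth_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
output_data_dict = truth_suite.get_truth_val(input_data_dict, device="gpu")
del truth_suite  # free the reference predictor before the accelerated run

trt_suite = InferenceTest()
trt_suite.load_config(model_file="./vgg11/inference.pdmodel", params_file="./vgg11/inference.pdiparams")
# fp16 needs a looser tolerance and, for vgg11, a larger memory pool
trt_suite.trt_fp16_more_bz_test(input_data_dict, output_data_dict, repeat=1, delta=1e-3, gpu_mem=3000)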