Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

different execution times on the same rk3588 board with different operating systems (android, linux) #189

Open
egormcobakaster opened this issue Sep 24, 2024 · 2 comments

Comments

@egormcobakaster
Copy link

I slightly modified the code in examples/resnet/cpp/main to measure execution time. The measurements differ between Linux and Android, although the first of the 100 runs takes approximately the same time on both.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <chrono>
#include <fstream>
#include <iostream>

#include "resnet.h"
#include "image_utils.h"
#include "file_utils.h"

// Relative path of the text file that main() loads with read_lines_from_file();
// the loaded lines are printed as class names, indexed by the predicted class id.
constexpr const char* Imagenet_classes_file_path = "./model/synset.txt";

/**
 * Benchmark driver for the ResNet example.
 *
 * Usage: <program> <model_path> <image_path>
 *
 * Loads the class-label file, initialises the model, reads the input image,
 * then runs inference `num_runs` times on that same image. Prints the latency
 * of the first run, the last run and the average over all runs, followed by
 * the top-k results left in `result` by the last run.
 *
 * Every failure path goes through the common cleanup section, so the label
 * lines, the image buffer and the model context are released no matter where
 * the program stops.
 *
 * @return 0 when all runs succeeded, -1 on a usage error or any failed stage.
 */
int main(int argc, char **argv)
{
    if (argc != 3) {
        printf("%s <model_path> <image_path>\n", argv[0]);
        return -1;
    }

    const char* model_path = argv[1];
    const char* image_path = argv[2];

    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by wall-clock adjustments. A floating-point millisecond duration keeps
    // the sub-millisecond part instead of truncating to whole milliseconds.
    using bench_clock = std::chrono::steady_clock;
    using millis_f = std::chrono::duration<double, std::milli>;

    constexpr int topk = 5;        // compile-time constant: no variable-length array
    constexpr int num_runs = 100;

    // All locals are declared before the first `goto cleanup` so that no jump
    // crosses an initialisation.
    int exit_code = -1;
    int ret = 0;
    bool model_ready = false;      // true once init_resnet_model() succeeded
    int line_count = 0;
    char** lines = NULL;
    double first_run_time = 0.0;
    double last_run_time = 0.0;
    double total_time = 0.0;
    resnet_result result[topk];
    rknn_app_context_t rknn_app_ctx;
    image_buffer_t src_image;

    memset(&rknn_app_ctx, 0, sizeof(rknn_app_context_t));
    memset(&src_image, 0, sizeof(image_buffer_t));

    lines = read_lines_from_file(Imagenet_classes_file_path, &line_count);
    if (lines == NULL) {
        printf("read classes label file fail! path=%s\n", Imagenet_classes_file_path);
        goto cleanup;
    }

    ret = init_resnet_model(model_path, &rknn_app_ctx);
    if (ret != 0) {
        printf("init_resnet_model fail! ret=%d model_path=%s\n", ret, model_path);
        goto cleanup;
    }
    model_ready = true;

    ret = read_image(image_path, &src_image);
    if (ret != 0) {
        printf("read image fail! ret=%d image_path=%s\n", ret, image_path);
        goto cleanup;
    }

    // Timed inference loop: only the inference call itself is measured.
    for (int run = 0; run < num_runs; ++run) {
        const auto start_time = bench_clock::now();
        ret = inference_resnet_model(&rknn_app_ctx, &src_image, result, topk);
        const auto end_time = bench_clock::now();

        if (ret != 0) {
            printf("inference_resnet_model fail! ret=%d\n", ret);
            goto cleanup;
        }

        const double elapsed_ms = millis_f(end_time - start_time).count();
        if (run == 0) {
            first_run_time = elapsed_ms;
        }
        if (run == num_runs - 1) {
            last_run_time = elapsed_ms;
        }
        total_time += elapsed_ms;
    }

    printf("Inference - First run: %.2f ms, Last run: %.2f ms, Average: %.2f ms\n",
           first_run_time, last_run_time, total_time / num_runs);

    for (int i = 0; i < topk; i++) {
        // Guard the label lookup: a class id outside the loaded label file
        // would otherwise read past the end of `lines`.
        const int cls = result[i].cls;
        const char* label = (cls >= 0 && cls < line_count) ? lines[cls] : "(unknown)";
        printf("[%d] score=%.6f class=%s\n", cls, result[i].score, label);
    }

    exit_code = 0;

cleanup:
    if (model_ready) {
        ret = release_resnet_model(&rknn_app_ctx);
        if (ret != 0) {
            printf("release_resnet_model fail! ret=%d\n", ret);
        }
    }

    if (src_image.virt_addr != NULL) {
        free(src_image.virt_addr);
    }

    if (lines != NULL) {
        free_lines(lines, line_count);
    }

    return exit_code;
}

linux output:
Inference - First run: 25.00 ms, Last run: 21.00 ms, Average: 21.88 ms
[155] score=0.879479 class=n02086240 Shih-Tzu
[154] score=0.113574 class=n02086079 Pekinese, Pekingese, Peke
[204] score=0.002490 class=n02098413 Lhasa, Lhasa apso
[262] score=0.001698 class=n02112706 Brabancon griffon
[254] score=0.000742 class=n02110958 pug, pug-dog

android output:
Inference - First run: 22.00 ms, Last run: 37.00 ms, Average: 32.86 ms
[155] score=0.879479 class=n02086240 Shih-Tzu
[154] score=0.113574 class=n02086079 Pekinese, Pekingese, Peke
[204] score=0.002490 class=n02098413 Lhasa, Lhasa apso
[262] score=0.001698 class=n02112706 Brabancon griffon
[254] score=0.000742 class=n02110958 pug, pug-dog

@swdee
Copy link

swdee commented Sep 25, 2024

You will get different execution times even on the same OS, depending on which CPU cores the program is running on. The RK3588 has 4 fast Cortex-A76 cores at 2.4 GHz and 4 efficient Cortex-A55 cores at 1.8 GHz, so the way the OS schedules your program will affect your inference timing.

To avoid this variation, you need to set the CPU affinity of the program so that it runs on the fast A76 cores only, e.g.:

#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <pthread.h>

/*
 * Example: restrict this program to CPU cores 4-7 before doing any work.
 *
 * Builds a CPU mask containing cores 4, 5, 6 and 7 and applies it with
 * sched_setaffinity() using the id returned by getpid().
 * NOTE(review): cores 4-7 are assumed to be the A76 cores on this board -
 * confirm the core numbering on the target system.
 *
 * Returns 0 on success, or -1 after reporting the error with perror() when
 * the affinity could not be set.
 */
int main() {
    cpu_set_t big_core_mask;
    int core;

    /* Start from an empty mask, then enable cores 4 through 7 one by one. */
    CPU_ZERO(&big_core_mask);
    for (core = 4; core <= 7; ++core) {
        CPU_SET(core, &big_core_mask);
    }

    /* Apply the mask to the id of the running program. */
    const pid_t self = getpid();
    const int rc = sched_setaffinity(self, sizeof(cpu_set_t), &big_core_mask);
    if (rc == -1) {
        perror("sched_setaffinity");
        return -1;
    }

    printf("CPU affinity set to A76 cores only (4, 5, 6, 7)\n");

    /* Your program logic here... */

    return 0;
}

@zen-xingle
Copy link
Collaborator

Have you set the CPU/NPU/DDR frequency? This script helps with that: https://github.com/airockchip/rknn_model_zoo/blob/main/scaling_frequency.sh

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants