I slightly modified the code in examples/resnet/cpp/main to measure execution time. The measurements showed different results on Linux and Android, although the first of the 100 runs took approximately the same time on both systems.
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <chrono>
#include <fstream>
#include <iostream>

#include "resnet.h"
#include "image_utils.h"
#include "file_utils.h"

constexpr const char* Imagenet_classes_file_path = "./model/synset.txt";

int main(int argc, char **argv)
{
    if (argc != 3) {
        printf("%s <model_path> <image_path>\n", argv[0]);
        return -1;
    }

    const char* model_path = argv[1];
    const char* image_path = argv[2];

    int line_count;
    char** lines = read_lines_from_file(Imagenet_classes_file_path, &line_count);
    if (lines == NULL) {
        printf("read classes label file fail! path=%s\n", Imagenet_classes_file_path);
        return -1;
    }

    int ret;
    rknn_app_context_t rknn_app_ctx;
    memset(&rknn_app_ctx, 0, sizeof(rknn_app_context_t));

    ret = init_resnet_model(model_path, &rknn_app_ctx);
    if (ret != 0) {
        printf("init_resnet_model fail! ret=%d model_path=%s\n", ret, model_path);
        return -1;
    }

    image_buffer_t src_image;
    memset(&src_image, 0, sizeof(image_buffer_t));
    ret = read_image(image_path, &src_image);
    if (ret != 0) {
        printf("read image fail! ret=%d image_path=%s\n", ret, image_path);
        return -1;
    }

    int topk = 5;
    resnet_result result[topk];
    int num_repit = 100;
    double first_run_time = 0.0, last_run_time = 0.0, avg_time = 0.0;
    double total_time = 0.0;

    // Inference loop with 100 repetitions
    for (int i = 0; i < num_repit; i++) {
        auto start_time = std::chrono::high_resolution_clock::now();
        ret = inference_resnet_model(&rknn_app_ctx, &src_image, result, topk);
        auto end_time = std::chrono::high_resolution_clock::now();
        auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count();
        if (ret != 0) {
            printf("inference_resnet_model fail! ret=%d\n", ret);
            goto out;
        }
        if (i == 0) {
            first_run_time = duration;
        }
        if (i == num_repit - 1) {
            last_run_time = duration;
        }
        total_time += duration;
    }
    avg_time = total_time / num_repit;

    printf("Inference - First run: %.2f ms, Last run: %.2f ms, Average: %.2f ms\n", first_run_time, last_run_time, avg_time);

    for (int i = 0; i < topk; i++) {
        printf("[%d] score=%.6f class=%s\n", result[i].cls, result[i].score, lines[result[i].cls]);
    }

out:
    ret = release_resnet_model(&rknn_app_ctx);
    if (ret != 0) {
        printf("release_resnet_model fail! ret=%d\n", ret);
    }

    if (src_image.virt_addr != NULL) {
        free(src_image.virt_addr);
    }
    if (lines != NULL) {
        free_lines(lines, line_count);
    }

    return 0;
}
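A side note on the measurement itself: duration_cast<std::chrono::milliseconds> truncates each sample to whole milliseconds, which is why the printed values always end in ".00". If finer resolution is wanted, a floating-point duration keeps the fractional part. A minimal sketch, with a hypothetical helper name (time_call_ms), assuming the same chrono clock used above:

#include <chrono>

// Returns the wall-clock time of fn() in milliseconds, including the fractional part.
// 'fn' is any callable, e.g. a lambda wrapping inference_resnet_model().
template <typename Fn>
double time_call_ms(Fn&& fn) {
    auto start = std::chrono::high_resolution_clock::now();
    fn();
    auto end = std::chrono::high_resolution_clock::now();
    // duration<double, std::milli> keeps fractions of a millisecond instead of truncating.
    return std::chrono::duration<double, std::milli>(end - start).count();
}

// Hypothetical usage inside the benchmark loop above:
// double duration = time_call_ms([&] {
//     ret = inference_resnet_model(&rknn_app_ctx, &src_image, result, topk);
// });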
You will get different execution times on the same OS depending on which CPU cores the program is scheduled on. The RK3588 has four fast Cortex-A76 cores at 2.4 GHz and four efficient Cortex-A55 cores at 1.8 GHz, so how the OS schedules your program across them will affect your inference timing.
To avoid this variation, set the program's CPU affinity so it runs on the fast A76 cores only, e.g.:
#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <pthread.h>

int main() {
    cpu_set_t cpuset;
    pid_t pid;

    // Initialize the CPU set to zero
    CPU_ZERO(&cpuset);

    // Add the A76 cores (typically cores 4, 5, 6, 7) to the CPU set
    CPU_SET(4, &cpuset); // Add core 4
    CPU_SET(5, &cpuset); // Add core 5
    CPU_SET(6, &cpuset); // Add core 6
    CPU_SET(7, &cpuset); // Add core 7

    // Get the process ID (0 also means the calling process)
    pid = getpid();

    // Set the CPU affinity for the process
    if (sched_setaffinity(pid, sizeof(cpu_set_t), &cpuset) == -1) {
        perror("sched_setaffinity");
        return -1;
    }

    // Print confirmation
    printf("CPU affinity set to A76 cores only (4, 5, 6, 7)\n");

    // Your program logic here...

    return 0;
}
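To check that the affinity actually took effect, sched_getcpu() (a glibc/bionic extension) reports which core the calling thread is running on at that moment; after the sched_setaffinity() call above it should return one of cores 4-7 on the RK3588. A minimal sketch, with a hypothetical helper name (print_current_core):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

// Report which core the calling thread is currently running on.
// After the sched_setaffinity() call above, this should print a value in 4-7.
static void print_current_core(void) {
    printf("currently running on core %d\n", sched_getcpu());
}

On Linux the same pinning can often be done without changing the code by launching the binary with taskset -c 4-7, assuming the taskset utility (util-linux, or the toybox applet on Android) is available.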
For reference, the measurements from the modified example:
Linux output:
Inference - First run: 25.00 ms, Last run: 21.00 ms, Average: 21.88 ms
[155] score=0.879479 class=n02086240 Shih-Tzu
[154] score=0.113574 class=n02086079 Pekinese, Pekingese, Peke
[204] score=0.002490 class=n02098413 Lhasa, Lhasa apso
[262] score=0.001698 class=n02112706 Brabancon griffon
[254] score=0.000742 class=n02110958 pug, pug-dog
Android output:
Inference - First run: 22.00 ms, Last run: 37.00 ms, Average: 32.86 ms
[155] score=0.879479 class=n02086240 Shih-Tzu
[154] score=0.113574 class=n02086079 Pekinese, Pekingese, Peke
[204] score=0.002490 class=n02098413 Lhasa, Lhasa apso
[262] score=0.001698 class=n02112706 Brabancon griffon
[254] score=0.000742 class=n02110958 pug, pug-dog