Skip to content

Commit c5e9151

Browse files
committed
refactor: modifications made based on reviews
1 parent 113cfc2 commit c5e9151

File tree

8 files changed

+241
-169
lines changed

8 files changed

+241
-169
lines changed

tools/mtmd/CMakeLists.txt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,6 @@ if (MTMD_WITH_FFMPEG)
3939
endif()
4040
endif()
4141

42-
option(MTMD_MAX_VIDEO_FRAMES_SMALL "Set a small number of frames for fast test locally" OFF)
43-
if(MTMD_MAX_VIDEO_FRAMES_SMALL)
44-
target_compile_definitions(mtmd PRIVATE MTMD_MAX_VIDEO_FRAMES_SMALL)
45-
endif()
46-
4742
if (BUILD_SHARED_LIBS)
4843
set_target_properties (mtmd PROPERTIES POSITION_INDEPENDENT_CODE ON)
4944
target_compile_definitions(mtmd PRIVATE LLAMA_BUILD)

tools/mtmd/clip.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4842,12 +4842,6 @@ bool clip_has_whisper_encoder(const struct clip_ctx * ctx) {
48424842
|| ctx->proj_type() == PROJECTOR_TYPE_VOXTRAL;
48434843
}
48444844

4845-
void clip_set_minicpmv_max_slice_nums(struct clip_ctx * ctx, int n) {
4846-
if (!ctx) return;
4847-
if (n < 0) n = 0;
4848-
ctx->model.hparams.minicpmv_max_slice_nums = n;
4849-
}
4850-
48514845
bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec) {
48524846
clip_image_f32 clip_img;
48534847
clip_img.buf.resize(h * w * 3);

tools/mtmd/clip.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_i
9191
bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
9292

9393
int clip_is_minicpmv(const struct clip_ctx * ctx);
94-
void clip_set_minicpmv_max_slice_nums(struct clip_ctx * ctx, int n);
9594
bool clip_is_glm(const struct clip_ctx * ctx);
9695
bool clip_is_qwen2vl(const struct clip_ctx * ctx);
9796
bool clip_is_llava(const struct clip_ctx * ctx);

tools/mtmd/mtmd-helper.cpp

Lines changed: 4 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -447,9 +447,11 @@ mtmd_bitmap * mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx, const unsigne
447447
}
448448

449449
mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * path) {
450-
// although we could read the file into memory and call mtmd_helper_bitmap_init_from_buf,
450+
// Attention! A directory containing frame images is also considered a video
451+
// so a path that is a directory should be handled by mtmd_video::init_video_bitmap
452+
// Besides, we could read the file into memory and call mtmd_helper_bitmap_init_from_buf,
451453
// but for video files, it's better to let ffmpeg read from file
452-
if(mtmd_video::is_video_file(path) || mtmd_helper::is_dir(path)){
454+
if(mtmd_video::is_video_file(path)){
453455
return mtmd_video::init_video_bitmap(ctx, path);
454456
}
455457

@@ -475,68 +477,3 @@ mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char *
475477
delete [] buf;
476478
return res;
477479
}
478-
479-
namespace mtmd_helper{
480-
481-
bool has_image_ext(const std::string & name) {
482-
auto lower = name;
483-
std::transform(lower.begin(), lower.end(), lower.begin(), [](unsigned char c){ return (char)std::tolower(c); });
484-
return lower.rfind(".jpg") != std::string::npos ||
485-
lower.rfind(".jpeg") != std::string::npos ||
486-
lower.rfind(".png") != std::string::npos ||
487-
lower.rfind(".bmp") != std::string::npos ||
488-
lower.rfind(".gif") != std::string::npos ||
489-
lower.rfind(".webp") != std::string::npos;
490-
}
491-
492-
bool is_dir(const std::string & path) {
493-
#if defined(_WIN32)
494-
DWORD attrs = GetFileAttributesA(path.c_str());
495-
return (attrs != INVALID_FILE_ATTRIBUTES) && (attrs & FILE_ATTRIBUTE_DIRECTORY);
496-
#else
497-
struct stat st;
498-
if (stat(path.c_str(), &st) != 0) return false;
499-
return S_ISDIR(st.st_mode);
500-
#endif
501-
}
502-
503-
void list_files(const std::string & dir, std::vector<std::string> & out, bool recursive) {
504-
#if defined(_WIN32)
505-
std::string pattern = dir;
506-
if (!pattern.empty() && pattern.back() != '/' && pattern.back() != '\\') pattern += "\\";
507-
pattern += "*";
508-
WIN32_FIND_DATAA ffd;
509-
HANDLE hFind = FindFirstFileA(pattern.c_str(), &ffd);
510-
if (hFind == INVALID_HANDLE_VALUE) return;
511-
do {
512-
std::string name = ffd.cFileName;
513-
if (name == "." || name == "..") continue;
514-
std::string path = dir;
515-
if (!path.empty() && path.back() != '/' && path.back() != '\\') path += "\\";
516-
path += name;
517-
if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
518-
if (recursive) list_files(path, out, recursive);
519-
} else {
520-
out.push_back(path);
521-
}
522-
} while (FindNextFileA(hFind, &ffd) != 0);
523-
FindClose(hFind);
524-
#else
525-
DIR * dp = opendir(dir.c_str());
526-
if (!dp) return;
527-
struct dirent * de;
528-
while ((de = readdir(dp)) != nullptr) {
529-
std::string name = de->d_name;
530-
if (name == "." || name == "..") continue;
531-
std::string path = dir + "/" + name;
532-
if (is_dir(path)) {
533-
if (recursive) list_files(path, out, recursive);
534-
} else {
535-
out.push_back(path);
536-
}
537-
}
538-
closedir(dp);
539-
#endif
540-
}
541-
542-
}

tools/mtmd/mtmd-helper.h

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ extern "C" {
2121
//
2222

2323
// helper function to construct a mtmd_bitmap from a file
24-
// it calls mtmd_helper_bitmap_init_from_buf() internally
24+
// for image and audio, it calls mtmd_helper_bitmap_init_from_buf() internally
25+
// for video, it calls init_video_bitmap() to read, decode, and stream individual image frames into a bitmap
2526
// returns nullptr on failure
2627
// this function is thread-safe
2728
MTMD_API mtmd_bitmap * mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * path);
@@ -88,10 +89,4 @@ MTMD_API int32_t mtmd_helper_decode_image_chunk(mtmd_context * ctx,
8889
// C++ wrappers
8990
//
9091

91-
namespace mtmd_helper{
92-
bool has_image_ext(const std::string & name);
93-
bool is_dir(const std::string & path);
94-
void list_files(const std::string & dir, std::vector<std::string> & out, bool recursive);
95-
}
96-
9792
#endif

0 commit comments

Comments
 (0)