Skip to content

Fix for build on Windows #108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ models/*.bin
__pycache__
dist
*.gguf

.vs
**/*.env
12 changes: 7 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
cmake_minimum_required(VERSION 3.12)
project("CLIP.cpp" C CXX)

set(CMAKE_CXX_STANDARD 20)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
Expand All @@ -22,7 +24,7 @@ endif()

# general
option(CLIP_STATIC "CLIP: static link libraries" OFF)
option(CLIP_BUILD_TESTS "CLIP: build tests" ${CLIP_STANDALONE})
option(CLIP_BUILD_TESTS "CLIP: build tests" ${CLIP_STANDALONE})
option(CLIP_BUILD_EXAMPLES "CLIP: build examples" ${CLIP_STANDALONE})
option(CLIP_BUILD_IMAGE_SEARCH "CLIP: build image-search" OFF)
option(CLIP_NATIVE "CLIP: enable -march=native flag" ON)
Expand All @@ -42,12 +44,12 @@ option(CLIP_SANITIZE_UNDEFINED "CLIP: enable undefined sanitizer"
option(CLIP_AVX "CLIP: enable AVX" ON)
option(CLIP_AVX2 "CLIP: enable AVX2" ON)
option(CLIP_FMA "CLIP: enable FMA" ON)
option(CLIP_AVX512 "clip: enable AVX512" OFF)
option(CLIP_AVX512_VBMI "clip: enable AVX512-VBMI" OFF)
option(CLIP_AVX512_VNNI "clip: enable AVX512-VNNI" OFF)
option(CLIP_AVX512 "CLIP: enable AVX512" OFF)
option(CLIP_AVX512_VBMI "CLIP: enable AVX512-VBMI" OFF)
option(CLIP_AVX512_VNNI "CLIP: enable AVX512-VNNI" OFF)
# in MSVC F16C is implied with AVX2/AVX512
if (NOT MSVC)
option(CLIP_F16C "clip: enable F16C" ON)
option(CLIP_F16C "CLIP: enable F16C" ON)
endif()


Expand Down
70 changes: 63 additions & 7 deletions clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,65 @@
#include <fstream>
#include <iostream>
#include <map>
#include <pthread.h>
#include <regex>
#include <stdexcept>
#include <thread>
#include <vector>
#include <algorithm>

#include "clip.h"
#include "ggml/ggml.h"

#if defined(_WIN32)

#define NOMINMAX
#include <windows.h>

typedef volatile LONG atomic_int;
typedef atomic_int atomic_bool;

// Atomically writes `val` into `*ptr` via InterlockedExchange.
// The previous value returned by the exchange is intentionally discarded.
static void atomic_store(atomic_int * ptr, LONG val) {
    (void)InterlockedExchange(ptr, val);
}
// Atomically reads `*ptr`.
// Implemented as a compare-exchange of 0 with 0: the stored value is left
// unchanged and the value observed at the time of the call is returned.
static LONG atomic_load(atomic_int * ptr) {
    const LONG observed = InterlockedCompareExchange(ptr, 0, 0);
    return observed;
}
// Atomically adds `inc` to `*ptr` via InterlockedExchangeAdd and returns
// whatever that call returns (the value held before the addition).
static LONG atomic_fetch_add(atomic_int * ptr, LONG inc) {
    const LONG previous = InterlockedExchangeAdd(ptr, inc);
    return previous;
}
// Atomically subtracts `dec` from `*ptr` by adding its negation through
// atomic_fetch_add; returns that call's result.
static LONG atomic_fetch_sub(atomic_int * ptr, LONG dec) {
    const LONG negated = -dec;
    return atomic_fetch_add(ptr, negated);
}

typedef HANDLE pthread_t;

typedef DWORD thread_ret_t;
// Minimal Windows stand-in for pthread_create, built on CreateThread.
//
//   out    - receives the HANDLE of the newly created thread on success
//   unused - placeholder for the pthread attribute argument; ignored
//   func   - thread entry point; cast to LPTHREAD_START_ROUTINE
//   arg    - opaque pointer handed to `func`
//
// Returns 0 on success, or EAGAIN when CreateThread yields a null handle
// (in which case `*out` is left untouched).
//
// NOTE(review): the cast assumes `func` uses a calling convention compatible
// with a WINAPI thread routine. That holds on x64; confirm for 32-bit x86.
static int pthread_create(pthread_t * out, void * unused, thread_ret_t (*func)(void *), void * arg) {
    (void)unused;

    const HANDLE thread = CreateThread(nullptr, 0, reinterpret_cast<LPTHREAD_START_ROUTINE>(func), arg, 0, nullptr);
    if (thread != nullptr) {
        *out = thread;
        return 0;
    }

    return EAGAIN;
}

// Minimal Windows stand-in for pthread_join.
//
//   thread - HANDLE previously produced by pthread_create
//   unused - placeholder for the pthread return-value pointer; ignored
//
// Blocks until the thread finishes, then releases its handle. Returns the
// WaitForSingleObject result cast to int: 0 (WAIT_OBJECT_0) on success,
// non-zero otherwise.
static int pthread_join(pthread_t thread, void * unused) {
    (void)unused;

    const DWORD wait_result = WaitForSingleObject(thread, INFINITE);

    // A thread object stays alive until every handle to it is closed, so the
    // handle obtained from CreateThread must be released once the join has
    // succeeded; otherwise each joined thread leaks a kernel handle.
    // On failure the handle is left open so the caller can still act on it.
    if (wait_result == WAIT_OBJECT_0) {
        CloseHandle(thread);
    }

    return static_cast<int>(wait_result);
}

static int sched_yield(void) {
Sleep(0);
return 0;
}

#define pthread_exit(stat) return stat;
#else
#include <pthread.h>
#include <stdatomic.h>

typedef void * thread_ret_t;

#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#endif

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"

Expand Down Expand Up @@ -810,7 +860,7 @@ typedef struct {
} ImageDataRange;

// Function to preprocess a single image in a thread
void * preprocess_image(void * arg) {
thread_ret_t preprocess_image(void * arg) {
ImageDataRange * imageDataRange = static_cast<ImageDataRange *>(arg);

ImageData * imageData_start = imageDataRange->start;
Expand Down Expand Up @@ -1407,8 +1457,8 @@ bool clip_compare_text_and_image(const clip_ctx * ctx, const int n_threads, cons

// prepare image and text vectors
const int projection_dim = ctx->vision_model.hparams.projection_dim;
float img_vec[projection_dim];
float txt_vec[projection_dim];
float *img_vec = new float[projection_dim];
float *txt_vec = new float[projection_dim];

// tokenize and encode text
clip_tokens tokens;
Expand All @@ -1434,6 +1484,8 @@ bool clip_compare_text_and_image(const clip_ctx * ctx, const int n_threads, cons
// compute similarity
*score = clip_similarity_score(img_vec, txt_vec, projection_dim);

delete[] img_vec;
delete[] txt_vec;
return true;
}

Expand Down Expand Up @@ -1502,14 +1554,14 @@ bool clip_zero_shot_label_image(struct clip_ctx * ctx, const int n_threads, cons

clip_image_preprocess(ctx, input_img, &img_res);

float img_vec[vec_dim];
float *img_vec = new float[vec_dim];
if (!clip_image_encode(ctx, n_threads, &img_res, img_vec, false)) {
return false;
}

// encode texts and compute similarities
float txt_vec[vec_dim];
float similarities[n_labels];
float *txt_vec = new float[vec_dim];
float *similarities = new float[n_labels];

for (int i = 0; i < n_labels; i++) {
const auto & text = labels[i];
Expand All @@ -1522,6 +1574,10 @@ bool clip_zero_shot_label_image(struct clip_ctx * ctx, const int n_threads, cons
// apply softmax and sort scores
softmax_with_sorting(similarities, n_labels, scores, indices);

delete[] img_vec;
delete[] txt_vec;
delete[] similarities;

return true;
}

Expand Down
10 changes: 10 additions & 0 deletions clip.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@

struct clip_ctx;

#if defined(_WIN32)

#define NOMINMAX
#include <windows.h>

typedef HANDLE pthread_t;
typedef DWORD thread_ret_t;

#endif

#ifdef __cplusplus
extern "C" {
#endif
Expand Down
7 changes: 5 additions & 2 deletions examples/extract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ int main(int argc, char ** argv) {
int totalInputs = params.image_paths.size() + params.texts.size();
int processedInputs = 0;
int textCounter = 0; // Counter for generating unique filenames for text vectors
float * vec;
for (const std::string & img_path : params.image_paths) {
// load the image
const char * img_path_cstr = img_path.c_str();
Expand All @@ -45,7 +46,7 @@ int main(int argc, char ** argv) {

const int vec_dim = clip_get_vision_hparams(ctx)->projection_dim;
int shape[2] = {1, vec_dim};
float vec[vec_dim];
vec = new float[vec_dim];
clip_image_encode(ctx, params.n_threads, &img_res, vec, false);

// Generate a unique output filename for each image
Expand All @@ -57,6 +58,7 @@ int main(int argc, char ** argv) {
float progressPercentage = (float)processedInputs / totalInputs * 100.0f;
printf("\rProcessing: %.2f%%", progressPercentage);
fflush(stdout);
delete[] vec;
}

for (const std::string & text : params.texts) {
Expand All @@ -69,7 +71,7 @@ int main(int argc, char ** argv) {

const int vec_dim = clip_get_text_hparams(ctx)->projection_dim;
int shape[2] = {1, vec_dim};
float vec[vec_dim];
vec = new float[vec_dim];

if (!clip_text_encode(ctx, params.n_threads, &tokens, vec, false)) {
printf("Unable to encode text\n");
Expand All @@ -85,6 +87,7 @@ int main(int argc, char ** argv) {
// Generate a unique output filename for each text
std::string output_filename = "./text_vec_" + std::to_string(textCounter++) + ".npy";
writeNpyFile(output_filename.c_str(), vec, shape, 2);
delete[] vec;
}

printf("\n"); // Print a newline to clear the progress bar line
Expand Down
6 changes: 4 additions & 2 deletions examples/simple.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ int main() {
}

// Encode image
float img_vec[vec_dim];
float *img_vec = (float*)malloc(vec_dim * sizeof(float));
if (!clip_image_encode(ctx, n_threads, img_res, img_vec, true)) {
fprintf(stderr, "%s: failed to encode image\n", __func__);
return 1;
Expand All @@ -46,7 +46,7 @@ int main() {
clip_tokenize(ctx, text, tokens);

// Encode text
float txt_vec[vec_dim];
float *txt_vec= (float *)malloc(vec_dim * sizeof(float));
if (!clip_text_encode(ctx, n_threads, tokens, txt_vec, true)) {
fprintf(stderr, "%s: failed to encode text\n", __func__);
return 1;
Expand All @@ -66,6 +66,8 @@ int main() {

// Cleanup
clip_free(ctx);
free(img_vec);
free(txt_vec);

return 0;
}
9 changes: 6 additions & 3 deletions examples/zsl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ int main(int argc, char ** argv) {
printf("%s: You must specify at least 2 texts for zero-shot labeling\n", __func__);
}

const char * labels[n_labels];
const char ** labels = new const char *[n_labels];
for (size_t i = 0; i < n_labels; ++i) {
labels[i] = params.texts[i].c_str();
}
Expand All @@ -34,8 +34,8 @@ int main(int argc, char ** argv) {
return 1;
}

float sorted_scores[n_labels];
int sorted_indices[n_labels];
float *sorted_scores = new float[n_labels];
int *sorted_indices = new int[n_labels];
if (!clip_zero_shot_label_image(ctx, params.n_threads, &input_img, labels, n_labels, sorted_scores, sorted_indices)) {
fprintf(stderr, "Unable to apply ZSL\n");
return 1;
Expand All @@ -48,6 +48,9 @@ int main(int argc, char ** argv) {
}

clip_free(ctx);
delete[] labels;
delete[] sorted_scores;
delete[] sorted_indices;

return 0;
}
17 changes: 11 additions & 6 deletions tests/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ int main(int argc, char ** argv) {

const int vec_dim = clip_get_text_hparams(ctx)->projection_dim;

float txt_vecs[n_labels * vec_dim];
float *txt_vecs = new float[n_labels * vec_dim];

ggml_time_init();

Expand All @@ -79,11 +79,11 @@ int main(int argc, char ** argv) {
int n_total_items = 0; // total number of images processed
float total_acc1_score = 0.0f; // total accuracy at 1 for the intire dataset
float total_acc5_score = 0.0f; // total accuracy at 5 in intitre dataset
float img_vecs[vec_dim * batch_size];
float *img_vecs = new float[vec_dim * batch_size];

float similarities[n_labels];
float sorted_scores[n_labels];
int indices[n_labels];
float *similarities = new float[n_labels];
float *sorted_scores = new float[n_labels];
int *indices = new int[n_labels];
std::vector<clip_image_u8> img_inputs(batch_size);
std::vector<clip_image_f32> imgs_resized(batch_size);

Expand Down Expand Up @@ -167,6 +167,11 @@ int main(int argc, char ** argv) {
}

clip_free(ctx);
delete[] txt_vecs;
delete[] img_vecs;
delete[] similarities;
delete[] sorted_scores;
delete[] indices;

return 0;
}
}