From 172bab22160b5bfa889de5cfe264ea8662c2818d Mon Sep 17 00:00:00 2001 From: Madhav Goyal Date: Tue, 11 Mar 2025 13:03:22 +0530 Subject: [PATCH] Fix for build on Windows --- .gitignore | 3 ++ CMakeLists.txt | 12 ++++---- clip.cpp | 70 +++++++++++++++++++++++++++++++++++++++----- clip.h | 10 +++++++ examples/extract.cpp | 7 +++-- examples/simple.c | 6 ++-- examples/zsl.cpp | 9 ++++-- tests/benchmark.cpp | 17 +++++++---- 8 files changed, 109 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 303e608..30b36d4 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ models/*.bin __pycache__ dist *.gguf + +.vs +**/*.env \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index bca4bdb..f06229d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.12) project("CLIP.cpp" C CXX) +set(CMAKE_CXX_STANDARD 20) + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) @@ -22,7 +24,7 @@ endif() # general option(CLIP_STATIC "CLIP: static link libraries" OFF) -option(CLIP_BUILD_TESTS "CLIP: build tests" ${CLIP_STANDALONE}) +option(CLIP_BUILD_TESTS "CLIP: build tests" ${CLIP_STANDALONE}) option(CLIP_BUILD_EXAMPLES "CLIP: build examples" ${CLIP_STANDALONE}) option(CLIP_BUILD_IMAGE_SEARCH "CLIP: build image-search" OFF) option(CLIP_NATIVE "CLIP: enable -march=native flag" ON) @@ -42,12 +44,12 @@ option(CLIP_SANITIZE_UNDEFINED "CLIP: enable undefined sanitizer" option(CLIP_AVX "CLIP: enable AVX" ON) option(CLIP_AVX2 "CLIP: enable AVX2" ON) option(CLIP_FMA "CLIP: enable FMA" ON) -option(CLIP_AVX512 "clip: enable AVX512" OFF) -option(CLIP_AVX512_VBMI "clip: enable AVX512-VBMI" OFF) -option(CLIP_AVX512_VNNI "clip: enable AVX512-VNNI" OFF) +option(CLIP_AVX512 "CLIP: enable AVX512" OFF) +option(CLIP_AVX512_VBMI "CLIP: enable AVX512-VBMI" OFF) +option(CLIP_AVX512_VNNI "CLIP: enable AVX512-VNNI" OFF) # in MSVC F16C is implied with AVX2/AVX512 if (NOT MSVC) - option(CLIP_F16C "clip: enable F16C" ON) + option(CLIP_F16C "CLIP: enable F16C" ON) endif() diff --git a/clip.cpp b/clip.cpp index a3710de..ec568a6 100644 --- a/clip.cpp +++ b/clip.cpp @@ -5,15 +5,65 @@ #include #include #include -#include #include #include #include #include +#include #include "clip.h" #include "ggml/ggml.h" +#if defined(_WIN32) + +#define NOMINMAX +#include + +typedef volatile LONG atomic_int; +typedef atomic_int atomic_bool; + +static void atomic_store(atomic_int * ptr, LONG val) { InterlockedExchange(ptr, val); } +static LONG atomic_load(atomic_int * ptr) { return InterlockedCompareExchange(ptr, 0, 0); } +static LONG atomic_fetch_add(atomic_int * ptr, LONG inc) { return InterlockedExchangeAdd(ptr, inc); } +static LONG atomic_fetch_sub(atomic_int * ptr, LONG dec) { return atomic_fetch_add(ptr, -(dec)); } + +typedef HANDLE pthread_t; + +typedef DWORD thread_ret_t; +static int pthread_create(pthread_t * out, void * unused, thread_ret_t (*func)(void *), void * arg) { + (void)unused; + HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, NULL); + if (handle == NULL) { + return EAGAIN; + } + + *out = handle; + return 0; +} + +static int pthread_join(pthread_t thread, void * unused) { + (void)unused; + return (int)WaitForSingleObject(thread, INFINITE); +} + +static int sched_yield(void) { + Sleep(0); + return 0; +} + +#define pthread_exit(stat) return stat; +#else +#include +#include + +typedef void * thread_ret_t; + +#include +#include +#include + +#endif + #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" @@ -810,7 +860,7 @@ typedef struct { } ImageDataRange; // Function to preprocess a single image in a thread -void * preprocess_image(void * arg) { +thread_ret_t preprocess_image(void * arg) { ImageDataRange * imageDataRange = static_cast(arg); ImageData * imageData_start = imageDataRange->start; @@ -1407,8 +1457,8 @@ bool clip_compare_text_and_image(const clip_ctx * ctx, const int n_threads, cons // prepare image and text vectors const int projection_dim = ctx->vision_model.hparams.projection_dim; - float img_vec[projection_dim]; - float txt_vec[projection_dim]; + float *img_vec = new float[projection_dim]; + float *txt_vec = new float[projection_dim]; // tokenize and encode text clip_tokens tokens; @@ -1434,6 +1484,8 @@ bool clip_compare_text_and_image(const clip_ctx * ctx, const int n_threads, cons // compute similarity *score = clip_similarity_score(img_vec, txt_vec, projection_dim); + delete[] img_vec; + delete[] txt_vec; return true; } @@ -1502,14 +1554,14 @@ bool clip_zero_shot_label_image(struct clip_ctx * ctx, const int n_threads, cons clip_image_preprocess(ctx, input_img, &img_res); - float img_vec[vec_dim]; + float *img_vec = new float[vec_dim]; if (!clip_image_encode(ctx, n_threads, &img_res, img_vec, false)) { return false; } // encode texts and compute similarities - float txt_vec[vec_dim]; - float similarities[n_labels]; + float *txt_vec = new float[vec_dim]; + float *similarities = new float[n_labels]; for (int i = 0; i < n_labels; i++) { const auto & text = labels[i]; @@ -1522,6 +1574,10 @@ bool clip_zero_shot_label_image(struct clip_ctx * ctx, const int n_threads, cons // apply softmax and sort scores softmax_with_sorting(similarities, n_labels, scores, indices); + delete[] img_vec; + delete[] txt_vec; + delete[] similarities; + return true; } diff --git a/clip.h b/clip.h index 183b22d..6364238 100644 --- a/clip.h +++ b/clip.h @@ -7,6 +7,16 @@ struct clip_ctx; +#if defined(_WIN32) + +#define NOMINMAX +#include + +typedef HANDLE pthread_t; +typedef DWORD thread_ret_t; + +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/examples/extract.cpp b/examples/extract.cpp index 22e736e..1bdf579 100644 --- a/examples/extract.cpp +++ b/examples/extract.cpp @@ -28,6 +28,7 @@ int main(int argc, char ** argv) { int totalInputs = params.image_paths.size() + params.texts.size(); int processedInputs = 0; int textCounter = 0; // Counter for generating unique filenames for text vectors + float * vec; for (const std::string & img_path : params.image_paths) { // load the image const char * img_path_cstr = img_path.c_str(); @@ -45,7 +46,7 @@ int main(int argc, char ** argv) { const int vec_dim = clip_get_vision_hparams(ctx)->projection_dim; int shape[2] = {1, vec_dim}; - float vec[vec_dim]; + vec = new float[vec_dim]; clip_image_encode(ctx, params.n_threads, &img_res, vec, false); // Generate a unique output filename for each image @@ -57,6 +58,7 @@ int main(int argc, char ** argv) { float progressPercentage = (float)processedInputs / totalInputs * 100.0f; printf("\rProcessing: %.2f%%", progressPercentage); fflush(stdout); + delete[] vec; } for (const std::string & text : params.texts) { @@ -69,7 +71,7 @@ int main(int argc, char ** argv) { const int vec_dim = clip_get_text_hparams(ctx)->projection_dim; int shape[2] = {1, vec_dim}; - float vec[vec_dim]; + vec = new float[vec_dim]; if (!clip_text_encode(ctx, params.n_threads, &tokens, vec, false)) { printf("Unable to encode text\n"); @@ -85,6 +87,7 @@ int main(int argc, char ** argv) { // Generate a unique output filename for each text std::string output_filename = "./text_vec_" + std::to_string(textCounter++) + ".npy"; writeNpyFile(output_filename.c_str(), vec, shape, 2); + delete[] vec; } printf("\n"); // Print a newline to clear the progress bar line diff --git a/examples/simple.c b/examples/simple.c index 36aa0d3..1f69f88 100644 --- a/examples/simple.c +++ b/examples/simple.c @@ -35,7 +35,7 @@ int main() { } // Encode image - float img_vec[vec_dim]; + float *img_vec = (float*)malloc(vec_dim * sizeof(float)); if (!clip_image_encode(ctx, n_threads, img_res, img_vec, true)) { fprintf(stderr, "%s: failed to encode image\n", __func__); return 1; @@ -46,7 +46,7 @@ int main() { clip_tokenize(ctx, text, tokens); // Encode text - float txt_vec[vec_dim]; + float *txt_vec= (float *)malloc(vec_dim * sizeof(float)); if (!clip_text_encode(ctx, n_threads, tokens, txt_vec, true)) { fprintf(stderr, "%s: failed to encode text\n", __func__); return 1; @@ -66,6 +66,8 @@ int main() { // Cleanup clip_free(ctx); + free(img_vec); + free(txt_vec); return 0; } diff --git a/examples/zsl.cpp b/examples/zsl.cpp index b13f832..88bfad2 100644 --- a/examples/zsl.cpp +++ b/examples/zsl.cpp @@ -15,7 +15,7 @@ int main(int argc, char ** argv) { printf("%s: You must specify at least 2 texts for zero-shot labeling\n", __func__); } - const char * labels[n_labels]; + const char ** labels = new const char *[n_labels]; for (size_t i = 0; i < n_labels; ++i) { labels[i] = params.texts[i].c_str(); } @@ -34,8 +34,8 @@ int main(int argc, char ** argv) { return 1; } - float sorted_scores[n_labels]; - int sorted_indices[n_labels]; + float *sorted_scores = new float[n_labels]; + int *sorted_indices = new int[n_labels]; if (!clip_zero_shot_label_image(ctx, params.n_threads, &input_img, labels, n_labels, sorted_scores, sorted_indices)) { fprintf(stderr, "Unable to apply ZSL\n"); return 1; @@ -48,6 +48,9 @@ int main(int argc, char ** argv) { } clip_free(ctx); + delete[] labels; + delete[] sorted_scores; + delete[] sorted_indices; return 0; } diff --git a/tests/benchmark.cpp b/tests/benchmark.cpp index 3640ae5..acb4dfc 100644 --- a/tests/benchmark.cpp +++ b/tests/benchmark.cpp @@ -52,7 +52,7 @@ int main(int argc, char ** argv) { const int vec_dim = clip_get_text_hparams(ctx)->projection_dim; - float txt_vecs[n_labels * vec_dim]; + float *txt_vecs = new float[n_labels * vec_dim]; ggml_time_init(); @@ -79,11 +79,11 @@ int main(int argc, char ** argv) { int n_total_items = 0; // total number of images processed float total_acc1_score = 0.0f; // total accuracy at 1 for the intire dataset float total_acc5_score = 0.0f; // total accuracy at 5 in intitre dataset - float img_vecs[vec_dim * batch_size]; + float *img_vecs = new float[vec_dim * batch_size]; - float similarities[n_labels]; - float sorted_scores[n_labels]; - int indices[n_labels]; + float *similarities = new float[n_labels]; + float *sorted_scores = new float[n_labels]; + int *indices = new int[n_labels]; std::vector img_inputs(batch_size); std::vector imgs_resized(batch_size); @@ -167,6 +167,11 @@ int main(int argc, char ** argv) { } clip_free(ctx); + delete[] txt_vecs; + delete[] img_vecs; + delete[] similarities; + delete[] sorted_scores; + delete[] indices; return 0; -} +} \ No newline at end of file