Skip to content

Fix for build on Windows #108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -7,3 +7,6 @@ models/*.bin
__pycache__
dist
*.gguf

.vs
**/*.env
12 changes: 7 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
cmake_minimum_required(VERSION 3.12)
project("CLIP.cpp" C CXX)

set(CMAKE_CXX_STANDARD 20)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
@@ -22,7 +24,7 @@ endif()

# general
option(CLIP_STATIC "CLIP: static link libraries" OFF)
option(CLIP_BUILD_TESTS "CLIP: build tests" ${CLIP_STANDALONE})
option(CLIP_BUILD_TESTS "CLIP: build tests" ${CLIP_STANDALONE})
option(CLIP_BUILD_EXAMPLES "CLIP: build examples" ${CLIP_STANDALONE})
option(CLIP_BUILD_IMAGE_SEARCH "CLIP: build image-search" OFF)
option(CLIP_NATIVE "CLIP: enable -march=native flag" ON)
@@ -42,12 +44,12 @@ option(CLIP_SANITIZE_UNDEFINED "CLIP: enable undefined sanitizer"
option(CLIP_AVX "CLIP: enable AVX" ON)
option(CLIP_AVX2 "CLIP: enable AVX2" ON)
option(CLIP_FMA "CLIP: enable FMA" ON)
option(CLIP_AVX512 "clip: enable AVX512" OFF)
option(CLIP_AVX512_VBMI "clip: enable AVX512-VBMI" OFF)
option(CLIP_AVX512_VNNI "clip: enable AVX512-VNNI" OFF)
option(CLIP_AVX512 "CLIP: enable AVX512" OFF)
option(CLIP_AVX512_VBMI "CLIP: enable AVX512-VBMI" OFF)
option(CLIP_AVX512_VNNI "CLIP: enable AVX512-VNNI" OFF)
# in MSVC F16C is implied with AVX2/AVX512
if (NOT MSVC)
option(CLIP_F16C "clip: enable F16C" ON)
option(CLIP_F16C "CLIP: enable F16C" ON)
endif()


70 changes: 63 additions & 7 deletions clip.cpp
Original file line number Diff line number Diff line change
@@ -5,15 +5,65 @@
#include <fstream>
#include <iostream>
#include <map>
#include <pthread.h>
#include <regex>
#include <stdexcept>
#include <thread>
#include <vector>
#include <algorithm>

#include "clip.h"
#include "ggml/ggml.h"

#if defined(_WIN32)

#define NOMINMAX
#include <windows.h>

typedef volatile LONG atomic_int;
typedef atomic_int atomic_bool;

// Minimal stand-ins for the C11 <stdatomic.h> operations, built on the Win32
// Interlocked* functions. All of them operate on an `atomic_int`
// (a `volatile LONG`).

// Writes `val` into `*ptr` with a single interlocked exchange; the value that
// was previously stored is discarded.
static void atomic_store(atomic_int * ptr, LONG val) {
    InterlockedExchange(ptr, val);
}

// Reads `*ptr` by compare-exchanging 0 against 0: the stored value is left as
// it was, and the call hands back what it observed.
static LONG atomic_load(atomic_int * ptr) {
    return InterlockedCompareExchange(ptr, 0, 0);
}

// Adds `inc` to `*ptr` and returns the result of InterlockedExchangeAdd
// (documented by Microsoft as the value held before the addition).
static LONG atomic_fetch_add(atomic_int * ptr, LONG inc) {
    return InterlockedExchangeAdd(ptr, inc);
}

// Subtracts `dec` from `*ptr` by adding its negation; same return convention
// as atomic_fetch_add.
static LONG atomic_fetch_sub(atomic_int * ptr, LONG dec) {
    return InterlockedExchangeAdd(ptr, -dec);
}

typedef HANDLE pthread_t;

typedef DWORD thread_ret_t;
// Minimal pthread_create replacement for Windows, backed by CreateThread.
//   out    - receives the HANDLE of the newly created thread on success
//   unused - stands in for the pthread attribute argument; ignored
//   func   - thread entry point, cast to LPTHREAD_START_ROUTINE
//   arg    - opaque pointer forwarded to `func`
// Returns 0 on success, or EAGAIN when CreateThread yields no handle
// (`*out` is left untouched in that case).
static int pthread_create(pthread_t * out, void * unused, thread_ret_t (*func)(void *), void * arg) {
    (void)unused;

    const HANDLE created = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, NULL);
    if (created != NULL) {
        *out = created;
        return 0;
    }

    return EAGAIN;
}

// Minimal pthread_join replacement for Windows.
//   thread - HANDLE previously produced by the pthread_create shim
//   unused - stands in for the pthread return-value pointer; ignored
// Blocks until the thread finishes, then closes its handle so it is not
// leaked. As with POSIX pthread_join, `thread` must not be used again after
// this call returns.
// Returns the WaitForSingleObject result converted to int.
static int pthread_join(pthread_t thread, void * unused) {
    (void)unused;

    const int wait_result = static_cast<int>(WaitForSingleObject(thread, INFINITE));

    // A handle obtained from CreateThread stays allocated until it is closed
    // explicitly; without this, every create/join pair leaks one handle.
    CloseHandle(thread);

    return wait_result;
}

static int sched_yield(void) {
Sleep(0);
return 0;
}

#define pthread_exit(stat) return stat;
#else
#include <pthread.h>
#include <stdatomic.h>

typedef void * thread_ret_t;

#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#endif

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"

@@ -810,7 +860,7 @@ typedef struct {
} ImageDataRange;

// Function to preprocess a single image in a thread
void * preprocess_image(void * arg) {
thread_ret_t preprocess_image(void * arg) {
ImageDataRange * imageDataRange = static_cast<ImageDataRange *>(arg);

ImageData * imageData_start = imageDataRange->start;
@@ -1407,8 +1457,8 @@ bool clip_compare_text_and_image(const clip_ctx * ctx, const int n_threads, cons

// prepare image and text vectors
const int projection_dim = ctx->vision_model.hparams.projection_dim;
float img_vec[projection_dim];
float txt_vec[projection_dim];
float *img_vec = new float[projection_dim];
float *txt_vec = new float[projection_dim];

// tokenize and encode text
clip_tokens tokens;
@@ -1434,6 +1484,8 @@ bool clip_compare_text_and_image(const clip_ctx * ctx, const int n_threads, cons
// compute similarity
*score = clip_similarity_score(img_vec, txt_vec, projection_dim);

delete[] img_vec;
delete[] txt_vec;
return true;
}

@@ -1502,14 +1554,14 @@ bool clip_zero_shot_label_image(struct clip_ctx * ctx, const int n_threads, cons

clip_image_preprocess(ctx, input_img, &img_res);

float img_vec[vec_dim];
float *img_vec = new float[vec_dim];
if (!clip_image_encode(ctx, n_threads, &img_res, img_vec, false)) {
return false;
}

// encode texts and compute similarities
float txt_vec[vec_dim];
float similarities[n_labels];
float *txt_vec = new float[vec_dim];
float *similarities = new float[n_labels];

for (int i = 0; i < n_labels; i++) {
const auto & text = labels[i];
@@ -1522,6 +1574,10 @@ bool clip_zero_shot_label_image(struct clip_ctx * ctx, const int n_threads, cons
// apply softmax and sort scores
softmax_with_sorting(similarities, n_labels, scores, indices);

delete[] img_vec;
delete[] txt_vec;
delete[] similarities;

return true;
}

10 changes: 10 additions & 0 deletions clip.h
Original file line number Diff line number Diff line change
@@ -7,6 +7,16 @@

struct clip_ctx;

#if defined(_WIN32)

#define NOMINMAX
#include <windows.h>

typedef HANDLE pthread_t;
typedef DWORD thread_ret_t;

#endif

#ifdef __cplusplus
extern "C" {
#endif
7 changes: 5 additions & 2 deletions examples/extract.cpp
Original file line number Diff line number Diff line change
@@ -28,6 +28,7 @@ int main(int argc, char ** argv) {
int totalInputs = params.image_paths.size() + params.texts.size();
int processedInputs = 0;
int textCounter = 0; // Counter for generating unique filenames for text vectors
float * vec;
for (const std::string & img_path : params.image_paths) {
// load the image
const char * img_path_cstr = img_path.c_str();
@@ -45,7 +46,7 @@ int main(int argc, char ** argv) {

const int vec_dim = clip_get_vision_hparams(ctx)->projection_dim;
int shape[2] = {1, vec_dim};
float vec[vec_dim];
vec = new float[vec_dim];
clip_image_encode(ctx, params.n_threads, &img_res, vec, false);

// Generate a unique output filename for each image
@@ -57,6 +58,7 @@ int main(int argc, char ** argv) {
float progressPercentage = (float)processedInputs / totalInputs * 100.0f;
printf("\rProcessing: %.2f%%", progressPercentage);
fflush(stdout);
delete[] vec;
}

for (const std::string & text : params.texts) {
@@ -69,7 +71,7 @@ int main(int argc, char ** argv) {

const int vec_dim = clip_get_text_hparams(ctx)->projection_dim;
int shape[2] = {1, vec_dim};
float vec[vec_dim];
vec = new float[vec_dim];

if (!clip_text_encode(ctx, params.n_threads, &tokens, vec, false)) {
printf("Unable to encode text\n");
@@ -85,6 +87,7 @@ int main(int argc, char ** argv) {
// Generate a unique output filename for each text
std::string output_filename = "./text_vec_" + std::to_string(textCounter++) + ".npy";
writeNpyFile(output_filename.c_str(), vec, shape, 2);
delete[] vec;
}

printf("\n"); // Print a newline to clear the progress bar line
6 changes: 4 additions & 2 deletions examples/simple.c
Original file line number Diff line number Diff line change
@@ -35,7 +35,7 @@ int main() {
}

// Encode image
float img_vec[vec_dim];
float *img_vec = (float*)malloc(vec_dim * sizeof(float));
if (!clip_image_encode(ctx, n_threads, img_res, img_vec, true)) {
fprintf(stderr, "%s: failed to encode image\n", __func__);
return 1;
@@ -46,7 +46,7 @@ int main() {
clip_tokenize(ctx, text, tokens);

// Encode text
float txt_vec[vec_dim];
float *txt_vec= (float *)malloc(vec_dim * sizeof(float));
if (!clip_text_encode(ctx, n_threads, tokens, txt_vec, true)) {
fprintf(stderr, "%s: failed to encode text\n", __func__);
return 1;
@@ -66,6 +66,8 @@ int main() {

// Cleanup
clip_free(ctx);
free(img_vec);
free(txt_vec);

return 0;
}
9 changes: 6 additions & 3 deletions examples/zsl.cpp
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ int main(int argc, char ** argv) {
printf("%s: You must specify at least 2 texts for zero-shot labeling\n", __func__);
}

const char * labels[n_labels];
const char ** labels = new const char *[n_labels];
for (size_t i = 0; i < n_labels; ++i) {
labels[i] = params.texts[i].c_str();
}
@@ -34,8 +34,8 @@ int main(int argc, char ** argv) {
return 1;
}

float sorted_scores[n_labels];
int sorted_indices[n_labels];
float *sorted_scores = new float[n_labels];
int *sorted_indices = new int[n_labels];
if (!clip_zero_shot_label_image(ctx, params.n_threads, &input_img, labels, n_labels, sorted_scores, sorted_indices)) {
fprintf(stderr, "Unable to apply ZSL\n");
return 1;
@@ -48,6 +48,9 @@ int main(int argc, char ** argv) {
}

clip_free(ctx);
delete[] labels;
delete[] sorted_scores;
delete[] sorted_indices;

return 0;
}
17 changes: 11 additions & 6 deletions tests/benchmark.cpp
Original file line number Diff line number Diff line change
@@ -52,7 +52,7 @@ int main(int argc, char ** argv) {

const int vec_dim = clip_get_text_hparams(ctx)->projection_dim;

float txt_vecs[n_labels * vec_dim];
float *txt_vecs = new float[n_labels * vec_dim];

ggml_time_init();

@@ -79,11 +79,11 @@ int main(int argc, char ** argv) {
int n_total_items = 0; // total number of images processed
float total_acc1_score = 0.0f; // total accuracy at 1 for the entire dataset
float total_acc5_score = 0.0f; // total accuracy at 5 for the entire dataset
float img_vecs[vec_dim * batch_size];
float *img_vecs = new float[vec_dim * batch_size];

float similarities[n_labels];
float sorted_scores[n_labels];
int indices[n_labels];
float *similarities = new float[n_labels];
float *sorted_scores = new float[n_labels];
int *indices = new int[n_labels];
std::vector<clip_image_u8> img_inputs(batch_size);
std::vector<clip_image_f32> imgs_resized(batch_size);

@@ -167,6 +167,11 @@ int main(int argc, char ** argv) {
}

clip_free(ctx);
delete[] txt_vecs;
delete[] img_vecs;
delete[] similarities;
delete[] sorted_scores;
delete[] indices;

return 0;
}
}