From 017df505bd8349df8f1b14a726fa83d670b47804 Mon Sep 17 00:00:00 2001 From: Ilham Syahid S Date: Wed, 5 Feb 2025 18:23:19 +0300 Subject: [PATCH 1/3] ggml: init fft operations Signed-off-by: Ilham Syahid S --- include/ggml.h | 12 +++++ src/ggml-cpu/ggml-cpu.c | 111 ++++++++++++++++++++++++++++++++++++++++ src/ggml.c | 59 +++++++++++++++++++++ 3 files changed, 182 insertions(+) diff --git a/include/ggml.h b/include/ggml.h index 1198dc1fd..d21af3869 100644 --- a/include/ggml.h +++ b/include/ggml.h @@ -521,6 +521,9 @@ extern "C" { GGML_OP_OPT_STEP_ADAMW, GGML_OP_COUNT, + + GGML_OP_FFT, // Fast Fourier Transform + GGML_OP_IFFT, // Inverse Fast Fourier Transform }; enum ggml_unary_op { @@ -1775,6 +1778,15 @@ extern "C" { struct ggml_tensor * a, int k); + // Fast Fourier Transform operations + GGML_API struct ggml_tensor * ggml_fft( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_ifft( + struct ggml_context * ctx, + struct ggml_tensor * a); + #define GGML_KQ_MASK_PAD 32 // q: [n_embd, n_batch, n_head, 1] diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c index e809f05d2..0c03d4eb6 100644 --- a/src/ggml-cpu/ggml-cpu.c +++ b/src/ggml-cpu/ggml-cpu.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -12716,6 +12717,106 @@ static void ggml_compute_forward_opt_step_adamw( } } } + +// FFT + +static bool is_power_of_2(int64_t n) { + return (n & (n - 1)) == 0; +} + +// Recursive implementation of FFT +static void fft_recursive(float complex *x, int64_t n, int64_t stride, bool inverse) { + if (n <= 1) return; + + // Split into even and odd + int64_t half = n / 2; + float complex *even = (float complex *)malloc(half * sizeof(float complex)); + float complex *odd = (float complex *)malloc(half * sizeof(float complex)); + + for (int64_t i = 0; i < half; i++) { + even[i] = x[2*i*stride]; + odd[i] = x[(2*i+1)*stride]; + } + + // Recursive FFT on even and odd parts + fft_recursive(even, half, 1, inverse); + fft_recursive(odd, half, 1, inverse); + + // Combine results + float angle_factor = inverse ? 2.0 * M_PI / n : -2.0 * M_PI / n; + for (int64_t k = 0; k < half; k++) { + float complex t = cexp(angle_factor * k * I) * odd[k]; + x[k*stride] = even[k] + t; + x[(k+half)*stride] = even[k] - t; + } + + // Scale if inverse + if (inverse) { + for (int64_t k = 0; k < n; k++) { + x[k*stride] /= 2.0; + } + } + + free(even); + free(odd); +} + +static void ggml_compute_forward_fft(struct ggml_tensor * dst, const struct ggml_tensor * src) { + GGML_ASSERT(src->type == GGML_TYPE_F32); + GGML_ASSERT(ggml_is_vector(src)); // Only support 1D FFT for now + GGML_ASSERT(is_power_of_2(src->ne[0])); // Length must be power of 2 + + // Allocate temporary complex array + int64_t n = src->ne[0]; + float complex *x = (float complex *)malloc(n * sizeof(float complex)); + + // Copy input data to complex array + float *src_data = (float *)src->data; + for (int64_t i = 0; i < n; i++) { + x[i] = src_data[i] + 0.0*I; + } + + // Perform FFT + fft_recursive(x, n, 1, false); + + // Copy result back + float *dst_data = (float *)dst->data; + for (int64_t i = 0; i < n; i++) { + // Store real and imaginary parts in interleaved format + dst_data[2*i] = crealf(x[i]); + dst_data[2*i+1] = cimagf(x[i]); + } + + free(x); +} + +static void ggml_compute_forward_ifft(struct ggml_tensor * dst, const struct ggml_tensor * src) { + GGML_ASSERT(src->type == GGML_TYPE_F32); + GGML_ASSERT(ggml_is_vector(src)); // Only support 1D IFFT for now + GGML_ASSERT(is_power_of_2(src->ne[0]/2)); // Complex length must be power of 2 + + // Allocate temporary complex array + int64_t n = src->ne[0]/2; // Complex length is half of the real array length + float complex *x = (float complex *)malloc(n * sizeof(float complex)); + + // Copy input data to complex array + float *src_data = (float *)src->data; + for (int64_t i = 0; i < n; i++) { + x[i] = src_data[2*i] + src_data[2*i+1]*I; + } + + // Perform IFFT + fft_recursive(x, n, 1, true); + + // Copy result back (real part only for IFFT) + float *dst_data = (float *)dst->data; + for (int64_t i = 0; i < n; i++) { + dst_data[i] = crealf(x[i]); + } + + free(x); +} + ///////////////////////////////// static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) { @@ -13081,6 +13182,16 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm ggml_compute_forward_opt_step_adamw(params, tensor); } break; + case GGML_OP_FFT: + { + ggml_compute_forward_fft(tensor, tensor->src[0]); + } + break; + case GGML_OP_IFFT: + { + ggml_compute_forward_ifft(tensor, tensor->src[0]); + } + break; case GGML_OP_NONE: { // nop diff --git a/src/ggml.c b/src/ggml.c index 3b4861542..4ae3a45cf 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -5130,6 +5130,65 @@ struct ggml_tensor * ggml_opt_step_adamw( return result; } +// ggml_fft + +static struct ggml_tensor * ggml_fft_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + GGML_ASSERT(ggml_is_vector(a)); + bool is_node = false; + + // Instead of checking a->grad directly, we should check if the tensor + // is part of a computation graph and has gradients enabled + if (!inplace && (a->flags & GGML_TENSOR_FLAG_PARAM)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + // Set the operation type directly + result->op = GGML_OP_FFT; + // Initialize op params if needed + ggml_set_op_params(result, NULL, 0); + + // Instead of setting grad directly, we should mark the tensor as requiring gradients + if (is_node) { + result->flags |= GGML_TENSOR_FLAG_PARAM; + } + result->src[0] = a; + + return result; +} + +struct ggml_tensor * ggml_fft( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_fft_impl(ctx, a, false); +} + +// ggml_ifft + +struct ggml_tensor * ggml_ifft( + struct ggml_context * ctx, + struct ggml_tensor * a) { + GGML_ASSERT(ggml_is_vector(a)); + struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, a->ne[0]/2); + + // Set the operation type directly + result->op = GGML_OP_IFFT; + // Initialize op params if needed + ggml_set_op_params(result, NULL, 0); + + // Instead of checking a->grad directly, check if input tensor requires gradients + if (a->flags & GGML_TENSOR_FLAG_PARAM) { + // Mark the result tensor as requiring gradients + result->flags |= GGML_TENSOR_FLAG_PARAM; + } + result->src[0] = a; + return result; +} + //////////////////////////////////////////////////////////////////////////////// struct ggml_hash_set ggml_hash_set_new(size_t size) { From 0120b36b1e978ac9d3bebda6541709cc9457ee22 Mon Sep 17 00:00:00 2001 From: Ilham Syahid S Date: Thu, 6 Feb 2025 10:11:34 +0300 Subject: [PATCH 2/3] ggml: fix FFT/IFFT implementation Signed-off-by: Ilham Syahid S --- include/ggml.h | 4 +-- src/ggml-cpu/ggml-cpu.c | 78 ++++++++++++++++++++++++++--------------- src/ggml.c | 42 +++++++--------------- 3 files changed, 64 insertions(+), 60 deletions(-) diff --git a/include/ggml.h b/include/ggml.h index d21af3869..1b893117f 100644 --- a/include/ggml.h +++ b/include/ggml.h @@ -520,10 +520,10 @@ extern "C" { GGML_OP_CROSS_ENTROPY_LOSS_BACK, GGML_OP_OPT_STEP_ADAMW, - GGML_OP_COUNT, - GGML_OP_FFT, // Fast Fourier Transform GGML_OP_IFFT, // Inverse Fast Fourier Transform + + GGML_OP_COUNT, }; enum ggml_unary_op { diff --git a/src/ggml-cpu/ggml-cpu.c b/src/ggml-cpu/ggml-cpu.c index 0c03d4eb6..49248ba95 100644 --- a/src/ggml-cpu/ggml-cpu.c +++ b/src/ggml-cpu/ggml-cpu.c @@ -12721,11 +12721,11 @@ static void ggml_compute_forward_opt_step_adamw( // FFT static bool is_power_of_2(int64_t n) { - return (n & (n - 1)) == 0; + return n > 0 && (n & (n - 1)) == 0; } // Recursive implementation of FFT -static void fft_recursive(float complex *x, int64_t n, int64_t stride, bool inverse) { +static void fft_recursive(float complex *x, int64_t n, bool inverse) { if (n <= 1) return; // Split into even and odd @@ -12734,38 +12734,40 @@ static void fft_recursive(float complex *x, int64_t n, int64_t stride, bool inve float complex *odd = (float complex *)malloc(half * sizeof(float complex)); for (int64_t i = 0; i < half; i++) { - even[i] = x[2*i*stride]; - odd[i] = x[(2*i+1)*stride]; + even[i] = x[2*i]; + odd[i] = x[(2*i+1)]; } // Recursive FFT on even and odd parts - fft_recursive(even, half, 1, inverse); - fft_recursive(odd, half, 1, inverse); + fft_recursive(even, half, inverse); + fft_recursive(odd, half, inverse); // Combine results float angle_factor = inverse ? 2.0 * M_PI / n : -2.0 * M_PI / n; for (int64_t k = 0; k < half; k++) { - float complex t = cexp(angle_factor * k * I) * odd[k]; - x[k*stride] = even[k] + t; - x[(k+half)*stride] = even[k] - t; - } - - // Scale if inverse - if (inverse) { - for (int64_t k = 0; k < n; k++) { - x[k*stride] /= 2.0; - } + float complex t = cexpf(angle_factor * k * I) * odd[k]; + x[k] = even[k] + t; + x[k+half] = even[k] - t; } free(even); free(odd); } -static void ggml_compute_forward_fft(struct ggml_tensor * dst, const struct ggml_tensor * src) { - GGML_ASSERT(src->type == GGML_TYPE_F32); +static void ggml_compute_forward_fft( + const struct ggml_compute_params * params, + struct ggml_tensor * dst) { + + const struct ggml_tensor * src = dst->src[0]; + + GGML_ASSERT(src->type == GGML_TYPE_F32); // Only support f32 for now GGML_ASSERT(ggml_is_vector(src)); // Only support 1D FFT for now GGML_ASSERT(is_power_of_2(src->ne[0])); // Length must be power of 2 + if (params->ith != 0) { + return; + } + // Allocate temporary complex array int64_t n = src->ne[0]; float complex *x = (float complex *)malloc(n * sizeof(float complex)); @@ -12773,11 +12775,11 @@ static void ggml_compute_forward_fft(struct ggml_tensor * dst, const struct ggml // Copy input data to complex array float *src_data = (float *)src->data; for (int64_t i = 0; i < n; i++) { - x[i] = src_data[i] + 0.0*I; + x[i] = src_data[i] + 0.0f * I; } // Perform FFT - fft_recursive(x, n, 1, false); + fft_recursive(x, n, false); // Copy result back float *dst_data = (float *)dst->data; @@ -12790,13 +12792,22 @@ static void ggml_compute_forward_fft(struct ggml_tensor * dst, const struct ggml free(x); } -static void ggml_compute_forward_ifft(struct ggml_tensor * dst, const struct ggml_tensor * src) { +static void ggml_compute_forward_ifft( + const struct ggml_compute_params * params, + struct ggml_tensor * dst) { + + const struct ggml_tensor * src = dst->src[0]; + GGML_ASSERT(src->type == GGML_TYPE_F32); - GGML_ASSERT(ggml_is_vector(src)); // Only support 1D IFFT for now - GGML_ASSERT(is_power_of_2(src->ne[0]/2)); // Complex length must be power of 2 + GGML_ASSERT(ggml_is_vector(src)); + GGML_ASSERT(is_power_of_2(src->ne[0]/2)); + + if (params->ith != 0) { + return; + } // Allocate temporary complex array - int64_t n = src->ne[0]/2; // Complex length is half of the real array length + int64_t n = src->ne[0]; float complex *x = (float complex *)malloc(n * sizeof(float complex)); // Copy input data to complex array @@ -12805,8 +12816,14 @@ static void ggml_compute_forward_ifft(struct ggml_tensor * dst, const struct ggm x[i] = src_data[2*i] + src_data[2*i+1]*I; } - // Perform IFFT - fft_recursive(x, n, 1, true); + // Perform recursive IFFT + fft_recursive(x, n, true); + + // Scale the result by 1/N + float scale = 1.0f / n; + for (int64_t i = 0; i < n; i++) { + x[i] *= scale; + } // Copy result back (real part only for IFFT) float *dst_data = (float *)dst->data; @@ -13184,12 +13201,12 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm break; case GGML_OP_FFT: { - ggml_compute_forward_fft(tensor, tensor->src[0]); + ggml_compute_forward_fft(params, tensor); } break; case GGML_OP_IFFT: { - ggml_compute_forward_ifft(tensor, tensor->src[0]); + ggml_compute_forward_ifft(params, tensor); } break; case GGML_OP_NONE: @@ -13478,6 +13495,11 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { { GGML_ABORT("fatal error"); } + case GGML_OP_FFT: + case GGML_OP_IFFT: + { + n_tasks = 1; + } break; default: { fprintf(stderr, "%s: op not implemented: ", __func__); diff --git a/src/ggml.c b/src/ggml.c index 4ae3a45cf..e192d0e18 100644 --- a/src/ggml.c +++ b/src/ggml.c @@ -990,9 +990,12 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CROSS_ENTROPY_LOSS", "CROSS_ENTROPY_LOSS_BACK", "OPT_STEP_ADAMW", + + "FFT", + "IFFT", }; -static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83"); +static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -1087,9 +1090,12 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "cross_entropy_loss(x,y)", "cross_entropy_loss_back(x,y)", "adamw(x)", + + "fft(x)", + "ifft(x)", }; -static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83"); +static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85"); static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); @@ -5134,28 +5140,12 @@ struct ggml_tensor * ggml_opt_step_adamw( static struct ggml_tensor * ggml_fft_impl( struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { + struct ggml_tensor * a) { GGML_ASSERT(ggml_is_vector(a)); - bool is_node = false; - // Instead of checking a->grad directly, we should check if the tensor - // is part of a computation graph and has gradients enabled - if (!inplace && (a->flags & GGML_TENSOR_FLAG_PARAM)) { - is_node = true; - } + struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, a->ne[0]); - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - // Set the operation type directly result->op = GGML_OP_FFT; - // Initialize op params if needed - ggml_set_op_params(result, NULL, 0); - - // Instead of setting grad directly, we should mark the tensor as requiring gradients - if (is_node) { - result->flags |= GGML_TENSOR_FLAG_PARAM; - } result->src[0] = a; return result; @@ -5164,7 +5154,7 @@ static struct ggml_tensor * ggml_fft_impl( struct ggml_tensor * ggml_fft( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_fft_impl(ctx, a, false); + return ggml_fft_impl(ctx, a); } // ggml_ifft @@ -5175,17 +5165,9 @@ struct ggml_tensor * ggml_ifft( GGML_ASSERT(ggml_is_vector(a)); struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, a->ne[0]/2); - // Set the operation type directly result->op = GGML_OP_IFFT; - // Initialize op params if needed - ggml_set_op_params(result, NULL, 0); - - // Instead of checking a->grad directly, check if input tensor requires gradients - if (a->flags & GGML_TENSOR_FLAG_PARAM) { - // Mark the result tensor as requiring gradients - result->flags |= GGML_TENSOR_FLAG_PARAM; - } result->src[0] = a; + return result; } From ae19296ce40b554cf3c8934962838fc1331ff311 Mon Sep 17 00:00:00 2001 From: Ilham Syahid S Date: Thu, 6 Feb 2025 10:13:05 +0300 Subject: [PATCH 3/3] tests: Add FFT test case for signal reconstruction This commit adds a new test file `test-fft.c` to validate the FFT and IFFT implementations. The test generates a simple sinusoidal signal, performs a forward FFT followed by an inverse FFT, and checks that the reconstructed signal matches the original signal within a specified tolerance. Signed-off-by: Ilham Syahid S --- tests/CMakeLists.txt | 15 +++++++ tests/test-fft.c | 97 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 tests/test-fft.c diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ff723b7c5..94404cf3c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -299,6 +299,21 @@ endif() add_test(NAME ${TEST_TARGET} COMMAND $) set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw") +# +# test-fft + +set(TEST_TARGET test-fft) +add_executable(${TEST_TARGET} ${TEST_TARGET}.c) +target_link_libraries(${TEST_TARGET} PRIVATE ggml) +if (MSVC) + target_link_options(${TEST_TARGET} PRIVATE "/STACK: 8388608") # 8MB +endif() +if (MATH_LIBRARY) + target_link_libraries(${TEST_TARGET} PRIVATE ${MATH_LIBRARY}) +endif() +add_test(NAME ${TEST_TARGET} COMMAND $) +set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw") + # # test-pad-reflect-1d diff --git a/tests/test-fft.c b/tests/test-fft.c new file mode 100644 index 000000000..227adb653 --- /dev/null +++ b/tests/test-fft.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include + +#include "ggml.h" +#include "ggml-cpu.h" + +#define N_SAMPLES 16 // Must be power of 2 + +// Helper function to generate a simple test signal +void generate_test_signal(float * signal, int n) { + // Generate a simple sinusoidal signal + for (int i = 0; i < n; i++) { + signal[i] = sinf(2.0f * M_PI * i / n) + 0.5f * sinf(4.0f * M_PI * i / n); + } +} + +// Helper function to compare arrays with tolerance +bool compare_arrays(float * a, float * b, int n, float tolerance) { + for (int i = 0; i < n; i++) { + if (fabsf(a[i] - b[i]) > tolerance) { + printf("Mismatch at index %d: %f != %f\n", i, a[i], b[i]); + return false; + } + } + return true; +} + +int main(int argc, const char ** argv) { + struct ggml_init_params params = { + .mem_size = 128*1024*1024, + .mem_buffer = NULL, + .no_alloc = false, + }; + + // initialize the backend + struct ggml_context * ctx = ggml_init(params); + + // Create test signal + float input_signal[N_SAMPLES]; + float output_signal[N_SAMPLES]; + generate_test_signal(input_signal, N_SAMPLES); + + // Create tensors + struct ggml_tensor * input = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, N_SAMPLES); + struct ggml_tensor * fft_result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2 * N_SAMPLES); + struct ggml_tensor * ifft_result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, N_SAMPLES); + + // Copy input signal to tensor + memcpy(input->data, input_signal, N_SAMPLES * sizeof(float)); + + // Create compute graph + struct ggml_cgraph * gf = ggml_new_graph(ctx); + struct ggml_cgraph * gb = ggml_new_graph(ctx); + + // Perform FFT + fft_result = ggml_fft(ctx, input); + ggml_build_forward_expand(gf, fft_result); + + // Perform IFFT + ifft_result = ggml_ifft(ctx, fft_result); + ggml_build_forward_expand(gb, ifft_result); + + // Compute the graphs + ggml_graph_compute_with_ctx(ctx, gf, 1); + ggml_graph_compute_with_ctx(ctx, gb, 1); + + // Copy result back + memcpy(output_signal, ifft_result->data, N_SAMPLES * sizeof(float)); + + // Compare input and output + const float tolerance = 1e-5f; + bool success = compare_arrays(input_signal, output_signal, N_SAMPLES, tolerance); + + if (success) { + printf("FFT/IFFT test passed! Signal was correctly reconstructed within tolerance %f\n", tolerance); + } else { + printf("FFT/IFFT test failed! Signal reconstruction error exceeded tolerance %f\n", tolerance); + + // Print signals for comparison + printf("\nOriginal signal:\n"); + for (int i = 0; i < N_SAMPLES; i++) { + printf("%f ", input_signal[i]); + } + printf("\n\nReconstructed signal:\n"); + for (int i = 0; i < N_SAMPLES; i++) { + printf("%f ", output_signal[i]); + } + printf("\n"); + } + + ggml_free(ctx); + + return success ? 0 : 1; +}