Skip to content

Commit 865a8cd

Browse files
committed
Add test for CSR format
1 parent 6bc7e70 commit 865a8cd

File tree

8 files changed

+205
-4
lines changed

8 files changed

+205
-4
lines changed

CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ FetchContent_MakeAvailable(fmt)
2424
find_package(HDF5 REQUIRED COMPONENTS CXX)
2525
add_subdirectory(include)
2626
add_subdirectory(examples)
27+
add_subdirectory(test)

examples/data/matrix.hdf5

-4.27 KB
Binary file not shown.

include/binsparse/binsparse.hpp

+80-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace binsparse {
1616

1717
inline constexpr double version = 0.1;
1818

19-
// CSR Format
19+
// Dense Format
2020

2121
template <typename T, typename I, typename Order>
2222
void write_dense_matrix(std::string fname,
@@ -31,7 +31,7 @@ void write_dense_matrix(std::string fname,
3131
using json = nlohmann::json;
3232
json j;
3333
j["binsparse"]["version"] = version;
34-
j["binsparse"]["format"] = __detail::get_matrix_string(m);
34+
j["binsparse"]["format"] = __detail::get_matrix_format_string(m);
3535
j["binsparse"]["shape"] = {m.m, m.n};
3636
j["binsparse"]["nnz"] = m.m * m.n;
3737
j["binsparse"]["data_types"]["values"] = type_info<T>::label();
@@ -57,7 +57,9 @@ auto read_dense_matrix(std::string fname, Allocator&& alloc = Allocator{}) {
5757
std::cout << "Reading values...\n";
5858
auto binsparse_metadata = data["binsparse"];
5959

60-
assert(binsparse_metadata["format"] == __detail::get_matrix_string(dense_matrix<T, I, Order>{}));
60+
auto format = __detail::unalias_format(binsparse_metadata["format"]);
61+
62+
assert(format == __detail::get_matrix_format_string(dense_matrix<T, I, Order>{}));
6163

6264
auto nrows = binsparse_metadata["shape"][0];
6365
auto ncols = binsparse_metadata["shape"][1];
@@ -68,6 +70,8 @@ auto read_dense_matrix(std::string fname, Allocator&& alloc = Allocator{}) {
6870
return dense_matrix<T, I, Order>{values.data(), nrows, ncols};
6971
}
7072

73+
// CSR Format
74+
7175
template <typename T, typename I>
7276
void write_csr_matrix(std::string fname,
7377
csr_matrix<T, I> m,
@@ -129,11 +133,80 @@ csr_matrix<T, I> read_csr_matrix(std::string fname, Allocator&& alloc) {
129133
return csr_matrix<T, I>{values.data(), colind.data(), row_ptr.data(), nrows, ncols, nnz};
130134
}
131135

136+
132137
template <typename T, typename I>
133138
csr_matrix<T, I> read_csr_matrix(std::string fname) {
134139
return read_csr_matrix<T, I>(fname, std::allocator<T>{});
135140
}
136141

142+
// CSC Format
143+
144+
template <typename T, typename I>
145+
void write_csc_matrix(std::string fname,
146+
csc_matrix<T, I> m,
147+
nlohmann::json user_keys = {}) {
148+
149+
H5::H5File f(fname.c_str(), H5F_ACC_TRUNC);
150+
151+
std::span<T> values(m.values, m.nnz);
152+
std::span<I> rowind(m.rowind, m.nnz);
153+
std::span<I> col_ptr(m.col_ptr, m.m+1);
154+
155+
hdf5_tools::write_dataset(f, "values", values);
156+
hdf5_tools::write_dataset(f, "indices_1", rowind);
157+
hdf5_tools::write_dataset(f, "pointers_to_1", col_ptr);
158+
159+
using json = nlohmann::json;
160+
json j;
161+
j["binsparse"]["version"] = version;
162+
j["binsparse"]["format"] = "CSR";
163+
j["binsparse"]["shape"] = {m.m, m.n};
164+
j["binsparse"]["nnz"] = m.nnz;
165+
j["binsparse"]["data_types"]["pointers_to_1"] = type_info<I>::label();
166+
j["binsparse"]["data_types"]["indices_1"] = type_info<I>::label();
167+
j["binsparse"]["data_types"]["values"] = type_info<T>::label();
168+
169+
for (auto&& v : user_keys.items()) {
170+
j[v.key()] = v.value();
171+
}
172+
173+
hdf5_tools::set_attribute(f, "binsparse", j.dump(2));
174+
175+
f.close();
176+
}
177+
178+
template <typename T, typename I, typename Allocator>
179+
csc_matrix<T, I> read_csc_matrix(std::string fname, Allocator&& alloc) {
180+
H5::H5File f(fname.c_str(), H5F_ACC_RDWR);
181+
182+
auto metadata = hdf5_tools::get_attribute(f, "binsparse");
183+
184+
using json = nlohmann::json;
185+
auto data = json::parse(metadata);
186+
187+
auto binsparse_metadata = data["binsparse"];
188+
189+
assert(binsparse_metadata["format"] == "CSC");
190+
191+
auto nrows = binsparse_metadata["shape"][0];
192+
auto ncols = binsparse_metadata["shape"][1];
193+
auto nnz = binsparse_metadata["nnz"];
194+
195+
typename std::allocator_traits<std::remove_cvref_t<Allocator>>
196+
:: template rebind_alloc<I> i_alloc(alloc);
197+
198+
auto values = hdf5_tools::read_dataset<T>(f, "values", alloc);
199+
auto rowind = hdf5_tools::read_dataset<I>(f, "indices_1", i_alloc);
200+
auto col_ptr = hdf5_tools::read_dataset<I>(f, "pointers_to_1", i_alloc);
201+
202+
return csc_matrix<T, I>{values.data(), rowind.data(), col_ptr.data(), nrows, ncols, nnz};
203+
}
204+
205+
template <typename T, typename I>
206+
csc_matrix<T, I> read_csc_matrix(std::string fname) {
207+
return read_csc_matrix<T, I>(fname, std::allocator<T>{});
208+
}
209+
137210
// COO Format
138211

139212
template <typename T, typename I>
@@ -181,7 +254,9 @@ coo_matrix<T, I> read_coo_matrix(std::string fname, Allocator&& alloc) {
181254

182255
auto binsparse_metadata = data["binsparse"];
183256

184-
assert(binsparse_metadata["format"] == "COO" || binsparse_metadata["format"] == "COOR");
257+
auto format = __detail::unalias_format(binsparse_metadata["format"]);
258+
259+
assert(format == "COOR" || format == "COOC");
185260

186261
auto nrows = binsparse_metadata["shape"][0];
187262
auto ncols = binsparse_metadata["shape"][1];
@@ -202,6 +277,7 @@ coo_matrix<T, I> read_coo_matrix(std::string fname) {
202277
return read_coo_matrix<T, I>(fname, std::allocator<T>{});
203278
}
204279

280+
205281
inline auto inspect(std::string fname) {
206282
H5::H5File f(fname.c_str(), H5F_ACC_RDWR);
207283

include/binsparse/containers/matrices.hpp

+9
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,15 @@ struct csr_matrix {
2525
I m, n, nnz;
2626
};
2727

28+
// Plain aggregate of raw pointers and dimensions describing a matrix in
// compressed sparse column form. Member order matters: callers use
// positional aggregate initialization.
template <typename T, typename I>
struct csc_matrix {
  T* values;   // stored values
  I* rowind;   // row index for each stored value
  I* col_ptr;  // column pointer array

  I m;    // first shape entry (rows)
  I n;    // second shape entry (columns)
  I nnz;  // number of stored values
};
36+
2837
template <typename T, typename I>
2938
struct coo_matrix {
3039
T* values;

include/binsparse/detail.hpp

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#pragma once
2+
3+
#include <binsparse/containers/matrices.hpp>
4+
5+
namespace binsparse {
6+
7+
namespace __detail {
8+
9+
template <typename T, typename I>
10+
inline std::string get_matrix_format_string(dense_matrix<T, I, row_major> m) {
11+
return "DMATR";
12+
}
13+
14+
template <typename T, typename I>
15+
inline std::string get_matrix_format_string(dense_matrix<T, I, column_major> m) {
16+
return "DMATC";
17+
}
18+
19+
// Map a format alias to its canonical name: "DMAT" becomes "DMATR" and
// "COO" becomes "COOR". Any other string is returned unchanged.
inline std::string unalias_format(const std::string& format) {
  if (format == "DMAT") {
    return "DMATR";
  }
  if (format == "COO") {
    return "COOR";
  }
  return format;
}
28+
29+
} // end __detail
30+
31+
} // end binsparse

test/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
add_subdirectory(gtest)

test/gtest/CMakeLists.txt

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# download_data(<url> <file_name>)
#
# Download <url> to ${CMAKE_BINARY_DIR}/data/<file_name> and unpack the
# archive into the current binary directory.
function(download_data url file_name)
  set(DATASET_ARCHIVE ${CMAKE_BINARY_DIR}/data/${file_name})

  file(DOWNLOAD
       ${url}
       ${DATASET_ARCHIVE})

  # Name the destination explicitly. The previous code computed a directory
  # into a misspelled variable (DATSET_DIR) and then passed the unset
  # ${DATASET_DIR} without the DESTINATION keyword, so extraction silently
  # fell back to this same directory. Keeping it here preserves the layout
  # that csr_test.cpp's relative paths ("<name>/<name>.mtx") rely on --
  # presumably the tests run from this directory; confirm if the working
  # directory is overridden.
  file(ARCHIVE_EXTRACT
       INPUT ${DATASET_ARCHIVE}
       DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
endfunction()
17+
18+
enable_testing()

# Single test executable built from the CSR round-trip test.
add_executable(binsparse-tests csr_test.cpp)

target_link_libraries(binsparse-tests
  binsparse
  fmt
  GTest::gtest_main)

# Matrix Market archives opened by the tests.
download_data(
  https://suitesparse-collection-website.herokuapp.com/MM/DIMACS10/chesapeake.tar.gz
  chesapeake.tar.gz)

download_data(
  https://suitesparse-collection-website.herokuapp.com/MM/HB/1138_bus.tar.gz
  1138_bus.tar.gz)

download_data(
  https://suitesparse-collection-website.herokuapp.com/MM/Belcastro/mouse_gene.tar.gz
  mouse_gene.tar.gz)

# Register each gtest case with CTest.
include(GoogleTest)
gtest_discover_tests(binsparse-tests)

test/gtest/csr_test.cpp

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#include <cstddef>
#include <memory>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#include <fmt/core.h>

#include <binsparse/binsparse.hpp>
6+
7+
// Matrix Market inputs exercised by the tests, as relative paths.
inline std::vector<const char*> file_paths{
    "1138_bus/1138_bus.mtx",
    "chesapeake/chesapeake.mtx",
    "mouse_gene/mouse_gene.mtx",
};
10+
11+
// Round-trips every file in `file_paths` through write_csr_matrix and
// read_csr_matrix and checks that dimensions, values, column indices and row
// pointers read back equal what was written.
TEST(BinsparseReadWrite, CSRFormat) {
  using T = float;
  using I = std::size_t;

  std::string binsparse_file = "out.bsp.hdf5";

  for (auto&& file_path : file_paths) {
    auto x = binsparse::__detail::mmread<T, I, binsparse::__detail::csr_matrix_owning<T, I>>(file_path);

    auto&& [num_rows, num_columns] = x.shape();
    binsparse::csr_matrix<T, I> matrix{x.values().data(), x.colind().data(), x.rowptr().data(), num_rows, num_columns, I(x.size())};
    binsparse::write_csr_matrix(binsparse_file, matrix);

    auto matrix_ = binsparse::read_csr_matrix<T, I>(binsparse_file);

    EXPECT_EQ(matrix.nnz, matrix_.nnz);
    EXPECT_EQ(matrix.m, matrix_.m);
    EXPECT_EQ(matrix.n, matrix_.n);

    // Compare element-wise only when the sizes agree; otherwise the loops
    // would index past the end of the shorter array.
    if (matrix.nnz == matrix_.nnz && matrix.m == matrix_.m) {
      for (I i = 0; i < matrix.nnz; i++) {
        EXPECT_EQ(matrix.values[i], matrix_.values[i]);
      }

      for (I i = 0; i < matrix.nnz; i++) {
        EXPECT_EQ(matrix.colind[i], matrix_.colind[i]);
      }

      for (I i = 0; i < matrix.m+1; i++) {
        EXPECT_EQ(matrix.row_ptr[i], matrix_.row_ptr[i]);
      }
    }

    // read_csr_matrix(fname) reads through std::allocator, so return the
    // arrays the same way instead of using scalar `delete`.
    // NOTE(review): assumes hdf5_tools::read_dataset allocates exactly
    // nnz / nnz / m+1 elements with the supplied allocator -- confirm.
    std::allocator<T>{}.deallocate(matrix_.values, matrix_.nnz);
    std::allocator<I>{}.deallocate(matrix_.row_ptr, matrix_.m + 1);
    std::allocator<I>{}.deallocate(matrix_.colind, matrix_.nnz);
  }
}

0 commit comments

Comments
 (0)