Skip to content

Commit 09e262a

Browse files
committed
Implement convert_binsparse, which converts from Matrix Market to
binsparse
1 parent e0dd2e7 commit 09e262a

File tree

3 files changed

+279
-10
lines changed

3 files changed

+279
-10
lines changed

examples/convert_binsparse.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,15 @@ template <typename T, typename I>
88
void convert(std::string input_file, std::string output_file, std::string format, std::string comment) {
99
if (format == "CSR") {
1010
std::cout << "Reading in " << input_file << "...\n";
11-
grb::matrix<T, I> x("mouse_gene.mtx");
12-
binsparse::csr_matrix<T, I> matrix{x.backend_.values_.data(), x.backend_.colind_.data(), x.backend_.rowptr_.data(), x.shape()[0], x.shape()[1], I(x.size())};
11+
auto x = binsparse::__detail::mmread<T, I, binsparse::__detail::csr_matrix_owning<T, I>>(input_file);
12+
binsparse::csr_matrix<T, I> matrix{x.values().data(), x.colind().data(), x.rowptr().data(), std::get<0>(x.shape()), std::get<1>(x.shape()), I(x.size())};
1313
binsparse::write_csr_matrix(output_file, matrix);
1414
std::cout << "Writing to binsparse file " << output_file << " using " << format << " format...\n";
1515
} else {
16-
assert(false);
17-
/*
18-
std::cout << "Reading in " << input_file << "...\n";
19-
grb::matrix<T, I, grb::coordinate> x("mouse_gene.mtx");
20-
binsparse::coo_matrix<T, I> matrix{x.backend_.values_.data(), x.backend_.rowptr_.data(), x.backend_.colind_.data(), x.shape()[0], x.shape()[1], I(x.size())};
16+
auto x = binsparse::__detail::mmread<T, I, binsparse::__detail::coo_matrix_owning<T, I>>(input_file);
17+
binsparse::coo_matrix<T, I> matrix{x.values().data(), x.rowind().data(), x.colind().data(), std::get<0>(x.shape()), std::get<1>(x.shape()), I(x.size())};
2118
binsparse::write_coo_matrix(output_file, matrix);
2219
std::cout << "Writing to binsparse file " << output_file << " using " << format << " format...\n";
23-
*/
2420
}
2521
}
2622

@@ -35,7 +31,7 @@ void convert(std::string input_file, std::string output_file, std::string type,
3531
} else if (type == "integer") {
3632
convert<int64_t, I>(input_file, output_file, format, comment);
3733
} else if (type == "pattern") {
38-
convert<bool, I>(input_file, output_file, format, comment);
34+
convert<uint8_t, I>(input_file, output_file, format, comment);
3935
}
4036
}
4137

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
#pragma once
22

3-
#include "matrix_market_inspector.hpp"
3+
#include "matrix_market_inspector.hpp"
4+
#include "matrix_market_read.hpp"
Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
#pragma once
2+
3+
#include <iostream>
4+
#include <fstream>
5+
#include <ranges>
6+
7+
namespace binsparse {
8+
9+
namespace __detail {
10+
11+
template <typename T, typename I>
12+
class csr_matrix_owning {
13+
public:
14+
15+
csr_matrix_owning(std::tuple<I, I> shape) : shape_(shape) {}
16+
17+
auto values() { return std::ranges::views::all(values_); }
18+
auto rowptr() { return std::ranges::views::all(rowptr_); }
19+
auto colind() { return std::ranges::views::all(colind_); }
20+
21+
auto values() const { return std::ranges::views::all(values_); }
22+
auto rowptr() const { return std::ranges::views::all(rowptr_); }
23+
auto colind() const { return std::ranges::views::all(colind_); }
24+
25+
template <typename Iter>
26+
void assign_tuples(Iter first, Iter last) {
27+
std::size_t nnz = std::ranges::distance(first, last);
28+
values_.resize(nnz);
29+
colind_.resize(nnz);
30+
rowptr_.resize(std::get<0>(shape())+1);
31+
32+
rowptr_[0] = 0;
33+
34+
std::size_t r = 0;
35+
std::size_t c = 0;
36+
for (auto iter = first; iter != last; ++iter) {
37+
auto&& [index, value] = *iter;
38+
auto&& [i, j] = index;
39+
40+
values_[c] = value;
41+
colind_[c] = j;
42+
43+
while (r < i) {
44+
if (r+1 > std::get<0>(shape())) {
45+
// TODO: exception?
46+
// throw std::runtime_error("csr_matrix_impl_: given invalid matrix");
47+
}
48+
rowptr_[r+1] = c;
49+
r++;
50+
}
51+
c++;
52+
53+
if (c > nnz) {
54+
// TODO: exception?
55+
// throw std::runtime_error("csr_matrix_impl_: given invalid matrix");
56+
}
57+
}
58+
59+
for ( ; r < std::get<0>(shape()); r++) {
60+
rowptr_[r+1] = nnz;
61+
}
62+
}
63+
64+
auto shape() const {
65+
return shape_;
66+
}
67+
68+
auto size() const {
69+
return values_.size();
70+
}
71+
72+
private:
73+
std::tuple<I, I> shape_;
74+
std::vector<T> values_;
75+
std::vector<I> rowptr_;
76+
std::vector<I> colind_;
77+
};
78+
79+
template <typename T, typename I>
80+
class coo_matrix_owning {
81+
public:
82+
83+
coo_matrix_owning(std::tuple<I, I> shape) : shape_(shape) {}
84+
85+
auto values() { return std::ranges::views::all(values_); }
86+
auto rowind() { return std::ranges::views::all(rowind_); }
87+
auto colind() { return std::ranges::views::all(colind_); }
88+
89+
auto values() const { return std::ranges::views::all(values_); }
90+
auto rowind() const { return std::ranges::views::all(rowind_); }
91+
auto colind() const { return std::ranges::views::all(colind_); }
92+
93+
void push_back(std::tuple<std::tuple<I, I>, T> entry) {
94+
auto&& [idx, v] = entry;
95+
auto&& [i, j] = idx;
96+
values_.push_back(v);
97+
rowind_.push_back(i);
98+
colind_.push_back(j);
99+
}
100+
101+
template <typename Iter>
102+
void assign_tuples(Iter first, Iter last) {
103+
std::size_t nnz = std::ranges::distance(first, last);
104+
for (auto iter = first; iter != last; ++iter) {
105+
auto&& [idx, v] = *iter;
106+
auto&& [i, j] = idx;
107+
push_back({{i, j}, v});
108+
}
109+
}
110+
111+
void reserve(std::size_t size) {
112+
values_.reserve(size);
113+
rowind_.reserve(size);
114+
colind_.reserve(size);
115+
}
116+
117+
auto shape() const {
118+
return shape_;
119+
}
120+
121+
auto size() const {
122+
return values_.size();
123+
}
124+
125+
private:
126+
std::tuple<I, I> shape_;
127+
std::vector<T> values_;
128+
std::vector<I> rowind_;
129+
std::vector<I> colind_;
130+
};
131+
132+
/// Read in the Matrix Market file at location `file_path` and
133+
/// return a data structure with the matrix.
134+
template <typename T, typename I, typename MatrixType>
135+
inline MatrixType mmread(std::string file_path, bool one_indexed = true) {
136+
using index_type = I;
137+
using size_type = std::size_t;
138+
139+
std::ifstream f;
140+
141+
f.open(file_path.c_str());
142+
143+
if (!f.is_open()) {
144+
// TODO better choice of exception.
145+
throw std::runtime_error("mmread: cannot open " + file_path);
146+
}
147+
148+
std::string buf;
149+
150+
// Make sure the file is matrix market matrix, coordinate, and check whether
151+
// it is symmetric. If the matrix is symmetric, non-diagonal elements will
152+
// be inserted in both (i, j) and (j, i). Error out if skew-symmetric or
153+
// Hermitian.
154+
std::getline(f, buf);
155+
std::istringstream ss(buf);
156+
std::string item;
157+
ss >> item;
158+
if (item != "%%MatrixMarket") {
159+
throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
160+
}
161+
ss >> item;
162+
if (item != "matrix") {
163+
throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
164+
}
165+
ss >> item;
166+
if (item != "coordinate") {
167+
throw std::runtime_error(file_path + " could not be parsed as a Matrix Market file.");
168+
}
169+
bool pattern;
170+
ss >> item;
171+
if (item == "pattern") {
172+
pattern = true;
173+
} else {
174+
pattern = false;
175+
}
176+
// TODO: do something with real vs. integer vs. pattern?
177+
ss >> item;
178+
bool symmetric;
179+
if (item == "general") {
180+
symmetric = false;
181+
} else if (item == "symmetric") {
182+
symmetric = true;
183+
} else {
184+
throw std::runtime_error(file_path + " has an unsupported matrix type");
185+
}
186+
187+
bool outOfComments = false;
188+
while (!outOfComments) {
189+
std::getline(f, buf);
190+
191+
if (buf[0] != '%') {
192+
outOfComments = true;
193+
}
194+
}
195+
196+
I m, n, nnz;
197+
// std::istringstream ss(buf);
198+
ss.clear();
199+
ss.str(buf);
200+
ss >> m >> n >> nnz;
201+
202+
// NOTE for symmetric matrices: `nnz` holds the number of stored values in
203+
// the matrix market file, while `matrix.nnz_` will hold the total number of
204+
// stored values (including "mirrored" symmetric values).
205+
MatrixType m_out({m, n});
206+
207+
using coo_type = std::vector<std::tuple<std::tuple<I, I>, T>>;
208+
coo_type matrix;
209+
if (symmetric) {
210+
matrix.reserve(2*nnz);
211+
} else {
212+
matrix.reserve(nnz);
213+
}
214+
215+
size_type c = 0;
216+
while (std::getline(f, buf)) {
217+
I i, j;
218+
T v;
219+
std::istringstream ss(buf);
220+
if (!pattern) {
221+
ss >> i >> j >> v;
222+
} else {
223+
ss >> i >> j;
224+
v = T(1);
225+
}
226+
if (one_indexed) {
227+
i--;
228+
j--;
229+
}
230+
231+
if (i >= m || j >= n) {
232+
throw std::runtime_error("read_MatrixMarket: file has nonzero out of bounds.");
233+
}
234+
235+
matrix.push_back({{i, j}, v});
236+
237+
if (symmetric && i != j) {
238+
matrix.push_back({{j, i}, v});
239+
}
240+
241+
c++;
242+
if (c > nnz) {
243+
throw std::runtime_error("read_MatrixMarket: error reading Matrix Market file, file has more nonzeros than reported.");
244+
}
245+
}
246+
247+
auto sort_fn = [](auto&& a, auto&& b) {
248+
auto&& [a_idx, a_v] = a;
249+
auto&& [b_idx, b_v] = b;
250+
251+
auto&& [a_i, a_j] = a_idx;
252+
auto&& [b_i, b_j] = b_idx;
253+
254+
if (a_i != b_i) {
255+
return a_i < b_i;
256+
} else {
257+
return a_j < b_j;
258+
}
259+
};
260+
261+
std::sort(matrix.begin(), matrix.end(), sort_fn);
262+
263+
m_out.assign_tuples(matrix.begin(), matrix.end());
264+
265+
f.close();
266+
267+
return m_out;
268+
}
269+
270+
} // end __detail
271+
272+
} // end binsparse

0 commit comments

Comments
 (0)