@@ -50,7 +50,7 @@ void optimze_graph(const std::string initial_graph_file, const std::string graph
50
50
* Load the data repository and create a dynamic exploration graph with it.
51
51
* Store the graph in the graph file.
52
52
*/
53
- void create_graph (const std::string repository_file, const DataStreamType data_stream_type, const std::string graph_file, deglib::Metric metric, deglib::builder::LID lid, const uint8_t d, const uint8_t k_ext, const float eps_ext, const uint8_t k_opt, const float eps_opt, const uint8_t i_opt) {
53
+ void create_graph (const std::string repository_file, const DataStreamType data_stream_type, const std::string graph_file, deglib::Metric metric, deglib::builder::LID lid, const uint8_t d, const uint8_t k_ext, const float eps_ext, const uint8_t k_opt, const float eps_opt, const uint8_t i_opt, const uint32_t thread_count ) {
54
54
55
55
auto rnd = std::mt19937 (7 ); // default 7
56
56
const uint32_t swap_tries = 0 ; // additional swap tries between the next graph extension
@@ -74,6 +74,8 @@ void create_graph(const std::string repository_file, const DataStreamType data_s
74
74
// create a graph builder to add vertices to the new graph and improve its edges
75
75
fmt::print (" Start graph builder \n " );
76
76
auto builder = deglib::builder::EvenRegularGraphBuilder (graph, rnd, lid, k_ext, eps_ext, k_opt, eps_opt, i_opt, swap_tries, additional_swap_tries);
77
+ builder.setBatchSize (10000 );
78
+ builder.setThreadCount (thread_count);
77
79
78
80
// provide all features to the graph builder at once. In an online system this will be called multiple times
79
81
auto base_size = uint32_t (repository.size ());
@@ -109,29 +111,30 @@ void create_graph(const std::string repository_file, const DataStreamType data_s
109
111
fmt::print (" Actual memory usage: {} Mb, Max memory usage: {} Mb after setup graph builder\n " , getCurrentRSS () / 1000000 , getPeakRSS () / 1000000 );
110
112
111
113
// check the integrity of the graph during the graph build process
112
- const auto log_after = 100000 ;
114
+ const auto log_after = 100 ;
113
115
114
116
fmt::print (" Start building \n " );
115
117
auto start = std::chrono::steady_clock::now ();
116
118
uint64_t duration_ms = 0 ;
117
119
const auto improvement_callback = [&](deglib::builder::BuilderStatus& status) {
118
120
const auto size = graph.size ();
119
121
120
- if (status.step % log_after == 0 || size == base_size) {
121
- duration_ms += uint32_t (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now () - start).count ());
122
- auto avg_edge_weight = deglib::analysis::calc_avg_edge_weight (graph, scale);
123
- auto weight_histogram_sorted = deglib::analysis::calc_edge_weight_histogram (graph, true , scale);
124
- auto weight_histogram = deglib::analysis::calc_edge_weight_histogram (graph, false , scale);
125
- auto valid_weights = deglib::analysis::check_graph_weights (graph) && deglib::analysis::check_graph_regularity (graph, uint32_t (size), true );
126
- auto connected = deglib::analysis::check_graph_connectivity (graph);
127
- auto duration = duration_ms / 1000 ;
128
- auto currRSS = getCurrentRSS () / 1000000 ;
129
- auto peakRSS = getPeakRSS () / 1000000 ;
130
- fmt::print (" {:7} vertices, {:5}s, {:8} / {:8} improv, Q: {:4.2f} -> Sorted:{:.1f}, InOrder:{:.1f}, {} connected & {}, RSS {} & peakRSS {}\n " ,
131
- size, duration, status.improved , status.tries , avg_edge_weight, fmt::join (weight_histogram_sorted, " " ), fmt::join (weight_histogram, " " ), connected ? " " : " not" , valid_weights ? " valid" : " invalid" , currRSS, peakRSS);
132
- start = std::chrono::steady_clock::now ();
133
- }
134
- else if (status.step % (log_after/10 ) == 0 ) {
122
+ // if(status.step % log_after == 0 || size == base_size) {
123
+ // duration_ms += uint32_t(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count());
124
+ // auto avg_edge_weight = deglib::analysis::calc_avg_edge_weight(graph, scale);
125
+ // auto weight_histogram_sorted = deglib::analysis::calc_edge_weight_histogram(graph, true, scale);
126
+ // auto weight_histogram = deglib::analysis::calc_edge_weight_histogram(graph, false, scale);
127
+ // auto valid_weights = deglib::analysis::check_graph_weights(graph) && deglib::analysis::check_graph_regularity(graph, uint32_t(size), true);
128
+ // auto connected = deglib::analysis::check_graph_connectivity(graph);
129
+ // auto duration = duration_ms / 1000;
130
+ // auto currRSS = getCurrentRSS() / 1000000;
131
+ // auto peakRSS = getPeakRSS() / 1000000;
132
+ // fmt::print("{:7} vertices, {:5}s, {:8} / {:8} improv, Q: {:4.2f} -> Sorted:{:.1f}, InOrder:{:.1f}, {} connected & {}, RSS {} & peakRSS {}\n",
133
+ // size, duration, status.improved, status.tries, avg_edge_weight, fmt::join(weight_histogram_sorted, " "), fmt::join(weight_histogram, " "), connected ? "" : "not", valid_weights ? "valid" : "invalid", currRSS, peakRSS);
134
+ // start = std::chrono::steady_clock::now();
135
+ // }
136
+ // else
137
+ if (status.step % (log_after/10 ) == 0 ) {
135
138
duration_ms += uint32_t (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now () - start).count ());
136
139
auto avg_edge_weight = deglib::analysis::calc_avg_edge_weight (graph, scale);
137
140
auto connected = deglib::analysis::check_graph_connectivity (graph);
@@ -261,31 +264,37 @@ int main() {
261
264
262
265
// ------------------------------- LAION2B -----------------------------------------
263
266
const auto data_stream_type = DataStreamType::AddAll;
264
- // const auto repository_file = (data_path / "laion2B" / "laion2B-en-clip768v2-n=300K.fvecs").string();
265
- // const auto query_file = (data_path / "laion2B" / "public-queries-2024-laion2B-en-clip768v2-n=10k.fvecs").string();
266
- const auto repository_file = (data_path / " laion2B" / " laion2B-en-clip768v2-n=300K_512byte.u8vecs" ).string ();
267
- const auto query_file = (data_path / " laion2B" / " public-queries-2024-laion2B-en-clip768v2-n=10k_512byte.u8vecs" ).string ();
268
- const auto gt_file = (data_path / " laion2B" / " gold-standard-dbsize=300K--public-queries-2024-laion2B-en-clip768v2-n=10k.ivecs" ).string ();
269
- const auto graph_file = (data_path / " deg" / " 768D_L2_K30_AddK60Eps0.1_schemeD_t1_512byte.deg" ).string ();
270
- const auto opt_graph_file = (data_path / " deg" / " 768D_L2_K30_AddK60Eps0.1_schemeD_t1_512byte_200kAll.deg" ).string ();
271
- const auto mrng_graph_file = (data_path / " deg" / " 768D_L2_K30_AddK60Eps0.1_schemeD_t1_512byte_200kAll_removedNonMRNG.deg" ).string ();
267
+ // const auto repository_file = (data_path / "laion2B" / "laion2B-en-clip768v2-n=300K.fvecs").string(); // 300K 768float
268
+ // const auto repository_file = (data_path / "laion2B" / "laion2B-en-clip768v2-n=300K_512float.fvecs").string(); // 300K 768float
269
+ // const auto repository_file = (data_path / "laion2B" / "laion2B-en-clip768v2-n=300K_512byte.u8vecs").string(); // 300K 512uint8
270
+ const auto repository_file = (data_path / " laion2B" / " laion2B-en-clip768v2-n=10M_512byte.u8vecs" ).string (); // 10M 512uint8
271
+
272
+ // const auto query_file = (data_path / "laion2B" / "public-queries-2024-laion2B-en-clip768v2-n=10k.fvecs").string(); // 768float
273
+ // const auto query_file = (data_path / "laion2B" / "public-queries-2024-laion2B-en-clip768v2-n=10k_512float.fvecs").string(); // 512float
274
+ const auto query_file = (data_path / " laion2B" / " public-queries-2024-laion2B-en-clip768v2-n=10k_512byte.u8vecs" ).string (); // 512uint8
275
+
276
+ // const auto gt_file = (data_path / "laion2B" / "gold-standard-dbsize=300K--public-queries-2024-laion2B-en-clip768v2-n=10k.ivecs").string(); // 300K
277
+ const auto gt_file = (data_path / " laion2B" / " gold-standard-dbsize=10M--public-queries-2024-laion2B-en-clip768v2-n=10k.ivecs" ).string (); // 10M
278
+ const auto graph_file = (data_path / " deg" / " 10m" / " 768D_L2_K30_AddK60Eps0.1_schemeD_t12_512byte.deg" ).string ();
279
+ const auto opt_graph_file = (data_path / " deg" / " 10m" / " 768D_L2_K30_AddK60Eps0.1_schemeD_t1_512byte_200kAll.deg" ).string ();
280
+ const auto mrng_graph_file = (data_path / " deg" / " 10m" / " 768D_L2_K30_AddK60Eps0.1_schemeD_t1_512byte_removedNonMRNG.deg" ).string ();
272
281
const auto lid = deglib::builder::LID::Low; // low=schemeD, high=schemeC
273
282
const deglib::Metric metric = deglib::Metric::L2_Uint8;
274
283
275
- // if(std::filesystem::exists(graph_file.c_str()) == false)
276
- // create_graph(repository_file, data_stream_type, graph_file, metric, lid, 30, 60, 0.1f, 30, 0.001f, 5); // d, k_ext, eps_ext, k_opt, eps_opt, i_opt
277
- // test_graph(query_file, gt_file, graph_file, 1, 30); // repeat_test, k
284
+ if (std::filesystem::exists (graph_file.c_str ()) == false )
285
+ create_graph (repository_file, data_stream_type, graph_file, metric, lid, 30 , 60 , 0 .1f , 30 , 0 .001f , 5 , 12 ); // d, k_ext, eps_ext, k_opt, eps_opt, i_opt, thread_count
286
+ test_graph (query_file, gt_file, graph_file, 1 , 30 ); // repeat_test, k
278
287
279
- if (std::filesystem::exists (opt_graph_file.c_str ()) == false )
280
- optimze_graph (graph_file, opt_graph_file, 30 , 0 .001f , 5 , 200000 ); // k_opt, eps_opt, i_opt, iteration
281
- test_graph (query_file, gt_file, opt_graph_file, 1 , 30 ); // repeat_test, k
288
+ // if(std::filesystem::exists(opt_graph_file.c_str()) == false)
289
+ // optimze_graph(graph_file, opt_graph_file, 30, 0.001f, 5, 200000); // k_opt, eps_opt, i_opt, iteration
290
+ // test_graph(query_file, gt_file, opt_graph_file, 1, 30); // repeat_test, k
282
291
283
- if (std::filesystem::exists (mrng_graph_file.c_str ()) == false ) {
284
- // remove_non_mrng_edges(graph_file, mrng_graph_file);
285
- remove_non_mrng_edges (opt_graph_file, mrng_graph_file);
292
+ // if(std::filesystem::exists(mrng_graph_file.c_str()) == false) {
293
+ // remove_non_mrng_edges(graph_file, mrng_graph_file);
294
+ // remove_non_mrng_edges(opt_graph_file, mrng_graph_file);
286
295
// change_features(graph_file, repository_file, metric, opt_graph_file);
287
- }
288
- test_graph (query_file, gt_file, mrng_graph_file, 1 , 30 ); // repeat_test, k
296
+ // }
297
+ // test_graph(query_file, gt_file, mrng_graph_file, 1, 30); // repeat_test, k
289
298
290
299
291
300
// // ------------------------------- GLOVE -----------------------------------------
0 commit comments