|
18 | 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19 | 19 | // SOFTWARE.
|
20 | 20 |
|
| 21 | +use enum_map::{Enum, EnumMap}; |
| 22 | +use serde_derive::{Deserialize, Serialize}; |
| 23 | +use tokio::sync::Mutex; |
| 24 | +use tokio::task::{spawn_blocking, JoinSet}; |
| 25 | + |
21 | 26 | use crate::config::Config;
|
22 |
| -use crate::prelude::*; |
23 |
| -use std::collections::HashMap; |
| 27 | +use std::collections::BTreeMap; |
| 28 | +use std::fs::OpenOptions; |
24 | 29 | use std::path::PathBuf;
|
25 |
| -use std::sync::{Arc, Mutex}; |
| 30 | +use std::sync::atomic::AtomicUsize; |
| 31 | +use std::sync::Arc; |
26 | 32 | use std::{
|
27 |
| - fs::{self, File, OpenOptions}, |
| 33 | + fs::{self, File}, |
28 | 34 | io::{prelude::*, BufWriter},
|
29 | 35 | };
|
30 | 36 |
|
31 |
| -#[derive(Default, Serialize, Deserialize)] |
32 |
| -struct State { |
33 |
| - last_id: HashMap<String, usize>, |
| 37 | +#[derive(Debug, Enum, Serialize, Deserialize, Copy, Clone)] |
| 38 | +pub enum Forge { |
| 39 | + Github, |
34 | 40 | }
|
35 | 41 |
|
36 |
| -#[derive(Serialize, Deserialize)] |
| 42 | +#[derive(Debug, Default, Serialize, Deserialize)] |
| 43 | +struct State(EnumMap<Forge, AtomicUsize>); |
| 44 | + |
| 45 | +#[derive(Debug, Serialize, Deserialize, Clone)] |
37 | 46 | pub struct Repo {
|
38 | 47 | pub id: String,
|
39 | 48 | pub name: String,
|
40 | 49 | pub has_cargo_toml: bool,
|
41 | 50 | pub has_cargo_lock: bool,
|
42 | 51 | }
|
43 | 52 |
|
| 53 | +#[derive(Debug, Clone)] |
44 | 54 | pub struct Data {
|
45 |
| - base_dir: PathBuf, |
| 55 | + data_dir: PathBuf, |
46 | 56 |
|
47 | 57 | csv_write_lock: Arc<Mutex<()>>,
|
| 58 | + state_lock: Arc<Mutex<()>>, |
48 | 59 |
|
49 |
| - state_path: PathBuf, |
50 |
| - state_cache: Arc<Mutex<Option<State>>>, |
| 60 | + state_cache: Arc<State>, |
| 61 | + |
| 62 | + repos_state: Arc<Mutex<EnumMap<Forge, BTreeMap<String, Repo>>>>, |
51 | 63 | }
|
52 | 64 |
|
53 | 65 | impl Data {
|
54 |
| - pub fn new(config: &Config) -> Self { |
55 |
| - Data { |
56 |
| - base_dir: config.data_dir.clone(), |
| 66 | + pub fn new(config: &Config) -> color_eyre::Result<Self> { |
| 67 | + let mut data = Data { |
| 68 | + data_dir: config.data_dir.clone(), |
57 | 69 |
|
58 | 70 | csv_write_lock: Arc::new(Mutex::new(())),
|
59 | 71 |
|
60 |
| - state_path: config.data_dir.join("state.json"), |
61 |
| - state_cache: Arc::new(Mutex::new(None)), |
62 |
| - } |
63 |
| - } |
| 72 | + state_lock: Arc::new(Mutex::new(())), |
| 73 | + state_cache: Arc::new(State::default()), |
| 74 | + repos_state: Arc::new(Mutex::new(EnumMap::default())), |
| 75 | + }; |
| 76 | + |
| 77 | + // TODO: create CSV files if not exist |
| 78 | + |
64 | 79 |
|
65 |
| - fn edit_state<T, F: Fn(&mut State) -> Fallible<T>>(&self, f: F) -> Fallible<T> { |
66 |
| - let mut state_cache = self.state_cache.lock().unwrap(); |
| 80 | + let state_path = data.state_path(); |
| 81 | + if state_path.exists() { |
| 82 | + let state_cache: State = serde_json::from_slice(&fs::read(&state_path)?)?; |
67 | 83 |
|
68 |
| - if state_cache.is_none() { |
69 |
| - if self.state_path.exists() { |
70 |
| - *state_cache = Some(serde_json::from_slice(&fs::read(&self.state_path)?)?); |
71 |
| - } else { |
72 |
| - *state_cache = Some(Default::default()); |
73 |
| - } |
| 84 | + data.state_cache = Arc::new(state_cache) |
74 | 85 | }
|
75 | 86 |
|
76 |
| - let state = state_cache.as_mut().unwrap(); |
77 |
| - let result = f(state)?; |
| 87 | + Ok(data) |
| 88 | + } |
78 | 89 |
|
79 |
| - let mut file = BufWriter::new(File::create(&self.state_path)?); |
80 |
| - serde_json::to_writer_pretty(&mut file, &state)?; |
81 |
| - file.write_all(&[b'\n'])?; |
| 90 | + pub fn state_path(&self) -> PathBuf { |
| 91 | + self.data_dir.join("state.json") |
| 92 | + } |
82 | 93 |
|
83 |
| - Ok(result) |
| 94 | + pub fn csv_path(&self, forge: Forge) -> PathBuf { |
| 95 | + match forge { |
| 96 | + Forge::Github => self.data_dir.join("github"), |
| 97 | + } |
84 | 98 | }
|
85 | 99 |
|
86 |
| - pub fn get_last_id(&self, platform: &str) -> Fallible<Option<usize>> { |
87 |
| - self.edit_state(|state| Ok(state.last_id.get(platform).cloned())) |
| 100 | + pub fn get_last_id(&self, forge: Forge) -> usize { |
| 101 | + self.state_cache.0[forge].load(std::sync::atomic::Ordering::SeqCst) |
88 | 102 | }
|
89 | 103 |
|
90 |
| - pub fn set_last_id(&self, platform: &str, id: usize) -> Fallible<()> { |
91 |
| - self.edit_state(|state| { |
92 |
| - state.last_id.insert(platform.to_string(), id); |
| 104 | + /// Store the state cache to disk, i.e. last fetched ids |
| 105 | + async fn store_state_cache(&self) -> color_eyre::Result<()> { |
| 106 | + let state = self.state_cache.clone(); |
| 107 | + let lock = self.state_lock.clone(); |
| 108 | + let state_path = self.state_path(); |
| 109 | + spawn_blocking(move || -> color_eyre::Result<()> { |
| 110 | + let guard = lock.blocking_lock(); |
| 111 | + |
| 112 | + let file = File::create(state_path)?; |
| 113 | + let mut file = BufWriter::new(file); |
| 114 | + serde_json::to_writer_pretty(&mut file, state.as_ref())?; |
| 115 | + file.write_all(b"\n")?; |
| 116 | + |
| 117 | + drop(guard); |
| 118 | + |
93 | 119 | Ok(())
|
94 | 120 | })
|
| 121 | + .await |
| 122 | + .unwrap() |
95 | 123 | }
|
96 | 124 |
|
97 |
| - pub fn store_repo(&self, platform: &str, repo: Repo) -> Fallible<()> { |
98 |
| - // Ensure only one thread can write to CSV files at once |
99 |
| - let _lock = self.csv_write_lock.lock().unwrap(); |
| 125 | + /// Stores the repos found to disk in a CSV |
| 126 | + async fn store_csv(&self) -> color_eyre::Result<()> { |
| 127 | + let mut repos = self.repos_state.lock().await; |
100 | 128 |
|
101 |
| - let file = self.base_dir.join(format!("{}.csv", platform)); |
| 129 | + let mut js = JoinSet::new(); |
102 | 130 |
|
103 |
| - // Create the new file or append to it |
104 |
| - let mut csv = if file.exists() { |
105 |
| - csv::WriterBuilder::new() |
106 |
| - .has_headers(false) |
107 |
| - .from_writer(OpenOptions::new().append(true).open(&file)?) |
108 |
| - } else { |
109 |
| - csv::WriterBuilder::new().from_path(&file)? |
110 |
| - }; |
| 131 | + for (forge, repos) in repos.iter() { |
| 132 | + let path = self.csv_path(forge); |
| 133 | + let repos = repos.clone(); // is this necessary? |
| 134 | + js.spawn_blocking(|| -> color_eyre::Result<()> { |
| 135 | + let mut write_headers = false; |
| 136 | + if !path.exists() { |
| 137 | + File::create(&path)?; |
| 138 | + write_headers = true; |
| 139 | + } |
111 | 140 |
|
112 |
| - csv.serialize(repo)?; |
| 141 | + let file = OpenOptions::new() |
| 142 | + .append(true) |
| 143 | + .open(path)?; |
| 144 | + |
| 145 | + let mut writer = csv::WriterBuilder::new() |
| 146 | + .has_headers(write_headers) |
| 147 | + .from_writer(file); |
| 148 | + |
| 149 | + for (_, repo) in repos { |
| 150 | + writer.serialize(repo)?; |
| 151 | + } |
| 152 | + |
| 153 | + Ok(()) |
| 154 | + }); |
| 155 | + } |
| 156 | + |
| 157 | + js.join_all().await.into_iter().collect::<Result<(), _>>()?; |
| 158 | + |
| 159 | + // Clear the map |
| 160 | + repos.iter_mut().for_each(|(_, m)| m.clear()); |
| 161 | + |
| 162 | + Ok(()) |
| 163 | + } |
| 164 | + |
| 165 | + pub async fn set_last_id(&self, forge: Forge, n: usize) -> color_eyre::Result<()> { |
| 166 | + self.state_cache.0[forge].store(n, std::sync::atomic::Ordering::SeqCst); |
| 167 | + |
| 168 | + self.store_csv().await?; |
| 169 | + self.store_state_cache().await?; |
113 | 170 |
|
114 | 171 | Ok(())
|
115 | 172 | }
|
| 173 | + |
| 174 | + pub async fn store_repo(&self, forge: Forge, repo: Repo) { |
| 175 | + let mut repos_state = self.repos_state.lock().await; |
| 176 | + repos_state[forge].insert(repo.name.clone(), repo); |
| 177 | + } |
116 | 178 | }
|
| 179 | + |
0 commit comments