@@ -108,64 +108,126 @@ def score_cmd(text: str | None, file: str | None, config: str | None):
108108 _render_score (result )
109109
110110
111- @main .command ()
112- @click .option ("--dir" , "-d" , default = "." , help = "Directory to initialize in" )
113- def init (dir : str ):
114- """Set up MIDAS in your project — guided onboarding."""
def _find_sample(name: str) -> Path | None:
    """Return the path of a bundled sample file, or ``None`` if it is missing.

    Two locations are probed, in order: the ``data`` directory next to this
    module (installed package data), then the ``examples`` directory one level
    above it (development checkout). The first one that exists wins.
    """
    module_dir = Path(__file__).parent
    candidates = (
        module_dir / "data" / name,
        module_dir.parent / "examples" / name,
    )
    return next((path for path in candidates if path.exists()), None)
120+
121+
def _copy_sample_files(target: Path) -> tuple[Path | None, Path | None]:
    """Seed *target* with the bundled sample config and sample posts.

    For each of ``midas_config.yaml`` and ``posts.jsonl``: a file that already
    exists is left untouched and reported; otherwise the bundled sample (when
    one can be located) is copied into place.

    Returns a ``(config_path, data_path)`` pair. An element is ``None`` when
    the file neither existed already nor could be copied from a sample.
    """
    import shutil

    def _ensure(dest, sample, exists_msg, created_msg):
        # Existing user files always win over the bundled sample.
        if dest.exists():
            console.print(exists_msg)
            return dest
        if sample:
            shutil.copy(sample, dest)
            console.print(created_msg)
            return dest
        return None

    config_path = target / "midas_config.yaml"
    data_path = target / "posts.jsonl"
    sample_config = _find_sample("sample_config.yaml")
    sample_data = _find_sample("sample_data.jsonl")

    created_config = _ensure(
        config_path,
        sample_config,
        f" [dim]Config already exists:[/dim] {config_path}",
        f" [green]Created[/green] {config_path} (sample config)",
    )
    created_data = _ensure(
        data_path,
        sample_data,
        f" [dim]Data already exists:[/dim] {data_path}",
        f" [green]Created[/green] {data_path} (10 sample posts)",
    )
    return created_config, created_data
151+
152+
def _detect_data_format(path: str) -> str:
    """Guess the format of a data file.

    Args:
        path: Path to the data file.

    Returns:
        ``"csv"`` when the file extension is ``.csv`` (case-insensitive; the
        file is not opened in that case), ``"apify"`` when the first
        non-whitespace character of the file is ``[`` (a JSON array), and
        ``"jsonl"`` otherwise, including for empty or whitespace-only files.

    Raises:
        OSError: If a non-CSV file cannot be opened.
        UnicodeDecodeError: If the start of the file is not valid UTF-8.
    """
    filepath = Path(path)
    if filepath.suffix.lower() == ".csv":
        return "csv"

    # "utf-8-sig" transparently drops a leading byte-order mark, and leading
    # whitespace is skipped, so a BOM-prefixed or pretty-printed JSON array is
    # still recognised. Reading in small chunks keeps this cheap even when the
    # whole file is a single very long line.
    with open(filepath, encoding="utf-8-sig") as f:
        while chunk := f.read(4096):
            head = chunk.lstrip()
            if head:
                return "apify" if head[0] == "[" else "jsonl"
    return "jsonl"
167+
168+
def _parse_data_file(path: str, fmt: str) -> list[dict]:
    """Load *path* with the parser that matches *fmt*.

    ``"apify"`` and ``"csv"`` select their dedicated parsers from
    ``.export``; any other value is treated as JSONL.
    """
    from .export import load_jsonl, parse_apify_posts, parse_linkedin_export

    parsers = {
        "apify": parse_apify_posts,
        "csv": parse_linkedin_export,
    }
    # Unknown formats fall back to the JSONL loader.
    parse = parsers.get(fmt, load_jsonl)
    return parse(path)
179+
180+
def _interactive_score_demo(target: Path) -> None:
    """Offer a one-off scoring demo using the config found in *target*.

    Returns without doing anything when ``midas_config.yaml`` is absent from
    *target*, and returns silently when the prompt is aborted (Ctrl-C / EOF).
    An empty answer scores a built-in sample post instead of user text.
    """
    config_path = target / "midas_config.yaml"
    if not config_path.exists():
        return

    # Fallback text scored when the user just presses Enter.
    sample_post = (
        "I just spent 3 months building an AI agent from scratch.\n\n"
        "Everyone said to use a framework.\n\n"
        "But here's the thing → frameworks hide the complexity.\n\n"
        "They don't remove it.\n\n"
        "I learned more in those 3 months than in 2 years of using LangChain.\n\n"
        "Here's what actually matters:\n\n"
        "→ Prompt engineering is 80% of the work\n"
        "→ Memory management is harder than generation\n"
        "→ Error handling is where agents actually break\n"
        "→ Evaluation is still an unsolved problem\n\n"
        "The frameworks will catch up.\n\n"
        "But understanding the fundamentals won't go out of style.\n\n"
        "Comment AGENT if you've built from scratch too."
    )

    console.print()
    console.print("[bold]Let's try scoring a post![/bold]")
    console.print(" Paste a LinkedIn post below (or press Enter to use a sample):")
    console.print()

    try:
        pasted = click.prompt("", default="", prompt_suffix=" > ", show_default=False)
    except (click.Abort, EOFError):
        return

    draft = pasted.strip()
    if not draft:
        draft = sample_post
        console.print(" [dim](Using sample post)[/dim]")

    cfg = load_config(str(config_path))
    _render_score(score(draft, cfg))
219+
220+
221+ def _print_static_next_steps (has_config : bool ) -> None :
222+ """Print the static next-steps text (non-interactive fallback)."""
159223 console .print ("[bold]Next steps:[/bold]" )
160224 console .print ()
161225 console .print (" [bold cyan]1.[/bold cyan] Get your LinkedIn data (you need posts + engagement numbers):" )
162226 console .print (" Use the [bold]Apify LinkedIn Post Scraper[/bold] (free tier available):" )
163227 console .print (" [dim]https://console.apify.com/actors/RE0MriXnFhR3IgVnJ/input[/dim]" )
164228 console .print ()
165229 console .print (" Then convert to MIDAS format:" )
166- console .print (' [dim]python3 -c "from midas.export import parse_apify_posts, save_jsonl; save_jsonl(parse_apify_posts(\' apify_dataset.json\' ), \' posts.jsonl\' )"[/dim]' )
167- console .print ()
168- console .print (" [dim]Full guide: https://github.com/ajsai47/midas/blob/main/docs/01-export-your-data.md[/dim]" )
230+ console .print (" [dim]midas init --data apify_dataset.json[/dim]" )
169231 console .print ()
170232 console .print (" [bold cyan]2.[/bold cyan] Analyze your posts to build your formula:" )
171233 console .print (" [dim]midas analyze posts.jsonl -o midas_config.yaml[/dim]" )
@@ -176,11 +238,207 @@ def _find_sample(name: str) -> Path | None:
176238 console .print (" [bold cyan]4.[/bold cyan] Validate that your formula predicts engagement:" )
177239 console .print (" [dim]midas validate posts.jsonl --config midas_config.yaml[/dim]" )
178240 console .print ()
179- if sample_config or config_path . exists () :
241+ if has_config :
180242 console .print (" [dim]Tip: A sample config and data were created above — try steps 3-4 now to see it in action.[/dim]" )
181243 console .print ()
182244
183245
def _build_formula_from_data(target: Path, source: str, fmt: str) -> None:
    """Convert *source* into ``posts.jsonl``, then analyze and validate it.

    Shared by the ``--data`` path and the interactive "I have data" path of
    ``init`` so both run exactly the same pipeline:

    1. parse *source* according to *fmt* and write ``target/posts.jsonl``;
    2. analyze the posts and export ``target/midas_config.yaml``;
    3. validate the exported config against the same posts (skipped when
       fewer than 5 posts were written).

    Exits the process with status 1 when the parser returns no posts.
    """
    import json

    from .analyze import analyze_file, export_config
    from .export import save_jsonl
    # Imported under an alias: this module also defines a ``validate`` command.
    from .validate import validate as validate_fn

    posts = _parse_data_file(source, fmt)
    if not posts:
        console.print("[red]No posts found in the file.[/red]")
        sys.exit(1)

    data_path = target / "posts.jsonl"
    save_jsonl(posts, str(data_path))
    console.print(f" [green]Parsed {len(posts)} posts[/green] → {data_path}")

    if fmt == "csv":
        console.print()
        console.print(" [yellow]Note:[/yellow] LinkedIn CSV exports don't include engagement metrics.")
        console.print(" You'll need to add reactions/comments/reposts manually or via the LinkedIn API.")
        console.print()

    # Analyze
    console.print()
    console.print("[bold]Analyzing your posts...[/bold]")
    result = analyze_file(str(data_path))
    sig_count = sum(1 for s in result.signals if s.significant)

    config_path = target / "midas_config.yaml"
    export_config(result, str(config_path))

    console.print(f" Posts analyzed: [bold]{result.total_posts}[/bold]")
    console.print(f" Signals found: [bold]{len(result.signals)}[/bold] ({sig_count} statistically significant)")
    console.print(f" [green]Config saved to {config_path}[/green]")

    # Validate
    console.print()
    console.print("[bold]Validating your formula...[/bold]")

    # Explicit encoding so the read does not depend on the platform default.
    # NOTE(review): assumes save_jsonl writes UTF-8 (or pure ASCII) — confirm.
    with open(data_path, encoding="utf-8") as f:
        loaded_posts = [json.loads(row) for row in map(str.strip, f) if row]

    if len(loaded_posts) >= 5:
        cfg = load_config(str(config_path))
        val_result = validate_fn(loaded_posts, cfg)

        color = "green" if val_result.spearman_rho > 0.3 else "yellow" if val_result.spearman_rho > 0 else "red"
        strength = val_result.correlation_strength.upper()
        sig_str = ", SIGNIFICANT" if val_result.is_significant else ""
        console.print(f" Spearman rho: [{color}]{val_result.spearman_rho:+.2f}[/{color}] ({strength}{sig_str})")

        if val_result.spearman_rho > 0 and val_result.is_significant:
            console.print(" [green]Your formula predicts engagement![/green]")
    else:
        console.print(" [dim]Not enough posts to validate (need at least 5).[/dim]")

    console.print()
    console.print("[bold]You're all set.[/bold] Try scoring a draft:")
    console.print(' [dim]midas score "Your draft here..."[/dim]')
    console.print()


@main.command()
@click.option("--dir", "-d", default=".", help="Directory to initialize in")
@click.option("--data", type=click.Path(exists=True), help="Path to your LinkedIn data file (auto-detects format)")
def init(dir: str, data: str | None):
    """Set up MIDAS in your project — guided onboarding."""
    # The docstring above doubles as the command's --help text, so the details
    # live here instead:
    #   dir  -- directory to initialize (created if missing). The name is fixed
    #           by the ``--dir`` option, hence the shadowed builtin.
    #   data -- optional data file; when given, its format is auto-detected and
    #           the full import/analyze/validate pipeline runs without prompts.
    target = Path(dir)
    target.mkdir(parents=True, exist_ok=True)

    console.print(Panel(
        "[bold]MIDAS[/bold] — Reverse-engineer your LinkedIn into a\npersonalized scoring formula.",
        border_style="yellow",
    ))
    console.print()

    is_interactive = sys.stdin.isatty() and data is None

    # ── Path A: User provided --data flag ──────────────────────────────
    if data:
        fmt = _detect_data_format(data)
        console.print(f" Detected format: [bold]{fmt}[/bold]")
        _build_formula_from_data(target, data, fmt)
        return

    # ── Path B: Non-interactive (piped stdin / CI) ─────────────────────
    if not is_interactive:
        created_config, _ = _copy_sample_files(target)
        console.print()
        _print_static_next_steps(created_config is not None)
        return

    # ── Path C: Interactive onboarding ─────────────────────────────────
    has_data = click.confirm("Do you have your LinkedIn post data ready?", default=False)

    if not has_data:
        # No data — set up with samples and demo scoring
        console.print()
        console.print(" No worries! Here's how to get it:")
        console.print()
        console.print(" [bold cyan]1.[/bold cyan] Go to the Apify LinkedIn Post Scraper (free tier available):")
        console.print(" [dim]https://console.apify.com/actors/RE0MriXnFhR3IgVnJ/input[/dim]")
        console.print()
        console.print(" [bold cyan]2.[/bold cyan] Run the scraper on your profile")
        console.print()
        console.print(" [bold cyan]3.[/bold cyan] Download the JSON dataset and save it here, then run:")
        console.print(" [dim]midas init --data apify_dataset.json[/dim]")
        console.print()
        console.print(" In the meantime, let's set up with sample data so you can see how MIDAS works.")
        console.print()

        _copy_sample_files(target)
        _interactive_score_demo(target)

        console.print(" Your formula is working. Once you have your real data:")
        console.print(" [dim]midas init --data your_posts.json[/dim]")
        console.print()
        return

    # User has data — ask for the format and file, then run the same pipeline
    # as the --data path.
    console.print()
    fmt_choice = click.prompt(
        "What format is your data in?\n"
        " [1] Apify JSON export\n"
        " [2] LinkedIn CSV export (Settings → Data privacy)\n"
        " [3] JSONL (already in MIDAS format)\n"
        " Choose",
        type=click.Choice(["1", "2", "3"]),
        show_choices=False,
    )

    fmt_map = {"1": "apify", "2": "csv", "3": "jsonl"}
    fmt = fmt_map[fmt_choice]

    file_path = click.prompt("\nPath to your data file", type=click.Path(exists=True))

    _build_formula_from_data(target, file_path, fmt)
441+
184442@main .command ()
185443@click .argument ("data_path" , type = click .Path (exists = True ))
186444@click .option ("--output" , "-o" , default = "midas_config.yaml" , help = "Output config path" )
@@ -263,6 +521,13 @@ def validate(data_path: str, config: str | None, holdout: int, min_frequency: fl
263521
264522 if holdout > 0 :
265523 # K-fold cross-validation
524+ min_posts = holdout * 5
525+ if len (posts ) < min_posts :
526+ console .print (
527+ f"[red]Need at least { min_posts } posts for { holdout } -fold CV. "
528+ f"Got { len (posts )} .[/red]"
529+ )
530+ sys .exit (1 )
266531 console .print (f" Running { holdout } -fold holdout validation...\n " )
267532 cv_result = holdout_validate (posts , n_splits = holdout , min_frequency = min_frequency )
268533
0 commit comments