Skip to content

Commit ca1cf8c

Browse files
AJ Green and claude
authored and committed
Replace static midas init with interactive onboarding flow
Add three paths to midas init:
- --data flag: auto-detect format (Apify JSON/LinkedIn CSV/JSONL), parse, analyze, validate, and set up config in one command
- Interactive TTY: prompt whether user has data, walk through format selection and file import, or set up samples with live scoring demo
- Non-interactive fallback: create sample files + print steps (CI/scripts)

Extract helpers for sample file lookup, data format detection, parsing, and interactive scoring demo. Add tests for all three paths.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c48c39b commit ca1cf8c

2 files changed

Lines changed: 360 additions & 34 deletions

File tree

midas/cli.py

Lines changed: 299 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -108,64 +108,126 @@ def score_cmd(text: str | None, file: str | None, config: str | None):
108108
_render_score(result)
109109

110110

111-
@main.command()
112-
@click.option("--dir", "-d", default=".", help="Directory to initialize in")
113-
def init(dir: str):
114-
"""Set up MIDAS in your project — guided onboarding."""
111+
def _find_sample(name: str) -> Path | None:
112+
"""Locate a bundled sample file (package data or dev examples/)."""
113+
pkg_data = Path(__file__).parent / "data" / name
114+
if pkg_data.exists():
115+
return pkg_data
116+
dev_examples = Path(__file__).parent.parent / "examples" / name
117+
if dev_examples.exists():
118+
return dev_examples
119+
return None
120+
121+
122+
def _copy_sample_files(target: Path) -> tuple[Path | None, Path | None]:
    """Ensure a sample config and sample dataset are present in *target*.

    A file that already exists in *target* is left untouched; a missing one
    is copied from the bundled samples when ``_find_sample`` can locate it.
    One status line is printed for each file that exists or gets created.

    Returns:
        ``(config_path, data_path)``. Each element is the file's path inside
        *target* when the file already existed or was just copied, and
        ``None`` when it was missing and no bundled sample was found.
    """
    import shutil

    bundled_config = _find_sample("sample_config.yaml")
    bundled_data = _find_sample("sample_data.jsonl")

    # Config file.
    config_dest = target / "midas_config.yaml"
    config_ready = True
    if config_dest.exists():
        console.print(f" [dim]Config already exists:[/dim] {config_dest}")
    elif bundled_config:
        shutil.copy(bundled_config, config_dest)
        console.print(f" [green]Created[/green] {config_dest} (sample config)")
    else:
        config_ready = False

    # Data file.
    data_dest = target / "posts.jsonl"
    data_ready = True
    if data_dest.exists():
        console.print(f" [dim]Data already exists:[/dim] {data_dest}")
    elif bundled_data:
        shutil.copy(bundled_data, data_dest)
        console.print(f" [green]Created[/green] {data_dest} (10 sample posts)")
    else:
        data_ready = False

    return (
        config_dest if config_ready else None,
        data_dest if data_ready else None,
    )
151+
152+
153+
def _detect_data_format(path: str) -> str:
154+
"""Auto-detect data format: 'apify' (JSON array), 'csv' (LinkedIn CSV), or 'jsonl'."""
155+
filepath = Path(path)
156+
suffix = filepath.suffix.lower()
157+
158+
if suffix == ".csv":
159+
return "csv"
160+
161+
with open(filepath, encoding="utf-8") as f:
162+
first_char = f.read(1).strip()
163+
164+
if first_char == "[":
165+
return "apify"
166+
return "jsonl"
167+
168+
169+
def _parse_data_file(path: str, fmt: str) -> list[dict]:
    """Parse the data file at *path* with the parser selected by *fmt*.

    ``"apify"`` uses ``parse_apify_posts`` and ``"csv"`` uses
    ``parse_linkedin_export``; any other value falls back to ``load_jsonl``.
    The chosen parser's result is returned unchanged.
    """
    from .export import parse_apify_posts, parse_linkedin_export, load_jsonl

    parsers = {
        "apify": parse_apify_posts,
        "csv": parse_linkedin_export,
    }
    parse = parsers.get(fmt, load_jsonl)
    return parse(path)
179+
180+
181+
def _interactive_score_demo(target: Path) -> None:
182+
"""Prompt user to score a post interactively."""
183+
config_path = target / "midas_config.yaml"
184+
if not config_path.exists():
185+
return
186+
187+
console.print()
188+
console.print("[bold]Let's try scoring a post![/bold]")
189+
console.print(" Paste a LinkedIn post below (or press Enter to use a sample):")
158190
console.print()
191+
192+
try:
193+
text = click.prompt("", default="", prompt_suffix=" > ", show_default=False)
194+
except (click.Abort, EOFError):
195+
return
196+
197+
if not text.strip():
198+
# Use a built-in sample
199+
text = (
200+
"I just spent 3 months building an AI agent from scratch.\n\n"
201+
"Everyone said to use a framework.\n\n"
202+
"But here's the thing → frameworks hide the complexity.\n\n"
203+
"They don't remove it.\n\n"
204+
"I learned more in those 3 months than in 2 years of using LangChain.\n\n"
205+
"Here's what actually matters:\n\n"
206+
"→ Prompt engineering is 80% of the work\n"
207+
"→ Memory management is harder than generation\n"
208+
"→ Error handling is where agents actually break\n"
209+
"→ Evaluation is still an unsolved problem\n\n"
210+
"The frameworks will catch up.\n\n"
211+
"But understanding the fundamentals won't go out of style.\n\n"
212+
"Comment AGENT if you've built from scratch too."
213+
)
214+
console.print(" [dim](Using sample post)[/dim]")
215+
216+
cfg = load_config(str(config_path))
217+
result = score(text.strip(), cfg)
218+
_render_score(result)
219+
220+
221+
def _print_static_next_steps(has_config: bool) -> None:
222+
"""Print the static next-steps text (non-interactive fallback)."""
159223
console.print("[bold]Next steps:[/bold]")
160224
console.print()
161225
console.print(" [bold cyan]1.[/bold cyan] Get your LinkedIn data (you need posts + engagement numbers):")
162226
console.print(" Use the [bold]Apify LinkedIn Post Scraper[/bold] (free tier available):")
163227
console.print(" [dim]https://console.apify.com/actors/RE0MriXnFhR3IgVnJ/input[/dim]")
164228
console.print()
165229
console.print(" Then convert to MIDAS format:")
166-
console.print(' [dim]python3 -c "from midas.export import parse_apify_posts, save_jsonl; save_jsonl(parse_apify_posts(\'apify_dataset.json\'), \'posts.jsonl\')"[/dim]')
167-
console.print()
168-
console.print(" [dim]Full guide: https://github.com/ajsai47/midas/blob/main/docs/01-export-your-data.md[/dim]")
230+
console.print(" [dim]midas init --data apify_dataset.json[/dim]")
169231
console.print()
170232
console.print(" [bold cyan]2.[/bold cyan] Analyze your posts to build your formula:")
171233
console.print(" [dim]midas analyze posts.jsonl -o midas_config.yaml[/dim]")
@@ -176,11 +238,207 @@ def _find_sample(name: str) -> Path | None:
176238
console.print(" [bold cyan]4.[/bold cyan] Validate that your formula predicts engagement:")
177239
console.print(" [dim]midas validate posts.jsonl --config midas_config.yaml[/dim]")
178240
console.print()
179-
if sample_config or config_path.exists():
241+
if has_config:
180242
console.print(" [dim]Tip: A sample config and data were created above — try steps 3-4 now to see it in action.[/dim]")
181243
console.print()
182244

183245

246+
def _import_and_build_formula(target: Path, source: str, fmt: str) -> None:
    """Import a data file, then build, save and validate a config from it.

    Shared by the ``--data`` path and the interactive "I have data" path of
    ``midas init`` so both run the same steps and print the same output.

    Steps: parse *source* as *fmt*, write the posts to ``posts.jsonl`` in
    *target*, analyze that file, export ``midas_config.yaml`` to *target*,
    then validate the config against the saved posts when there are at
    least five of them.

    Args:
        target: Directory that receives ``posts.jsonl`` and
            ``midas_config.yaml``.
        source: Path of the user's data file.
        fmt: One of ``"apify"``, ``"csv"`` or ``"jsonl"``.

    Exits the process with status 1 when parsing yields no posts.
    """
    import json as _json

    from .analyze import analyze_file, export_config
    from .export import save_jsonl
    from .validate import validate as validate_fn

    posts = _parse_data_file(source, fmt)
    if not posts:
        console.print("[red]No posts found in the file.[/red]")
        sys.exit(1)

    data_path = target / "posts.jsonl"
    save_jsonl(posts, str(data_path))
    console.print(f" [green]Parsed {len(posts)} posts[/green] → {data_path}")

    if fmt == "csv":
        console.print()
        console.print(" [yellow]Note:[/yellow] LinkedIn CSV exports don't include engagement metrics.")
        console.print(" You'll need to add reactions/comments/reposts manually or via the LinkedIn API.")
        console.print()

    # Analyze
    console.print()
    console.print("[bold]Analyzing your posts...[/bold]")
    result = analyze_file(str(data_path))
    sig_count = sum(1 for s in result.signals if s.significant)

    config_path = target / "midas_config.yaml"
    export_config(result, str(config_path))

    console.print(f" Posts analyzed: [bold]{result.total_posts}[/bold]")
    console.print(f" Signals found: [bold]{len(result.signals)}[/bold] ({sig_count} statistically significant)")
    console.print(f" [green]Config saved to {config_path}[/green]")

    # Validate
    console.print()
    console.print("[bold]Validating your formula...[/bold]")

    # Validation runs on the records read back from posts.jsonl, not on the
    # in-memory `posts` list.
    # NOTE(review): the file is opened with the platform default encoding,
    # as before — confirm this matches what save_jsonl writes.
    with open(data_path) as f:
        loaded_posts = [
            _json.loads(stripped) for line in f if (stripped := line.strip())
        ]

    if len(loaded_posts) >= 5:
        cfg = load_config(str(config_path))
        val_result = validate_fn(loaded_posts, cfg)

        color = "green" if val_result.spearman_rho > 0.3 else "yellow" if val_result.spearman_rho > 0 else "red"
        strength = val_result.correlation_strength.upper()
        sig_str = ", SIGNIFICANT" if val_result.is_significant else ""
        console.print(f" Spearman rho: [{color}]{val_result.spearman_rho:+.2f}[/{color}] ({strength}{sig_str})")

        if val_result.spearman_rho > 0 and val_result.is_significant:
            console.print(" [green]Your formula predicts engagement![/green]")
    else:
        console.print(" [dim]Not enough posts to validate (need at least 5).[/dim]")

    console.print()
    console.print("[bold]You're all set.[/bold] Try scoring a draft:")
    console.print(' [dim]midas score "Your draft here..."[/dim]')
    console.print()


@main.command()
@click.option("--dir", "-d", default=".", help="Directory to initialize in")
@click.option("--data", type=click.Path(exists=True), help="Path to your LinkedIn data file (auto-detects format)")
def init(dir: str, data: str | None):
    """Set up MIDAS in your project — guided onboarding."""
    # The docstring above is the command's --help text; keep it short.
    #
    # Three paths:
    #   A. --data given: detect the format and run the full import pipeline.
    #   B. stdin is not a TTY: copy samples and print static next steps.
    #   C. interactive TTY: ask whether the user has data, then either demo
    #      with samples or prompt for format + file and run the pipeline.
    target = Path(dir)
    target.mkdir(parents=True, exist_ok=True)

    console.print(Panel(
        "[bold]MIDAS[/bold] — Reverse-engineer your LinkedIn into a\npersonalized scoring formula.",
        border_style="yellow",
    ))
    console.print()

    is_interactive = sys.stdin.isatty() and data is None

    # ── Path A: User provided --data flag ──────────────────────────────
    if data:
        fmt = _detect_data_format(data)
        console.print(f" Detected format: [bold]{fmt}[/bold]")
        _import_and_build_formula(target, data, fmt)
        return

    # ── Path B: Non-interactive (piped stdin / CI) ─────────────────────
    if not is_interactive:
        created_config, _ = _copy_sample_files(target)
        console.print()
        _print_static_next_steps(created_config is not None)
        return

    # ── Path C: Interactive onboarding ─────────────────────────────────
    has_data = click.confirm("Do you have your LinkedIn post data ready?", default=False)

    if not has_data:
        # No data — set up with samples and demo scoring
        console.print()
        console.print(" No worries! Here's how to get it:")
        console.print()
        console.print(" [bold cyan]1.[/bold cyan] Go to the Apify LinkedIn Post Scraper (free tier available):")
        console.print(" [dim]https://console.apify.com/actors/RE0MriXnFhR3IgVnJ/input[/dim]")
        console.print()
        console.print(" [bold cyan]2.[/bold cyan] Run the scraper on your profile")
        console.print()
        console.print(" [bold cyan]3.[/bold cyan] Download the JSON dataset and save it here, then run:")
        console.print(" [dim]midas init --data apify_dataset.json[/dim]")
        console.print()
        console.print(" In the meantime, let's set up with sample data so you can see how MIDAS works.")
        console.print()

        _copy_sample_files(target)
        _interactive_score_demo(target)

        console.print(" Your formula is working. Once you have your real data:")
        console.print(" [dim]midas init --data your_posts.json[/dim]")
        console.print()
        return

    # User has data — walk them through import
    console.print()
    fmt_choice = click.prompt(
        "What format is your data in?\n"
        " [1] Apify JSON export\n"
        " [2] LinkedIn CSV export (Settings → Data privacy)\n"
        " [3] JSONL (already in MIDAS format)\n"
        " Choose",
        type=click.Choice(["1", "2", "3"]),
        show_choices=False,
    )

    fmt_map = {"1": "apify", "2": "csv", "3": "jsonl"}
    fmt = fmt_map[fmt_choice]

    file_path = click.prompt("\nPath to your data file", type=click.Path(exists=True))

    _import_and_build_formula(target, file_path, fmt)
440+
441+
184442
@main.command()
185443
@click.argument("data_path", type=click.Path(exists=True))
186444
@click.option("--output", "-o", default="midas_config.yaml", help="Output config path")
@@ -263,6 +521,13 @@ def validate(data_path: str, config: str | None, holdout: int, min_frequency: fl
263521

264522
if holdout > 0:
265523
# K-fold cross-validation
524+
min_posts = holdout * 5
525+
if len(posts) < min_posts:
526+
console.print(
527+
f"[red]Need at least {min_posts} posts for {holdout}-fold CV. "
528+
f"Got {len(posts)}.[/red]"
529+
)
530+
sys.exit(1)
266531
console.print(f" Running {holdout}-fold holdout validation...\n")
267532
cv_result = holdout_validate(posts, n_splits=holdout, min_frequency=min_frequency)
268533

0 commit comments

Comments
 (0)