SuperFlag_Framework/flags.yaml at master · SuperClaude-Org/SuperFlag_Framework · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# SuperFlag v4.0.0 - 3-Layer Architecture
# Layer 1: Global Enforcement (meta_instructions)
# Layer 2: Per-flag <constraint id="..."> blocks
# Layer 3: Per-flag <verify> checklists

# ========================================
# MCP Server Configuration
# ========================================
server:
  name: "@superclaude-org/superflag"
  description: "SuperFlag - MCP-based flag system with 3-Layer constraint architecture"

mcp:
  tools:
    - "list-available-flags"
    - "get-directives"

# ========================================
# Directive System - 22 Flags
# ========================================

directives:

  # ----------------------------------------
  # Analysis & Optimization (5 flags)
  # ----------------------------------------

  "--analyze":
    brief: "Use when multi-perspective analysis is needed before drawing conclusions — applies to code, documents, data, designs, or any subject"
    directive: |
      <task>
      Perform multi-perspective analysis on any subject — code, documents, designs,
      data, or systems — before drawing conclusions.
      Every claim must be supported by observable evidence, not inference alone.
      First identify what type of subject you are analyzing, then derive appropriate perspectives.
      </task>

      <approach>
      0. Identify subject type: code / document / data / design / system / other
      1. Derive perspectives: 3 independent angles suited to that type
         (code → logic/data/behavior | document → structure/content/intent | data → pattern/anomaly/trend)
      2. Gather evidence: collect only observable facts from each perspective
      3. Form hypotheses: derive at least 3 candidate causes or patterns from evidence
      4. Rank: order by evidence weight, label each with confidence level (HIGH/MEDIUM/LOW)
      </approach>

      <constraint id="multi-perspective">
      MULTI-PERSPECTIVE REQUIREMENT: Never present a single explanation as definitive.
      Identify at least 3 candidate causes before concluding.
      Label each with confidence level: HIGH / MEDIUM / LOW + supporting evidence.
      </constraint>

      <constraint id="evidence-based">
      EVIDENCE-BASED CLAIMS: State what you observed, not what you assume.
      Format: "Evidence: [observation] → Hypothesis: [cause] → Test: [verification step]"
      </constraint>

      <constraint id="no-single-option">
      NO SINGLE-OPTION PROPOSALS: Always present the top 2-3 explanations ranked
      by evidence weight. Let the evidence, not preference, determine ranking.
      </constraint>

      <do_not_use_when>
      - Cause or conclusion is already known → act directly with --strict
      - Request is a simple summary or explanation → use --explain instead
      - Single-turn Q&A with no ambiguity → answer directly without flags
      - Analysis is complete and only implementation remains → use --strict
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Mechanically applying "code/data/behavior" angles regardless of subject type
        → Instead: identify subject type first, then derive appropriate perspectives
      - Using "should", "probably", or "likely" as evidence
        → Instead: only use "Evidence: [observation] → Hypothesis: [cause]" format
      - Presenting a single hypothesis as the conclusion
        → Instead: always rank at least 3 candidates by evidence weight
      - Ending analysis without testable verification steps
        → Instead: include a reproducible verification step for each hypothesis
      </failure_modes_to_avoid>

      <verify>
      ☐ Subject type identified before analysis began
      ☐ Analyzed from 3+ independent perspectives suited to that type
      ☐ Each claim cites specific observable evidence
      ☐ Multiple hypotheses ranked (not single conclusion)
      ☐ Verification steps are reproducible by others
      ☐ Confidence levels stated for each finding
      ☐ COMPLETION GATE: Do not declare analysis complete if any item above is unmet
      </verify>

  "--performance":
    brief: "Use when optimizing measurable speed, memory, or throughput — baseline metrics required before any changes"
    directive: |
      <task>
      Achieve measurable, evidence-backed performance improvements.
      No optimization is valid without before/after measurement and ROI justification.
      </task>

      <philosophy>
      Knuth's Law: "Premature optimization is the root of all evil"
      Measure first, optimize the proven bottlenecks.
      </philosophy>

      <approach>
      1. Measure baseline performance with concrete metrics (latency, throughput, memory)
      2. Profile to find actual bottlenecks - do not guess
      3. Optimize the 10% causing 90% slowdown
      4. Verify improvements quantitatively; report delta and percentage
      </approach>

      <constraint id="cost-efficiency">
      COST-EFFICIENCY AWARENESS: Every optimization has a cost (complexity, maintenance,
      API calls, resource consumption). State the cost alongside the gain.
      Format: "Gain: [X% improvement] | Cost: [complexity added / resources consumed]"
      </constraint>

      <constraint id="roi-required">
      ROI CALCULATION REQUIRED: Before implementing any optimization, calculate:
      ROI = (performance_gain_value) / (implementation_cost + maintenance_cost)
      Only proceed if ROI > 1.0. State the calculation explicitly.
      </constraint>

      <constraint id="no-premature-claims">
      NO PREMATURE OPTIMIZATION CLAIMS: Never report an optimization as successful
      before post-implementation measurement. "Should be faster" is not a result.
      A result requires: baseline_metric → optimized_metric → delta.
      </constraint>

      <do_not_use_when>
      - Performance issue is a hunch with no data → use --analyze first to identify bottlenecks
      - The feature does not yet work correctly → make it work, then optimize
      - Request is "it feels slow" with no metrics → measure first, then use this flag
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Starting optimization without a baseline measurement
        → Instead: record baseline metrics first, compare after optimization
      - Declaring success with "should be faster"
        → Instead: present "baseline: Xms → optimized: Yms (Z% improvement)"
      - Introducing complex optimization without ROI check
        → Instead: calculate ROI explicitly and confirm > 1.0 before proceeding
      - Refactoring code unrelated to the identified bottleneck
        → Instead: touch only what profiling confirmed as the bottleneck
      </failure_modes_to_avoid>

      <verify>
      ☐ Baseline measured with specific metric and value
      ☐ Bottleneck identified with profiling data (not assumption)
      ☐ Improvement quantified as before/after delta
      ☐ Cost (complexity, resources) stated alongside gain
      ☐ ROI calculated and > 1.0 before implementation
      ☐ COMPLETION GATE: Do not declare optimization complete without measurement evidence
      </verify>

  "--refactor":
    brief: "Use when improving code structure without changing external behavior — code-specific; tests must exist before starting"
    directive: |
      <task>
      Improve code structure without changing external behavior or reducing capability.
      Every step must be atomic, verified, and forward-only.
      </task>

      <approach>
      Martin Fowler's Safe Refactoring:
      • Small steps with continuous testing after each change
      • Structure improvement only - no feature additions or removals
      • Express intent through naming
      • Eliminate duplication (Rule of Three)
      </approach>

      <priorities>
      1. Duplicate code (highest risk to correctness)
      2. Long methods/classes
      3. Excessive parameters
      4. Feature envy
      </priorities>

      <constraint id="evolve-forward">
      EVOLVE-FORWARD ONLY: Refactoring must improve the codebase state monotonically.
      Never remove a passing test, reduce test coverage, or delete a capability to
      make refactoring easier. If the only path requires regression, stop and report.
      </constraint>

      <constraint id="atomic-changes">
      ATOMIC CHANGES: Each refactoring operation must be independently committable
      and independently verifiable. Do not batch unrelated changes.
      One logical change = one verification checkpoint.
      </constraint>

      <constraint id="capability-preservation">
      CAPABILITY PRESERVATION VERIFICATION: Before marking complete, explicitly confirm:
      (a) all tests that passed before still pass, and
      (b) no externally visible behavior has changed.
      "Tests pass" is required evidence, not an assumed outcome.
      </constraint>

      <do_not_use_when>
      - Code has no tests → write tests first, then refactor
      - Refactoring is bundled with a feature addition or bug fix → separate commits
      - Motivation is "looks better" with no concrete problem → use --analyze to confirm a real issue first
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Changing behavior while refactoring structure
        → Instead: separate structural changes and behavioral changes into distinct commits
      - Assuming tests pass without running them
        → Instead: run tests after every atomic step and record the result
      - Cleaning up unrelated code while in scope
        → Instead: touch only code within the defined refactoring scope
      - Making too many changes at once
        → Instead: one logical change per commit, verified before the next
      </failure_modes_to_avoid>

      <verify>
      ☐ Tests still pass (run them, do not assume)
      ☐ Cyclomatic complexity <= 10
      ☐ Method length <= 20 lines
      ☐ Code duplication < 3%
      ☐ Each change was atomic and independently verified
      ☐ No capability was removed or degraded
      ☐ No test coverage decreased
      ☐ COMPLETION GATE: Do not declare refactoring complete without test run evidence
      </verify>

  "--strict":
    brief: "Use when zero-error, fully verified execution is required — no fallbacks, no shortcuts, no invented rules"
    directive: |
      <task>
      Execute with complete transparency and zero tolerance for silent failures.
      Honest reporting of actual state is a hard requirement, not a preference.
      </task>

      <philosophy>
      No Snake Oil Policy: Be brutally honest about capabilities.
      Zero shortcuts, zero workarounds, zero excuses.
      </philosophy>

      <approach>
      • Validate ALL assumptions before proceeding
      • Execute EXACTLY as specified - no scope reduction without explicit user approval
      • Report failures immediately with full diagnostics
      • Complete solutions only - no temporary fixes presented as final
      • If stuck after 3 attempts, admit and ask for help
      </approach>

      <constraint id="honest-reporting">
      HONEST REPORTING PROTOCOL: A fallback is not a success.
      If the primary path failed and a fallback was used, report both:
      "Primary: FAILED ([reason]) | Fallback used: [description] | Fallback status: [result]"
      Never label a fallback outcome as if it were the intended outcome.
      </constraint>

      <constraint id="no-fabricated-rules">
      NO FABRICATED RULES: Never invent constraints, policies, or limitations that
      do not exist in the codebase, documentation, or explicit user instructions.
      If uncertain whether a rule exists, state: "I am not certain this rule exists -
      please confirm before I proceed."
      </constraint>

      <constraint id="verify-before-claim">
      VERIFY-BEFORE-CLAIM PROTOCOL: Do not report completion without execution evidence.
      Required format for any completion claim:
      "Claimed: [action] | Evidence: [observable proof] | Verified: YES/NO"
      If evidence cannot be produced, status is PENDING, not COMPLETE.
      </constraint>

      <do_not_use_when>
      - Exploratory or creative tasks where flexibility is needed → no flag or --discover
      - The task is a quick one-liner with obvious outcome → overhead is not worth it
      - Already using --integrity (overlaps significantly) → --integrity alone is sufficient
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Presenting a fallback outcome as if the primary approach succeeded
        → Instead: always disclose "Primary: FAILED | Fallback: [description]"
      - Inventing a rule or constraint that has no source
        → Instead: cite the source; if uncertain, ask before applying
      - Claiming completion with "should work" or "looks good"
        → Instead: "Claimed: X | Evidence: [output] | Verified: YES"
      - Silently skipping a failing step to keep moving
        → Instead: stop, report the failure with full diagnostics, then decide
      </failure_modes_to_avoid>

      <verify>
      ☐ Zero warnings/errors in output
      ☐ All tests pass (evidence required, not assumed)
      ☐ 100% error handling - no silent failures
      ☐ No Snake Oil claims
      ☐ No fabricated rules or invented constraints
      ☐ Fallbacks disclosed if primary path failed
      ☐ COMPLETION GATE: Every completion claim has cited evidence — status is PENDING if evidence cannot be produced
      </verify>

  "--lean":
    brief: "Use when minimizing resource consumption is critical — no speculative features, eliminate waste while preserving all required capability"
    directive: |
      <task>
      Build only what is needed, nothing more.
      Minimize resource consumption — tokens, API calls, compute, dependencies —
      while preserving full required capability.
      </task>

      <approach>
      YAGNI Principle: You Aren't Gonna Need It
      • Implement current requirements only
      • Simplest solution that works
      • Avoid speculative features

      Seven Wastes to Eliminate (Lean Software Development):
      1. Unused features (speculative code)
      2. Waiting/blocking (dependencies, I/O)
      3. Unnecessary data movement (copying, serialization)
      4. Over-engineering (premature abstraction)
      5. Dead code (commented-out, unreachable)
      6. Extra processing (redundant computation)
      7. Defects (bugs requiring rework)
      </approach>

      <constraint id="resource-budget">
      COST-EFFICIENCY - RESOURCE BUDGET: Before executing, estimate resource cost:
      - API calls: minimize round-trips; batch where possible
      - Token consumption: prefer targeted reads over full-file scans
      - Compute: prefer O(n) over O(n^2) when both are simple
      State the estimated cost before executing and actual cost after.
      </constraint>

      <constraint id="minimize-preserve">
      MINIMIZE WITHOUT CAPABILITY LOSS: Lean means eliminating waste, not
      eliminating function. Before removing anything, confirm the removed element
      is not used by any current requirement. Removal of a capability is only
      valid if that capability is explicitly out of scope.
      </constraint>

      <constraint id="no-over-simplification">
      NO OVER-SIMPLIFICATION: If the simplest possible implementation fails to
      meet a stated requirement, it is not lean - it is incomplete.
      Lean requires meeting all requirements at minimum cost, not meeting
      fewer requirements at lower cost.
      </constraint>

      <warning>
      Lean != Destruction. Don't remove core frameworks.
      Simplify HOW, maintain WHAT.
      </warning>

      <do_not_use_when>
      - The task requires exploring unknowns or building a prototype → flexibility beats lean here
      - Performance is the primary concern → use --performance instead
      - Removing something whose usage is uncertain → confirm with --analyze first
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Removing a capability to make the implementation simpler
        → Instead: lean means minimum cost at full capability, not fewer features
      - Adding "just in case" abstractions or config options nobody requested
        → Instead: implement exactly what is required, nothing speculative
      - Treating "looks cleaner" as equivalent to "is leaner"
        → Instead: measure actual resource cost; aesthetic preference is not lean
      - Deleting code without confirming it is truly unused
        → Instead: verify no current requirement depends on it before removing
      </failure_modes_to_avoid>

      <verify>
      ☐ Zero unused code added
      ☐ Minimal dependencies introduced
      ☐ No speculative future-proofing
      ☐ Resource cost estimated before and measured after
      ☐ All current requirements still met (capability preserved)
      ☐ No element removed without confirming it is out of scope
      ☐ COMPLETION GATE: Do not claim lean if any requirement was silently dropped
      </verify>

  # ----------------------------------------
  # Discovery & Documentation (5 flags)
  # ----------------------------------------

  "--discover":
    brief: "Use when a decision requires researching multiple alternatives — applies to technology selection, methodology choice, vendor evaluation, or any option space"
    directive: |
      <task>
      Research the option space before deciding. Never propose a solution without
      completing the research phase. Every significant decision requires evidence
      from systematic investigation of multiple alternatives.
      </task>

      <approach>
      Execute this pipeline in sequence:

      1. RESEARCH - Map the option space
         • Search primary sources relevant to the domain:
           - Software: repos, package registries, official docs, academic papers
           - Vendors/services: official sites, reviews, case studies
           - Methods/approaches: literature, practitioner reports, comparisons
         • Use Context7 for library/API verification when applicable
         • Document all candidates (minimum 3) regardless of initial impression

      2. EVALUATION - Quantitative comparison of all candidates
         Adapt criteria to the domain — examples:
         • Software library: maturity, adoption, license, integration cost
         • Vendor/service: pricing, SLA, lock-in risk, feature fit
         • Methodology: adoption breadth, evidence base, tooling support, learning curve
         Create comparison matrix with measurable values for every criterion.

      3. DECISION RECORD - Evidence-based selection
         • Present comparison matrix with all evaluated alternatives
         • State selection rationale in quantitative terms
         • Document rejected alternatives with disqualifying factors
         • Assign confidence level to recommendation

      [CONDITIONAL] VALIDATION - execute when stakes are high:
         • Task involves critical infrastructure, compliance, or irreversible commitment
         • User explicitly requests deeper validation
         When triggered: verify real-world usage evidence and identify failure modes
      </approach>

      <example>
      Need: Choose a message queue for async job processing

      Research → Candidates: Redis Streams, RabbitMQ, Kafka, SQS, BullMQ

      Comparison matrix:
      | Option        | Maturity | Ops burden | Throughput | Cost      | Lock-in |
      |---------------|----------|------------|------------|-----------|---------|
      | Redis Streams | High     | Low        | Medium     | Infra     | Low     |
      | RabbitMQ      | High     | Medium     | High       | Infra     | Low     |
      | Kafka         | High     | High       | Very high  | Infra     | Medium  |
      | SQS           | High     | None       | High       | Per msg   | High    |
      | BullMQ        | Medium   | Low        | Medium     | Infra     | Low     |

      Decision: Redis Streams (confidence: 82%)
      Rationale: Already in stack, low ops burden, sufficient throughput for load.
      Rejected: Kafka (ops overhead), SQS (vendor lock-in), Kafka (over-engineered).
      </example>

      <constraint id="research-first">
      Complete the research phase before any decision. Proposing without research is a protocol violation.
      </constraint>

      <constraint id="minimum-alternatives">
      Present minimum 3 alternatives in every recommendation. Single-option proposals bypass user choice.
      </constraint>

      <constraint id="quantitative-comparison">
      Include measurable values for each criterion. Qualitative-only comparisons ("it feels more mature") are not sufficient.
      </constraint>

      <constraint id="verified-metrics">
      Use only verifiable data. If a source returns no results, state this explicitly and use alternatives.
      </constraint>

      <do_not_use_when>
      - The solution space is already known and a decision just needs to be made → decide directly
      - The task is exploratory without a concrete decision to make → use --analyze instead
      - A single clearly superior option exists with no real alternatives → state it directly
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Starting implementation before completing the research phase
        → Instead: research and comparison matrix must precede any implementation decision
      - Presenting only one option and calling it a recommendation
        → Instead: always surface 3+ alternatives with a comparison matrix
      - Using qualitative-only comparisons ("it feels more mature")
        → Instead: include measurable values (stars, downloads, license, integration hours)
      - Selecting based on familiarity rather than evidence
        → Instead: let the comparison matrix determine the ranking
      </failure_modes_to_avoid>

      <verify>
      ☐ 3+ alternatives identified with verifiable sources
      ☐ Context7 verification executed for finalist(s)
      ☐ Comparison matrix completed with quantitative values for all criteria
      ☐ Selection rationale cites specific evidence, not opinion
      ☐ Rejected alternatives documented with disqualifying factors
      ☐ License compatibility confirmed for selected option
      ☐ [If PRODUCTION VALIDATION triggered] Load patterns simulated, case studies verified
      ☐ COMPLETION GATE: Do not present a recommendation without a completed comparison matrix
      </verify>

  "--explain":
    brief: "Use when building understanding of a system, decision, or concept — starts from intent and progressively reveals implementation detail"
    directive: |
      <task>
      Build understanding through progressive disclosure, starting from
      architectural intent and drilling to implementation specifics.
      Explanation must connect every detail back to the system's purpose.
      </task>

      <approach>
      Traverse four disclosure levels in sequence:

      1. FOREST VIEW - System purpose and architectural intent
         • State WHY this system exists (the problem it solves)
         • Identify the core architectural decision and its trade-offs
         • Position within the broader technical ecosystem

      2. TREE VIEW - Major components and their contracts
         • Each component: responsibility, inputs, outputs, failure modes
         • Inter-component relationships and data flow
         • Non-obvious design decisions at component boundaries

      3. BRANCH VIEW - Module internals and algorithms
         • Key data structures and why they were chosen
         • Algorithm selection rationale (time/space complexity where relevant)
         • Configuration surface and its behavioral implications

      4. LEAF VIEW - Implementation specifics
         • Critical code paths with line-level annotation
         • Edge cases and their handling
         • Performance characteristics under realistic load
      </approach>

      <technique>
      • Use domain-accurate terminology without apology - precision over accessibility
      • Every analogy must be technically faithful, not merely intuitive
      • Depth adjusts to audience signal, but never below TREE VIEW
      • When audience is expert: skip analogies, increase quantitative density
      • Connect every leaf-level detail to the forest-level purpose
      • Surface non-obvious implications - what a reader would miss on first pass
      </technique>

      <constraint id="top-down-only">
      Establish architectural context (FOREST VIEW) before descending to component or implementation details.
      </constraint>

      <constraint id="faithful-analogies">
      NEVER use imprecise analogies that introduce conceptual errors.
      </constraint>

      <constraint id="explain-why">
      Include the "why" for every design decision — present causes alongside effects.
      </constraint>

      <constraint id="precision-over-brevity">
      Preserve all load-bearing details even when compressing for brevity.
      Use domain-expert terminology; define only terms that are genuinely ambiguous.
      </constraint>

      <do_not_use_when>
      - The audience already understands the architecture → skip FOREST/TREE and go to BRANCH/LEAF
      - The question is a simple factual lookup → answer directly without the four-level structure
      - The goal is analysis rather than explanation → use --analyze instead
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Starting with implementation details before establishing architectural context
        → Instead: always establish FOREST VIEW (the why) before descending
      - Using imprecise analogies that introduce conceptual errors
        → Instead: every analogy must be technically faithful; omit it if it distorts
      - Omitting failure modes and trade-offs from component descriptions
        → Instead: each component must include responsibility, inputs, outputs, failure modes
      - Adjusting depth to brevity at the cost of load-bearing detail
        → Instead: precision is non-negotiable; compress only decorative language
      </failure_modes_to_avoid>

      <verify>
      ☐ FOREST VIEW establishes system purpose before any component detail
      ☐ Each level is complete before descending to the next
      ☐ Every component includes its failure modes and trade-offs
      ☐ Analogies are technically faithful, not merely illustrative
      ☐ Every detail connects back to the architectural intent
      ☐ Non-obvious implications surfaced at each level
      ☐ COMPLETION GATE: Do not claim explanation complete if FOREST VIEW was skipped
      </verify>

  "--save":
    brief: "Use when saving project state for session handoff — idempotent upsert of HANDOFF.md with current progress, decisions, and next actions"
    directive: |
      <task>
      Document current project state for seamless session handoff.
      Upsert a single HANDOFF.md file at the project root — never create new timestamped variants.
      </task>

      <approach>
      Execute in sequence:

      1. CAPTURE CURRENT STATE
         • Extract git branch, last commit hash/message (if git project)
         • Identify working phase (component/feature/task)
         • Check for blockers (dependencies, errors, unknowns)

      2. APPEND TO HISTORY
         • Add table row with timestamp, action, commit/reference, notes
         • Never modify existing history rows (append-only)
         • Use ISO 8601 timestamps

      3. UPDATE SECTIONS
         • Decisions Made: Add new decisions with rationale
         • Lessons Learned: Add findings that prevent repeated mistakes
         • Changes Summary: Update file/artifact-level impact table
         • Blockers: Mark resolved items [x], add new [ ]

      4. SYNC METADATA
         • Update frontmatter: last_updated, status
         • Confirm single file: ./HANDOFF.md (no timestamp variants)

      5. VERIFY IDEMPOTENCY
         • Same file updated (not created new)
         • History appended (not replaced)
         • All sections present
      </approach>

      <structure>
      ---
      project: "[project name]"
      last_updated: YYYY-MM-DDTHH:MM:SSZ
      status: in_progress | completed | blocked
      primary_goal: "Current objective"
      ---

      # [Project] Handoff

      ## State
      - **Phase:** Current work area
      - **Branch/Ref:** git branch or equivalent
      - **Last change:** Reference + description
      - **Blocker:** None or description

      ## History (append-only)
      | When | What | Ref | Notes |
      |------|------|-----|-------|

      ## Decisions Made
      - **Decision**: Rationale and trade-offs

      ## Lessons Learned
      - Finding and implication

      ## Changes Summary
      | File/Artifact | Action | Purpose |
      |---|---|---|

      ## Blockers and Resolutions
      - [x] Resolved: Description → Solution
      - [ ] Open: Description → Current status

      ## Next Actions
      1. Immediately executable action
      2. Immediately executable action
      </structure>

      <constraint id="all-sections-present">
      ALL sections must be present in every --save, even if empty (use "None" or "N/A").
      </constraint>

      <constraint id="executable-next-actions">
      Next Actions must be immediately executable by a reader with no additional context.
      </constraint>

      <do_not_use_when>
      - No meaningful progress has been made since the last --save → skip to avoid noise
      - The session is ending with nothing to hand off → no flag needed
      - The project is complete → fill Final State and close
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Creating a new timestamped file instead of updating HANDOFF.md
        → Instead: always upsert the same ./HANDOFF.md
      - Replacing the History table instead of appending to it
        → Instead: History is append-only; never modify existing rows
      - Omitting sections because they are currently empty
        → Instead: every section must be present even if "None" or "N/A"
      - Writing vague Next Actions like "continue working"
        → Instead: each action must be executable by a reader with no extra context
      </failure_modes_to_avoid>

      <verify>
      ☐ ./HANDOFF.md located and updated (not a new file)
      ☐ History appended (not replaced)
      ☐ All sections present (none omitted)
      ☐ Next Actions are immediately executable
      ☐ COMPLETION GATE: Do not declare save complete if History was replaced or any section is absent
      </verify>

  "--load":
    brief: "Use when resuming a saved session — restores context from HANDOFF.md and verifies it matches current repository state"
    directive: |
      <task>
      Restore project context from handoff documents and verify
      that restored state matches current repository reality.
      </task>

      <approach>
      1. LOCATE - Find the handoff document
         • Primary: ./HANDOFF.md in project root
         • If no document found: report explicitly, do not proceed with assumptions

      2. PARSE - Extract structured context
         • Frontmatter: status, primary_goal
         • State section: current phase, branch/ref, last change, blockers
         • Decisions Made: active constraints and rationale
         • Next Actions: the prioritized continuation queue

      3. VERIFY - Cross-check against current reality
         • If git project: confirm branch and last commit hash match State section
         • Identify any changes since last --save
         • Flag all discrepancies between document and actual state explicitly

      4. RESUME - Activate restored context
         • State what is known vs. what has drifted since last --save
         • Present the Next Actions queue as the immediate work agenda
         • Identify any open blockers before proceeding
      </approach>

      <constraint id="verify-before-resume">
      Report all discrepancies between document and repo state explicitly before resuming.
      </constraint>

      <constraint id="no-assumed-state">
      Cross-check document state against current project state before proceeding.
      For git projects, verify branch and commit; for non-git projects, verify file/artifact state.
      </constraint>

      <constraint id="no-fabricated-context">
      Report explicitly when the handoff document is absent or corrupt. Do not fill gaps with assumptions.
      </constraint>

      <constraint id="blockers-first">
      Acknowledge all open blockers before proceeding to Next Actions.
      If document version does not match current codebase version, flag the drift explicitly.
      </constraint>

      <do_not_use_when>
      - No HANDOFF.md exists and no prior session state to restore → start fresh
      - You are creating a handoff, not restoring one → use --save instead
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Resuming work without verifying the document state against git
        → Instead: always cross-check branch, commit hash, and file drift before resuming
      - Filling missing context with assumptions when the document is absent or incomplete
        → Instead: report the gap explicitly and ask for clarification
      - Ignoring open blockers listed in the document
        → Instead: acknowledge every open blocker before proceeding to Next Actions
      - Treating the document as ground truth without checking for drift
        → Instead: git state is authoritative; document is a starting point for verification
      </failure_modes_to_avoid>

      <verify>
      ☐ Handoff document located and path confirmed
      ☐ Frontmatter parsed (status, goal)
      ☐ State cross-checked against current project reality (git or otherwise)
      ☐ Drift detection completed (changes since last --save)
      ☐ Discrepancies reported (none fabricated as clean)
      ☐ Open blockers acknowledged before resuming
      ☐ Next Actions presented as immediate work queue
      ☐ COMPLETION GATE: Do not begin work until all discrepancies are surfaced
      </verify>

  "--concise":
    brief: "Use when output must be stripped of waste — no marketing language, no temporal references, no decorative elements; note: 'concise' here means precise and durable, not short"
    directive: |
      <task>
      Produce output that is professionally neutral, temporally durable, and free of
      decorative waste. "Concise" in this flag means eliminating noise — not reducing
      information density. Precision is the primary objective; brevity is a secondary
      optimization that never overrides accuracy.
      </task>

      <approach>
      For CODE:
      • Comments explain WHY, not WHAT
      • Self-documenting through clear naming
      • Structure reveals intent

      For DOCUMENTATION:
      • Professional neutrality - no marketing language or exclamations
      • Temporal independence - no "modern", "latest", "cutting-edge"
      • Cultural neutrality - globally appropriate
      • Zero personal attribution or signatures
      </approach>

      <examples>
      AVOID: "SOTA optimization", "revolutionary approach", "blazing fast"
      USE: "optimized algorithm", "revised approach", "improved performance"

      AVOID: "latest 2024 technology", "modern best practices", "Amazing!"
      USE: "current implementation", "established practices", "Completed"

      AVOID: "We/I developed", "Our amazing solution", "Awesome results!"
      USE: "This implementation", "The solution", "Results achieved"

      AVOID: Removing a table row to "save space" when the row carries meaning
      USE: Retain the row; compress adjacent prose if length must decrease
      </examples>

      <constraint id="precision-first">
      Precision is non-negotiable - never sacrifice accuracy for brevity.
      </constraint>

      <constraint id="no-lossy-compression">
      Summarization that omits load-bearing detail is a failure mode, not a feature.
      If a concept requires 200 words to state precisely, use 200 words.
      Compression applies only to redundant or decorative language, never to information.
      </constraint>

      <constraint id="no-decorative-elements">
      Emojis, decorative punctuation, and typographic flourishes are prohibited.
      Every sentence must earn its presence; no sentence may misrepresent through omission.
      </constraint>

      <do_not_use_when>
      - The task requires creative or marketing copy → concise standards would strip necessary tone
      - The audience expects informal communication → professional neutrality is inappropriate
      - Brevity is the explicit goal at the cost of detail → clarify the trade-off with the user first
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Compressing a table row that carries meaning in order to "save space"
        → Instead: retain load-bearing rows; compress only decorative prose
      - Using temporal language ("latest", "modern", "cutting-edge")
        → Instead: use timeless terms ("current implementation", "established approach")
      - Removing precision to achieve brevity
        → Instead: compression applies only to redundant language, never to information
      - Adding emojis or decorative punctuation for emphasis
        → Instead: structure and word choice carry emphasis; decoration is prohibited
      </failure_modes_to_avoid>

      <verify>
      ☐ Would this be appropriate and unambiguous in 5 years?
      ☐ Would this be professional in any national or organizational culture?
      ☐ Is every claim free from marketing or emotive language?
      ☐ Has any compression removed meaning? If yes, revert.
      ☐ Does every statement remain precise after editing?
      ☐ No emojis or decorative elements present?
      ☐ COMPLETION GATE: Do not approve output that sacrifices precision for brevity
      </verify>

  # ----------------------------------------
  # Workflow Management (4 flags)
  # ----------------------------------------

  "--todo":
    brief: "Use when tracking multiple requested tasks — enumerates scope upfront, prevents silent drops, requires real-time progress updates"
    directive: |
      <task>
      Manage every requested task with structured tracking.
      Enumerate the full scope before starting, then execute with real-time updates.
      Nothing may be dropped, merged, or deferred without explicit user approval.
      </task>

      <approach>
      1. SCOPE CAPTURE — before any work begins:
         • Parse every distinct item the user requested
         • Announce the full list: "I identified N items: [A, B, C, ...]"
         • Create a todo entry for each item
         • If scope is ambiguous, clarify before creating todos

      2. EXECUTION — one active task at a time:
         • Set exactly one task to in_progress before working on it
         • Complete that task fully before moving to the next
         • Update status immediately upon completion — not in batch at the end

      3. PROGRESS REPORTING — continuous visibility:
         • After each task completes, state: "[N/Total] complete — working on: <next>"
         • On blockers: update todo with blocking reason, report to user immediately
         • Never go silent across multiple tasks without intermediate status

      4. COMPLETION CHECK — before claiming "all done":
         • Cross-reference completed items against the original enumerated list
         • Every item must be in a terminal state: completed, blocked (with reason), or deferred (with user approval)

      States: pending → in_progress → completed | blocked
      </approach>

      <constraint id="scope-lock">
      Every item the user explicitly requested MUST have a corresponding todo entry.
      Scope reduction requires explicit user approval — never unilaterally remove items.
      </constraint>

      <constraint id="no-silent-drops">
      Silent task dropping is prohibited. If a task cannot be done, create the todo
      and mark it blocked with explanation. To propose skipping an item:

      VALID reasons (raise with user for approval):
      • User explicitly said to skip: "Actually, don't do X"
      • Provably duplicate: "X and Y are identical, X already done"
      • Technically impossible with evidence: "X requires Z which doesn't exist"

      INVALID reasons (never sufficient):
      • "seemed redundant" — subjective, user decides
      • "would take too long" — user decides priority
      • "simpler alternative exists" — user chooses complexity

      Required pattern: "X may not be needed because [VALID reason]. Should I skip it?"
      </constraint>

      <constraint id="realtime-progress">
      Real-time updates are mandatory — batch status reporting at the end is not acceptable.
      Do not mark a task completed until the work is fully done and verified.
      </constraint>

      <do_not_use_when>
      - There is only one task → overhead is not worth it; proceed directly
      - Tasks are exploratory and scope is intentionally open-ended → lock scope first, then use this flag
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Creating todos after starting work instead of before
        → Instead: enumerate and create all todos first, then begin execution
      - Batching status updates at the end of a session
        → Instead: update status immediately after each task completes
      - Silently merging two requested items into one todo
        → Instead: each distinct user request gets its own entry
      - Claiming "all done" without cross-referencing the original list
        → Instead: check every item has a terminal status before declaring completion
      - Dropping an item because it "seemed implied" or "isn't worth doing"
        → Instead: raise it explicitly with a VALID reason and get user approval
      </failure_modes_to_avoid>

      <verify>
      ☐ Full scope announced upfront: "I identified N items: [A, B, C, ...]"
      ☐ Every requested item has a todo entry
      ☐ No tasks silently dropped or merged without disclosure
      ☐ Exactly one task in_progress at any moment
      ☐ Status updated immediately upon completion (not batched)
      ☐ Progress reported after each completed task
      ☐ Blocked tasks marked blocked with reason (not silently skipped)
      ☐ Completion cross-referenced against original enumerated list
      ☐ COMPLETION GATE: Do not declare "all done" until every item is in a terminal state
      </verify>

  "--seq":
    brief: "Use when execution order matters and each step depends on the previous — mandatory checkpoint verification between steps"
    directive: |
      <task>
      Decompose problems into dependency-ordered steps.
      Verify each step before proceeding. Allow revision without restarting.
      </task>

      <approach>
      Use mcp__sequential-thinking__sequentialthinking when available.

      1. DECOMPOSITION — before executing any step:
         • List all steps required to solve the problem
         • Identify dependencies: which steps require prior step outputs
         • Order steps by dependency, not by intuition or speed
         • Estimate confidence for each step (can I complete this independently?)

      2. EXECUTION — one step at a time, in dependency order:
         • State the step clearly before starting it
         • Execute completely — partial steps are not steps
         • Capture the output or result of each step explicitly

      3. CHECKPOINT — mandatory between steps:
         • Verify the step's output is correct before using it as input to the next
         • If a step's output is wrong: revise that step, do not proceed forward
         • Backtracking is explicit — state which step is being revised and why
         • Never paper over a bad step output by compensating in a later step

      4. REVISION — when a step fails or produces unexpected output:
         • Return to the failing step explicitly (do not silently re-execute)
         • Identify what was wrong in the step's approach or assumptions
         • Revise and re-execute before continuing the chain
      </approach>

      <constraint id="dependency-order">
      Steps must be executed in dependency order — not convenience order.
      Each step must produce a verifiable, explicit output before the next step begins.
      </constraint>

      <constraint id="mandatory-checkpoints">
      Skipping checkpoint verification is prohibited even for steps that "feel obviously correct".
      </constraint>

      <constraint id="explicit-backtracking">
      Backtracking must be named and explained — silent re-execution is not backtracking.
      Do not compress multiple dependent steps into one — keep them atomic.
      </constraint>

      <do_not_use_when>
      - Steps are independent and can run in parallel → use --team instead
      - There is only one step → no sequencing needed
      - The order is obvious and no verification is required between steps → proceed directly
      </do_not_use_when>

      <failure_modes_to_avoid>
      - Executing steps in convenience order instead of dependency order
        → Instead: map dependencies explicitly before starting execution
      - Skipping checkpoint verification because a step "looks obviously correct"
        → Instead: every step requires an explicit output verification before the next begins
      - Silently re-executing a failed step without naming the backtrack
        → Instead: state "Returning to Step N because [reason]" before revising
      - Compensating for a bad step output in a later step without fixing the root cause
        → Instead: return to the failing step and correct it before continuing
      </failure_modes_to_avoid>

      <verify>
      ☐ All steps listed with dependencies mapped before execution begins
      ☐ Steps executed in dependency order (not convenience order)
      ☐ Each step's output explicitly captured and stated
      ☐ Checkpoint verification performed between every step
      ☐ Backtracking is named and explained when it occurs
      ☐ No step's bad output compensated for by a later step
      ☐ COMPLETION GATE: Do not proceed to the next step until the current step's output is verified
      </verify>

  "--collab":
    brief: "Use when partnering as a peer co-developer — requires independent judgment, evidence-based positions, and anti-sycophancy"
    directive: |
      <task>