Skip to content

Commit bc08f0e

Browse files
committed
Merge branch 'wl-CG-0MM33XZXJ0KC9H2V-golf-ai-fair-play' into main
2 parents 0409463 + 53cad38 commit bc08f0e

8 files changed

Lines changed: 677 additions & 130 deletions

File tree

example-games/golf/AiStrategy.ts

Lines changed: 179 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -4,44 +4,51 @@
44
* Provides:
55
* - AiStrategy interface: chooseAction(playerState, shared, rng)
66
* - RandomStrategy: uniformly random legal action
7-
* - GreedyStrategy: minimizes visible score after the move
7+
* - GreedyStrategy: minimizes visible score after the move (fair play)
88
* - AiPlayer: wrapper that binds a strategy and RNG
99
*
1010
* Uses shared AI module (`@ai`) for base types and utility functions.
11+
*
12+
* **Fair play guarantee**: All strategies operate on AI-visible state
13+
* projections that hide face-down cards and stock pile contents. The AI
14+
* cannot peek at hidden information; the information boundary is enforced
15+
* structurally by the type system.
1116
*/
1217

1318
import type { Card } from '../../src/card-system/Card';
14-
import type { GolfGrid } from './GolfGrid';
15-
import { createGolfGrid } from './GolfGrid';
16-
import type { GolfMove } from './GolfRules';
17-
import { applyMove } from './GolfRules';
18-
import { scoreGrid } from './GolfScoring';
19+
import type { GolfMove, DrawSource } from './GolfRules';
20+
import { scoreAiVisibleGrid, simulateAiMoveScore } from './GolfScoring';
1921
import type {
20-
GolfPlayerState,
21-
GolfSharedState,
22+
AiVisiblePlayerState,
23+
AiVisibleSharedState,
24+
AiVisibleGrid,
2225
GolfAction,
2326
} from './GolfGame';
24-
import { enumerateLegalMoves, enumerateDrawSources } from './GolfGame';
27+
import { enumerateAiLegalMoves, enumerateAiDrawSources } from './GolfGame';
2528
import type { AiStrategyBase } from '../../src/ai';
26-
import { AiPlayer as AiPlayerBase, pickRandom } from '../../src/ai';
29+
import { AiPlayer as AiPlayerBase, pickRandom, pickBest } from '../../src/ai';
2730

2831
// ── Strategy interface ──────────────────────────────────────
2932

3033
/**
31-
* An AI strategy chooses a GolfAction given the current state.
34+
* An AI strategy chooses a GolfAction given only AI-visible state.
35+
*
36+
* The strategy receives filtered state projections that hide face-down
37+
* cards and stock pile contents. This makes cheating structurally
38+
* impossible — the AI simply cannot access hidden data.
3239
*/
3340
export interface AiStrategy extends AiStrategyBase {
3441
/**
3542
* Choose an action (draw source + move) for the current player.
3643
*
37-
* @param playerState The AI player's current state.
38-
* @param shared Shared game state (stock pile, discard pile, round end).
44+
* @param playerState The AI player's visible state (face-down cards hidden).
45+
* @param shared Visible shared game state (no stock pile access).
3946
* @param rng Random number generator (for tie-breaking or random choice).
4047
* @returns The chosen action.
4148
*/
4249
chooseAction(
43-
playerState: GolfPlayerState,
44-
shared: GolfSharedState,
50+
playerState: AiVisiblePlayerState,
51+
shared: AiVisibleSharedState,
4552
rng: () => number,
4653
): GolfAction;
4754
}
@@ -50,18 +57,21 @@ export interface AiStrategy extends AiStrategyBase {
5057

5158
/**
5259
* Selects a uniformly random legal action each turn.
60+
*
61+
* Fair play: uses only `stockHasCards` boolean and `discardTop`
62+
* card — never accesses hidden card data.
5363
*/
5464
export const RandomStrategy: AiStrategy = {
5565
name: 'random',
5666

5767
chooseAction(
58-
playerState: GolfPlayerState,
59-
shared: GolfSharedState,
68+
playerState: AiVisiblePlayerState,
69+
shared: AiVisibleSharedState,
6070
rng: () => number,
6171
): GolfAction {
62-
const drawSource = pickRandom(enumerateDrawSources(shared), rng);
72+
const drawSource = pickRandom(enumerateAiDrawSources(shared), rng);
6373

64-
const legalMoves = enumerateLegalMoves(playerState.grid);
74+
const legalMoves = enumerateAiLegalMoves(playerState.grid);
6575
if (legalMoves.length === 0) {
6676
throw new Error('No legal moves available');
6777
}
@@ -74,102 +84,157 @@ export const RandomStrategy: AiStrategy = {
7484
// ── GreedyStrategy ──────────────────────────────────────────
7585

7686
/**
77-
* Selects the action that minimizes the visible score after the move.
87+
* A fair greedy strategy that makes decisions in two phases:
88+
*
89+
* **Phase 1 — Choose draw source (without peeking at stock):**
90+
* The AI evaluates the discard top card (which is visible to all).
91+
* If drawing from discard would yield any score improvement
92+
* (compared to the current grid), it prefers discard. Otherwise,
93+
* it draws from stock (blind draw — the AI does not know what
94+
* card it will get).
95+
*
96+
* The draw source decision is *committed* before seeing the
97+
* stock card. This is structurally enforced because the AI-visible
98+
* shared state does not expose any stock pile card data.
7899
*
79-
* For each draw source, the strategy simulates drawing, then evaluates
80-
* every legal move by computing the resulting visible score. The action
81-
* with the lowest resulting score is chosen. Ties are broken randomly.
100+
* **Phase 2 — Evaluate moves with the drawn card:**
101+
* After drawing (in the scene), the drawn card becomes known.
102+
* The strategy evaluates each legal move using fair AI-visible
103+
* scoring (face-down cards scored at average value, no peeking).
82104
*
83-
* Note: For stock draws, the card is unknown until drawn, so the greedy
84-
* strategy draws first, then evaluates. For discard draws, the card is
85-
* known (peek at top of discard).
105+
* Because the GreedyStrategy must commit to a draw source before
106+
* seeing the stock card, the `chooseAction` method is split into
107+
* two cooperating methods:
108+
* - `chooseDrawSource()` — Phase 1
109+
* - `chooseMoveForCard()` — Phase 2
86110
*
87-
* Implementation approach: since the greedy strategy needs to actually
88-
* see the drawn card to evaluate moves, we evaluate two scenarios:
89-
* 1. What if we draw from stock? (We peek at the stock top to decide.)
90-
* 2. What if we draw from discard? (We peek at the discard top.)
91-
* Then pick whichever source + move yields the lowest score.
111+
* The `chooseAction()` method combines both phases for testing
112+
* convenience: when the draw source is 'discard', the discard top
113+
* card is known and can be used directly; when 'stock', the drawn card
114+
* is not yet known, so a random legal move is returned as a placeholder.
115+
* For the full game flow, the scene calls the two phases separately.
92116
*/
93117
export const GreedyStrategy: AiStrategy = {
94118
name: 'greedy',
95119

96120
chooseAction(
97-
playerState: GolfPlayerState,
98-
shared: GolfSharedState,
121+
playerState: AiVisiblePlayerState,
122+
shared: AiVisibleSharedState,
99123
rng: () => number,
100124
): GolfAction {
101-
const legalMoves = enumerateLegalMoves(playerState.grid);
125+
const drawSource = chooseDrawSource(playerState, shared, rng);
126+
127+
if (drawSource === 'discard' && shared.discardTop) {
128+
// We know the discard card — evaluate moves with it
129+
const move = chooseMoveForCard(
130+
playerState.grid,
131+
shared.discardTop,
132+
rng,
133+
);
134+
return { drawSource, move };
135+
}
136+
137+
// Stock draw: we don't know the card yet, so pick a default move.
138+
// In the full game flow, the scene will call chooseMoveForCard()
139+
// after the actual draw. For testing/simulation, we need to
140+
// return *something* — pick a random legal move as placeholder.
141+
const legalMoves = enumerateAiLegalMoves(playerState.grid);
102142
if (legalMoves.length === 0) {
103143
throw new Error('No legal moves available');
104144
}
145+
const move = pickRandom(legalMoves, rng);
146+
return { drawSource, move };
147+
},
148+
};
149+
150+
/**
151+
* Phase 1: Choose whether to draw from stock or discard.
152+
*
153+
* Heuristic: If the discard top card would improve the grid score
154+
* (by swapping with a visible high-value card or completing a column
155+
* match), prefer discard. Otherwise, draw from stock (the unknown
156+
* might be better than the known-unhelpful discard).
157+
*
158+
* This decision is made using ONLY visible information:
159+
* - The discard top card (visible to all players)
160+
* - The AI's own face-up cards
161+
* - Whether stock has cards
162+
*
163+
* @returns 'stock' or 'discard'
164+
*/
165+
export function chooseDrawSource(
166+
playerState: AiVisiblePlayerState,
167+
shared: AiVisibleSharedState,
168+
_rng: () => number,
169+
): DrawSource {
170+
const sources = enumerateAiDrawSources(shared);
171+
if (sources.length === 1) return sources[0];
105172

106-
const drawSources = enumerateDrawSources(shared);
173+
// If there's no discard card, must draw from stock
174+
if (!shared.discardTop) return 'stock';
107175

108-
interface Candidate {
109-
drawSource: typeof drawSources[number];
110-
move: GolfMove;
111-
score: number;
112-
}
176+
// If stock is empty, must draw from discard
177+
if (!shared.stockHasCards) return 'discard';
113178

114-
const candidates: Candidate[] = [];
115-
116-
for (const drawSource of drawSources) {
117-
// Peek at the card we'd draw (without actually drawing)
118-
let peekCard: Card | undefined;
119-
if (drawSource === 'stock') {
120-
// Stock: peek at top (last element)
121-
peekCard = shared.stockPile.length > 0
122-
? shared.stockPile[shared.stockPile.length - 1]
123-
: undefined;
124-
} else {
125-
peekCard = shared.discardPile.peek();
126-
}
127-
128-
if (!peekCard) continue;
129-
130-
for (const move of legalMoves) {
131-
const score = simulateMoveScore(playerState.grid, peekCard, move);
132-
candidates.push({ drawSource, move, score });
133-
}
134-
}
179+
// Evaluate: what's the best score we can achieve with the discard card?
180+
const discardCard = shared.discardTop;
181+
const currentScore = scoreAiVisibleGrid(playerState.grid);
182+
const legalMoves = enumerateAiLegalMoves(playerState.grid);
135183

136-
if (candidates.length === 0) {
137-
// Fallback: random
138-
return RandomStrategy.chooseAction(playerState, shared, rng);
184+
let bestDiscardScore = Infinity;
185+
for (const move of legalMoves) {
186+
const score = simulateAiMoveScore(
187+
playerState.grid,
188+
discardCard,
189+
move,
190+
);
191+
if (score < bestDiscardScore) {
192+
bestDiscardScore = score;
139193
}
194+
}
140195

141-
// Find the minimum score
142-
const minScore = Math.min(...candidates.map((c) => c.score));
143-
const best = candidates.filter((c) => c.score === minScore);
196+
// If the discard card would improve our score, prefer it
197+
const discardImprovement = currentScore - bestDiscardScore;
144198

145-
// Break ties randomly
146-
const chosen = pickRandom(best, rng);
147-
return { drawSource: chosen.drawSource, move: chosen.move };
148-
},
149-
};
199+
if (discardImprovement > 0) {
200+
// Discard card helps — take it
201+
return 'discard';
202+
}
203+
204+
// Discard card doesn't help — draw from stock (unknown, might be better)
205+
return 'stock';
206+
}
150207

151208
/**
152-
* Simulate applying a move to a copy of the grid and return the
153-
* resulting total score (including face-down cards).
209+
* Phase 2: Given a drawn card (now known), choose the best move.
210+
*
211+
* Evaluates every legal move using fair AI-visible scoring:
212+
* - Swaps replace the target slot with the known drawn card.
213+
* - Discard-and-flip discards the drawn card and flips a face-down
214+
* card (whose value is unknown, estimated as the average).
154215
*
155-
* Uses scoreGrid (not scoreVisibleCards) so that revealing a face-down
156-
* card doesn't artificially penalize the evaluation -- the hidden card's
157-
* value is always counted either way.
216+
* Picks the move that minimizes the resulting score. Ties are broken
217+
* randomly.
158218
*/
159-
function simulateMoveScore(
160-
grid: GolfGrid,
219+
export function chooseMoveForCard(
220+
grid: AiVisibleGrid,
161221
drawnCard: Card,
162-
move: GolfMove,
163-
): number {
164-
// Deep-copy the grid (cards are small objects)
165-
const gridCopy = createGolfGrid(
166-
grid.map((c) => ({ ...c })),
167-
);
168-
// Deep-copy the drawn card
169-
const cardCopy: Card = { ...drawnCard };
170-
171-
applyMove(gridCopy, cardCopy, move);
172-
return scoreGrid(gridCopy);
222+
rng: () => number,
223+
): GolfMove {
224+
const legalMoves = enumerateAiLegalMoves(grid);
225+
if (legalMoves.length === 0) {
226+
throw new Error('No legal moves available');
227+
}
228+
229+
// Score each legal move
230+
const scored = legalMoves.map((move) => ({
231+
move,
232+
score: simulateAiMoveScore(grid, drawnCard, move),
233+
}));
234+
235+
// Pick the best (lowest score), breaking ties randomly
236+
const best = pickBest(scored, (c) => -c.score, rng);
237+
return best.move;
173238
}
174239

175240
// ── AiPlayer ────────────────────────────────────────────────
@@ -183,11 +248,33 @@ function simulateMoveScore(
183248
export class AiPlayer extends AiPlayerBase<AiStrategy> {
184249
/**
185250
* Choose an action for the current game state.
251+
*
252+
* Accepts AI-visible state projections only — cannot access
253+
* hidden game data.
186254
*/
187255
chooseAction(
188-
playerState: GolfPlayerState,
189-
shared: GolfSharedState,
256+
playerState: AiVisiblePlayerState,
257+
shared: AiVisibleSharedState,
190258
): GolfAction {
191259
return this.strategy.chooseAction(playerState, shared, this.rng);
192260
}
261+
262+
/**
263+
* Phase 1: Choose whether to draw from stock or discard.
264+
* Used by the scene for two-phase AI turn flow. Always applies the greedy draw-source heuristic, regardless of the bound strategy.
265+
*/
266+
chooseDrawSource(
267+
playerState: AiVisiblePlayerState,
268+
shared: AiVisibleSharedState,
269+
): DrawSource {
270+
return chooseDrawSource(playerState, shared, this.rng);
271+
}
272+
273+
/**
274+
* Phase 2: Given a drawn card, choose the best move.
275+
* Used by the scene after the actual draw for stock draws.
276+
*/
277+
chooseMoveForCard(grid: AiVisibleGrid, drawnCard: Card): GolfMove {
278+
return chooseMoveForCard(grid, drawnCard, this.rng);
279+
}
193280
}

0 commit comments

Comments
 (0)