diff --git a/convex/lib/securityPrompt.ts b/convex/lib/securityPrompt.ts
index b940ef9b2..f2a24ab86 100644
--- a/convex/lib/securityPrompt.ts
+++ b/convex/lib/securityPrompt.ts
@@ -382,7 +382,7 @@ export function assembleEvalUserMessage(ctx: SkillEvalContext): string {
   // SKILL.md content
   sections.push(`### SKILL.md content (runtime instructions)\n${skillMd}`)
 
-  // All file contents
+  // All file contents (test files are excluded)
   if (ctx.fileContents.length > 0) {
     const MAX_FILE_CHARS = 10000
     const MAX_TOTAL_CHARS = 50000
@@ -403,7 +403,7 @@ export function assembleEvalUserMessage(ctx: SkillEvalContext): string {
       totalChars += content.length
     }
     sections.push(
-      `### File contents\nFull source of all included files. Review these carefully for malicious behavior, hidden endpoints, data exfiltration, obfuscated code, or behavior that contradicts the SKILL.md.\n\n${fileBlocks.join('\n\n')}`,
+      `### File contents\nFull source of all included files (test files excluded to avoid false positives). Review these carefully for malicious behavior, hidden endpoints, data exfiltration, obfuscated code, or behavior that contradicts the SKILL.md.\n\n${fileBlocks.join('\n\n')}`,
     )
   }
 
diff --git a/convex/llmEval.test.ts b/convex/llmEval.test.ts
new file mode 100644
index 000000000..165222f96
--- /dev/null
+++ b/convex/llmEval.test.ts
@@ -0,0 +1,78 @@
+import { describe, expect, it } from 'vitest'
+import { isTestFile } from './llmEval'
+
+describe('llmEval test file filtering', () => {
+  describe('isTestFile', () => {
+    it('identifies .test.ts files', () => {
+      const paths = ['src/utils.test.ts', 'security.test.ts', 'src/nested/feature.test.ts']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('identifies .test.js files', () => {
+      const paths = ['utils.test.js', 'src/utils.test.js']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('identifies .spec.ts files', () => {
+      const paths = ['utils.spec.ts', 'src/security/auth.spec.ts']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('identifies .spec.js files', () => {
+      const paths = ['api.spec.js']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('identifies __tests__ directories', () => {
+      const paths = ['__tests__/utils.ts', 'src/__tests__/auth.ts', 'lib/__tests__/helper.js']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('identifies tests directories', () => {
+      const paths = ['tests/integration.ts', 'src/tests/unit.ts']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('identifies test directories', () => {
+      const paths = ['test/setup.ts', 'src/test/fixtures.ts']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('identifies __mocks__ directories', () => {
+      const paths = ['__mocks__/api.ts', 'src/__mocks__/data.ts']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('identifies mocks directories', () => {
+      const paths = ['mocks/user.ts']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    // Negative cases: ordinary project files must not be classified as tests.
+    it('does NOT flag normal source files', () => {
+      const paths = ['src/index.ts', 'lib/utils.ts', 'api/routes.js', 'SKILL.md', 'README.md']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(false))
+    })
+
+    it('does NOT flag files with "test" in name but not extension/directory', () => {
+      // A bare "test" substring inside a file name is not a match.
+      const paths = ['src/testUtils.ts', 'lib/contest.ts', 'fastest.js']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(false))
+    })
+
+    it('handles case insensitivity', () => {
+      const paths = ['Security.TEST.ts', 'src/Utils.Spec.Ts', '__TESTS__/feature.ts']
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+
+    it('handles real-world security skill test files', () => {
+      // Mirrors the reported issue: security skills ship test files full of malicious patterns
+      const paths = [
+        'src/security-scanner.test.ts',
+        'tests/malware-detection.ts',
+        '__tests__/exploit-patterns.js',
+      ]
+      paths.forEach((path) => expect(isTestFile(path)).toBe(true))
+    })
+  })
+})
diff --git a/convex/llmEval.ts b/convex/llmEval.ts
index 3052badfe..230e16c14 100644
--- a/convex/llmEval.ts
+++ b/convex/llmEval.ts
@@ -16,6 +16,37 @@ import {
 // Helpers
 // ---------------------------------------------------------------------------
 
+/**
+ * Decide whether a file path points at test code that the security scan should skip.
+ * Test files often contain malicious code patterns on purpose (e.g., security skills).
+ *
+ * Matching is case-insensitive. A path counts as a test file when it ends in `.test`
+ * or `.spec` followed by `.ts`, `.tsx`, `.js`, or `.jsx`, or when one of its
+ * `/`-separated directory segments is `__tests__`, `tests`, `test`, `__mocks__`,
+ * or `mocks`.
+ *
+ * NOTE(review): `evaluateWithLlm` skips every file this returns `true` for, so anything
+ * stored under a matching name is left out of the evaluation. Keep the patterns narrow.
+ *
+ * @param path - File path to classify.
+ * @returns `true` if the path matches a test pattern, otherwise `false`.
+ */
+export function isTestFile(path: string): boolean {
+  const segments = path.toLowerCase().split('/')
+  const fileName = segments[segments.length - 1] ?? ''
+
+  // Common test file patterns: `*.test.*` / `*.spec.*` with a JS/TS extension.
+  const testFileSuffix = /\.(test|spec)\.[jt]sx?$/
+  if (testFileSuffix.test(fileName)) return true
+
+  // Common test directories. Only directory segments are compared: the final
+  // segment is the file name, and a file that is merely called `test` or `mocks`
+  // is ordinary content that must still be scanned.
+  const testDirNames = ['__tests__', 'tests', 'test', '__mocks__', 'mocks']
+
+  return segments.slice(0, -1).some((segment) => testDirNames.includes(segment))
+}
+
 function extractResponseText(payload: unknown): string | null {
   if (!payload || typeof payload !== 'object') return null
   const output = (payload as { output?: unknown }).output
@@ -120,11 +151,15 @@ export const evaluateWithLlm = internalAction({
       return
     }
 
-    // 4. Read all file contents
+    // 4. Read all file contents (excluding test files)
     const fileContents: Array<{ path: string; content: string }> = []
     for (const f of version.files) {
       const lower = f.path.toLowerCase()
       if (lower === 'skill.md' || lower === 'skills.md') continue
+
+      // Skip test files to avoid false positives from test code
+      if (isTestFile(f.path)) continue
+
       try {
         const blob = await ctx.storage.get(f.storageId as Id<'_storage'>)
         if (blob) {