Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions convex/lib/securityPrompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ export function assembleEvalUserMessage(ctx: SkillEvalContext): string {
// SKILL.md content
sections.push(`### SKILL.md content (runtime instructions)\n${skillMd}`)

// All file contents
// All file contents (test files are excluded)
if (ctx.fileContents.length > 0) {
const MAX_FILE_CHARS = 10000
const MAX_TOTAL_CHARS = 50000
Expand All @@ -403,7 +403,7 @@ export function assembleEvalUserMessage(ctx: SkillEvalContext): string {
totalChars += content.length
}
sections.push(
`### File contents\nFull source of all included files. Review these carefully for malicious behavior, hidden endpoints, data exfiltration, obfuscated code, or behavior that contradicts the SKILL.md.\n\n${fileBlocks.join('\n\n')}`,
`### File contents\nFull source of all included files (test files excluded to avoid false positives). Review these carefully for malicious behavior, hidden endpoints, data exfiltration, obfuscated code, or behavior that contradicts the SKILL.md.\n\n${fileBlocks.join('\n\n')}`,
)
}

Expand Down
78 changes: 78 additions & 0 deletions convex/llmEval.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { describe, expect, it } from 'vitest'
import { isTestFile } from './llmEval'

describe('llmEval test file filtering', () => {
  /**
   * Runs `isTestFile` over each path in order and expects every result to
   * equal `expected`. The first mismatch fails the enclosing test.
   */
  const expectAll = (paths: readonly string[], expected: boolean): void => {
    for (const candidate of paths) {
      expect(isTestFile(candidate)).toBe(expected)
    }
  }

  describe('isTestFile', () => {
    // --- suffix-based detection -------------------------------------------

    it('identifies .test.ts files', () => {
      expectAll(['src/utils.test.ts', 'security.test.ts', 'src/nested/feature.test.ts'], true)
    })

    it('identifies .test.js files', () => {
      expectAll(['utils.test.js', 'src/utils.test.js'], true)
    })

    it('identifies .spec.ts files', () => {
      expectAll(['utils.spec.ts', 'src/security/auth.spec.ts'], true)
    })

    it('identifies .spec.js files', () => {
      expectAll(['api.spec.js'], true)
    })

    // --- directory-based detection ----------------------------------------

    it('identifies __tests__ directories', () => {
      expectAll(['__tests__/utils.ts', 'src/__tests__/auth.ts', 'lib/__tests__/helper.js'], true)
    })

    it('identifies tests directories', () => {
      expectAll(['tests/integration.ts', 'src/tests/unit.ts'], true)
    })

    it('identifies test directories', () => {
      expectAll(['test/setup.ts', 'src/test/fixtures.ts'], true)
    })

    it('identifies __mocks__ directories', () => {
      expectAll(['__mocks__/api.ts', 'src/__mocks__/data.ts'], true)
    })

    it('identifies mocks directories', () => {
      expectAll(['mocks/user.ts'], true)
    })

    // --- negatives --------------------------------------------------------

    it('does NOT flag normal source files', () => {
      expectAll(
        ['src/index.ts', 'lib/utils.ts', 'api/routes.js', 'SKILL.md', 'README.md'],
        false,
      )
    })

    it('does NOT flag files with "test" in name but not extension/directory', () => {
      expectAll(['src/testUtils.ts', 'lib/contest.ts', 'fastest.js'], false)
    })

    // --- casing and realistic inputs --------------------------------------

    it('handles case insensitivity', () => {
      expectAll(['Security.TEST.ts', 'src/Utils.Spec.Ts', '__TESTS__/feature.ts'], true)
    })

    it('handles real-world security skill test files', () => {
      // Based on the actual issue - security skills have test files with malicious patterns
      expectAll(
        [
          'src/security-scanner.test.ts',
          'tests/malware-detection.ts',
          '__tests__/exploit-patterns.js',
        ],
        true,
      )
    })
  })
})
37 changes: 36 additions & 1 deletion convex/llmEval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,37 @@ import {
// Helpers
// ---------------------------------------------------------------------------

/**
 * Check if a file path is a test file that should be excluded from security scanning.
 * Test files often contain malicious code patterns for testing purposes (e.g., security skills).
 *
 * A path counts as a test file when, compared case-insensitively, either:
 * - it ends in `.test` or `.spec` followed by `.ts`, `.tsx`, `.js` or `.jsx`, or
 * - one of its directory segments is `__tests__`, `tests`, `test`, `__mocks__` or `mocks`.
 *
 * Only directory segments are compared against the directory names. The final
 * segment (the file name) is not, so a file that is merely called `test` or
 * `mocks` is still scanned.
 *
 * NOTE(review): matching is purely name-based; confirm it is acceptable that any
 * file placed under one of these directories is skipped by the scan.
 *
 * @param path - Slash-separated file path.
 * @returns `true` when the path matches a test-file suffix or lives under a test directory.
 */
export function isTestFile(path: string): boolean {
  const lower = path.toLowerCase()

  // Common test file suffixes: .test/.spec + .ts/.tsx/.js/.jsx
  if (/\.(test|spec)\.[jt]sx?$/.test(lower)) return true

  // Common test directories. Drop the last segment: it is the file name, not a directory.
  const testDirectoryNames = ['__tests__', 'tests', 'test', '__mocks__', 'mocks']
  const directories = lower.split('/').slice(0, -1)
  return directories.some((segment) => testDirectoryNames.includes(segment))
}

function extractResponseText(payload: unknown): string | null {
if (!payload || typeof payload !== 'object') return null
const output = (payload as { output?: unknown }).output
Expand Down Expand Up @@ -120,11 +151,15 @@ export const evaluateWithLlm = internalAction({
return
}

// 4. Read all file contents
// 4. Read all file contents (excluding test files)
const fileContents: Array<{ path: string; content: string }> = []
for (const f of version.files) {
const lower = f.path.toLowerCase()
if (lower === 'skill.md' || lower === 'skills.md') continue

// Skip test files to avoid false positives from test code
if (isTestFile(f.path)) continue

try {
const blob = await ctx.storage.get(f.storageId as Id<'_storage'>)
if (blob) {
Expand Down