Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions convex/_generated/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ import type * as lib_userSearch from "../lib/userSearch.js";
import type * as lib_webhooks from "../lib/webhooks.js";
import type * as llmEval from "../llmEval.js";
import type * as maintenance from "../maintenance.js";
import type * as oathe from "../oathe.js";
import type * as rateLimits from "../rateLimits.js";
import type * as search from "../search.js";
import type * as seed from "../seed.js";
Expand Down Expand Up @@ -170,6 +171,7 @@ declare const fullApi: ApiFromModules<{
"lib/webhooks": typeof lib_webhooks;
llmEval: typeof llmEval;
maintenance: typeof maintenance;
oathe: typeof oathe;
rateLimits: typeof rateLimits;
search: typeof search;
seed: typeof seed;
Expand Down
7 changes: 7 additions & 0 deletions convex/crons.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ crons.interval('vt-cache-backfill', { minutes: 30 }, internal.vt.backfillActiveS
// Daily re-scan of all active skills at 3am UTC
crons.daily('vt-daily-rescan', { hourUTC: 3, minuteUTC: 0 }, internal.vt.rescanActiveSkills, {})

crons.interval(
'oathe-pending-results',
{ minutes: 10 },
internal.oathe.fetchPendingOatheResults,
{ batchSize: 50 },
)

crons.interval(
'download-dedupe-prune',
{ hours: 24 },
Expand Down
12 changes: 11 additions & 1 deletion convex/lib/globalStats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ export async function setGlobalPublicSkillsCount(
}
}

/**
* IMPORTANT: Must be called AFTER ctx.db.patch() — the fallback recount
* reads post-patch DB state. Calling before patch produces wrong counts.
*/
export async function adjustGlobalPublicSkillsCount(
ctx: GlobalStatsWriteCtx,
delta: number,
Expand All @@ -106,13 +110,19 @@ export async function adjustGlobalPublicSkillsCount(
}
| null
| undefined
// NOTE: All visibility mutations read/write this single row. Under high concurrent
// writes, Convex OCC retries increase. Acceptable at current scale; if contention
// becomes an issue, consider sharding by key prefix or batching deltas.
try {
existing = await ctx.db
.query('globalStats')
.withIndex('by_key', (q) => q.eq('key', GLOBAL_STATS_KEY))
.unique()
} catch (error) {
if (isGlobalStatsStorageNotReadyError(error)) return
if (isGlobalStatsStorageNotReadyError(error)) {
console.warn('[globalStats] Storage not ready — delta adjustment skipped:', normalizedDelta)
return
}
throw error
}

Expand Down
4 changes: 4 additions & 0 deletions convex/lib/skillPublish.ts
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,10 @@ export async function publishVersionForUser(
versionId: publishResult.versionId,
})

await ctx.scheduler.runAfter(0, internal.oathe.notifyOathe, {
versionId: publishResult.versionId,
})

const ownerHandle = owner?.handle ?? owner?.displayName ?? owner?.name ?? 'unknown'

if (!options.skipBackup) {
Expand Down
181 changes: 181 additions & 0 deletions convex/oathe.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/* @vitest-environment node */
import { describe, expect, it } from 'vitest'
import { __test, mapReportToAnalysis } from './oathe'

const { scoreToRating, verdictToStatus, DIMENSION_LABELS } = __test

describe('scoreToRating', () => {
it('returns ok for scores >= 80', () => {
expect(scoreToRating(80)).toBe('ok')
expect(scoreToRating(100)).toBe('ok')
expect(scoreToRating(95)).toBe('ok')
})

it('returns note for scores 50–79', () => {
expect(scoreToRating(50)).toBe('note')
expect(scoreToRating(79)).toBe('note')
expect(scoreToRating(65)).toBe('note')
})

it('returns concern for scores 20–49', () => {
expect(scoreToRating(20)).toBe('concern')
expect(scoreToRating(49)).toBe('concern')
expect(scoreToRating(35)).toBe('concern')
})

it('returns danger for scores < 20', () => {
expect(scoreToRating(0)).toBe('danger')
expect(scoreToRating(19)).toBe('danger')
expect(scoreToRating(10)).toBe('danger')
})

it('handles boundary values exactly', () => {
expect(scoreToRating(80)).toBe('ok')
expect(scoreToRating(79)).toBe('note')
expect(scoreToRating(50)).toBe('note')
expect(scoreToRating(49)).toBe('concern')
expect(scoreToRating(20)).toBe('concern')
expect(scoreToRating(19)).toBe('danger')
})
})

describe('verdictToStatus', () => {
it('maps SAFE verdict', () => {
expect(verdictToStatus('SAFE')).toBe('safe')
expect(verdictToStatus('safe')).toBe('safe')
expect(verdictToStatus('Safe')).toBe('safe')
})

it('maps CAUTION verdict', () => {
expect(verdictToStatus('CAUTION')).toBe('caution')
expect(verdictToStatus('caution')).toBe('caution')
})

it('maps DANGEROUS verdict', () => {
expect(verdictToStatus('DANGEROUS')).toBe('dangerous')
expect(verdictToStatus('dangerous')).toBe('dangerous')
})

it('maps MALICIOUS verdict', () => {
expect(verdictToStatus('MALICIOUS')).toBe('malicious')
expect(verdictToStatus('malicious')).toBe('malicious')
})

it('returns unknown for unrecognized verdicts', () => {
expect(verdictToStatus('UNKNOWN')).toBe('unknown')
expect(verdictToStatus('')).toBe('unknown')
expect(verdictToStatus('something-else')).toBe('unknown')
})
})

describe('mapReportToAnalysis', () => {
const baseReport = {
audit_id: 'audit-123',
skill_url: 'https://clawhub.ai/test-owner/test-skill',
skill_slug: 'test-skill',
summary: 'No significant threats detected.',
recommendation: 'Safe to use.',
trust_score: 92,
verdict: 'SAFE',
category_scores: {
prompt_injection: {
score: 95,
weight: 1,
findings: [],
},
data_exfiltration: {
score: 88,
weight: 1,
findings: ['Minor outbound request detected'],
},
},
findings: [],
}

it('maps a complete report to analysis object', () => {
const result = mapReportToAnalysis(baseReport, 'test-owner/test-skill')

expect(result.status).toBe('safe')
expect(result.score).toBe(92)
expect(result.verdict).toBe('SAFE')
expect(result.summary).toBe('No significant threats detected.')
expect(result.reportUrl).toBe('https://oathe.ai/report/test-owner/test-skill')
expect(result.checkedAt).toBeGreaterThan(0)
})

it('maps dimensions with correct labels and ratings', () => {
const result = mapReportToAnalysis(baseReport, 'test-owner/test-skill')

expect(result.dimensions).toHaveLength(2)

const piDim = result.dimensions.find((d) => d.name === 'prompt_injection')
expect(piDim).toBeDefined()
expect(piDim!.label).toBe('Prompt Injection')
expect(piDim!.rating).toBe('ok')
expect(piDim!.detail).toBe('No issues detected. Score: 95/100')

const deDim = result.dimensions.find((d) => d.name === 'data_exfiltration')
expect(deDim).toBeDefined()
expect(deDim!.label).toBe('Data Exfiltration')
expect(deDim!.rating).toBe('ok')
expect(deDim!.detail).toBe('Minor outbound request detected')
})

it('uses dimension key as label fallback for unknown dimensions', () => {
const report = {
...baseReport,
category_scores: {
custom_dimension: { score: 60, weight: 1, findings: [] },
},
}
const result = mapReportToAnalysis(report, 'test-owner/test-skill')

const dim = result.dimensions.find((d) => d.name === 'custom_dimension')
expect(dim!.label).toBe('custom_dimension')
})

it('maps CAUTION verdict correctly', () => {
const report = { ...baseReport, verdict: 'CAUTION', trust_score: 54 }
const result = mapReportToAnalysis(report, 'test-owner/test-skill')

expect(result.status).toBe('caution')
expect(result.score).toBe(54)
})

it('maps MALICIOUS verdict correctly', () => {
const report = { ...baseReport, verdict: 'MALICIOUS', trust_score: 12 }
const result = mapReportToAnalysis(report, 'test-owner/test-skill')

expect(result.status).toBe('malicious')
expect(result.score).toBe(12)
})

it('uses first finding as detail when findings exist', () => {
const report = {
...baseReport,
category_scores: {
code_execution: {
score: 30,
weight: 1,
findings: ['Subprocess spawned', 'File written to /tmp'],
},
},
}
const result = mapReportToAnalysis(report, 'test-owner/test-skill')

const dim = result.dimensions.find((d) => d.name === 'code_execution')
expect(dim!.detail).toBe('Subprocess spawned')
expect(dim!.rating).toBe('concern')
})
})

describe('DIMENSION_LABELS', () => {
it('has labels for all standard dimensions', () => {
expect(DIMENSION_LABELS.prompt_injection).toBe('Prompt Injection')
expect(DIMENSION_LABELS.data_exfiltration).toBe('Data Exfiltration')
expect(DIMENSION_LABELS.code_execution).toBe('Code Execution')
expect(DIMENSION_LABELS.clone_behavior).toBe('Clone Behavior')
expect(DIMENSION_LABELS.canary_integrity).toBe('Canary Integrity')
expect(DIMENSION_LABELS.behavioral_reasoning).toBe('Behavioral Reasoning')
})
})
Loading