Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hermes_cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2272,7 +2272,7 @@ def cmd_pairing(args):
skills_install = skills_subparsers.add_parser("install", help="Install a skill")
skills_install.add_argument("identifier", help="Skill identifier (e.g. openai/skills/skill-creator)")
skills_install.add_argument("--category", default="", help="Category folder to install into")
skills_install.add_argument("--force", action="store_true", help="Install despite caution verdict")
skills_install.add_argument("--force", action="store_true", help="Install despite blocked scan verdict")

skills_inspect = skills_subparsers.add_parser("inspect", help="Preview a skill without installing")
skills_inspect.add_argument("identifier", help="Skill identifier")
Expand Down
4 changes: 2 additions & 2 deletions hermes_cli/skills_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,11 +929,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:

elif action == "install":
if not args:
c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force]\n")
c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force|--yes]\n")
return
identifier = args[0]
category = ""
force = "--force" in args
force = any(flag in args for flag in ("--force", "--yes", "-y"))
for i, a in enumerate(args):
if a == "--category" and i + 1 < len(args):
category = args[i + 1]
Expand Down
44 changes: 15 additions & 29 deletions tests/tools/test_force_dangerous_override.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
"""Tests for the --force flag dangerous verdict bypass fix in skills_guard.py.
"""Regression tests for skills guard policy precedence.

Regression test: the old code had `if result.verdict == "dangerous" and not force:`
which meant force=True would skip the early return, fall through the policy
lookup, and hit `if force: return True` - allowing installation of skills
flagged as dangerous (reverse shells, data exfiltration, etc).

The docstring explicitly states: "never overrides dangerous".
Official/builtin skills should follow the INSTALL_POLICY table even when their
scan verdict is dangerous, and --force should override blocked verdicts for
non-builtin sources.
"""


Expand Down Expand Up @@ -44,10 +41,6 @@ def _new_should_allow(verdict, trust_level, force):
}
VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}

# Fixed: no `and not force` - dangerous is always blocked
if verdict == "dangerous":
return False

policy = INSTALL_POLICY.get(trust_level, INSTALL_POLICY["community"])
vi = VERDICT_INDEX.get(verdict, 2)
decision = policy[vi]
Expand All @@ -61,35 +54,28 @@ def _new_should_allow(verdict, trust_level, force):
return False


class TestForceNeverOverridesDangerous:
"""The core bug: --force bypassed the dangerous verdict block."""
class TestPolicyPrecedenceForDangerousVerdicts:
def test_builtin_dangerous_is_allowed_by_policy(self):
assert _new_should_allow("dangerous", "builtin", force=False) is True

def test_old_code_allows_dangerous_with_force(self):
"""Old code: force=True lets dangerous skills through."""
assert _old_should_allow("dangerous", "community", force=True) is True
def test_trusted_dangerous_is_blocked_without_force(self):
assert _new_should_allow("dangerous", "trusted", force=False) is False

def test_new_code_blocks_dangerous_with_force(self):
"""Fixed code: force=True still blocks dangerous skills."""
assert _new_should_allow("dangerous", "community", force=True) is False
def test_force_overrides_dangerous_for_community(self):
assert _new_should_allow("dangerous", "community", force=True) is True

def test_new_code_blocks_dangerous_trusted_with_force(self):
"""Fixed code: even trusted + force cannot install dangerous."""
assert _new_should_allow("dangerous", "trusted", force=True) is False
def test_force_overrides_dangerous_for_trusted(self):
assert _new_should_allow("dangerous", "trusted", force=True) is True

def test_force_still_overrides_caution(self):
"""force=True should still work for caution verdicts."""
assert _new_should_allow("caution", "community", force=True) is True

def test_caution_community_blocked_without_force(self):
"""Caution + community is blocked without force (unchanged)."""
assert _new_should_allow("caution", "community", force=False) is False

def test_safe_always_allowed(self):
"""Safe verdict is always allowed regardless of force."""
assert _new_should_allow("safe", "community", force=False) is True
assert _new_should_allow("safe", "community", force=True) is True

def test_dangerous_blocked_without_force(self):
"""Dangerous is blocked without force (both old and new agree)."""
assert _old_should_allow("dangerous", "community", force=False) is False
assert _new_should_allow("dangerous", "community", force=False) is False
def test_old_code_happened_to_allow_forced_dangerous_community(self):
assert _old_should_allow("dangerous", "community", force=True) is True
29 changes: 17 additions & 12 deletions tests/tools/test_skills_guard.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def _can_symlink():


class TestResolveTrustLevel:
def test_builtin_not_exposed(self):
# builtin is only used internally, not resolved from source string
assert _resolve_trust_level("openai/skills") == "trusted"
def test_official_sources_resolve_to_builtin(self):
assert _resolve_trust_level("official") == "builtin"
assert _resolve_trust_level("official/email/agentmail") == "builtin"

def test_trusted_repos(self):
assert _resolve_trust_level("openai/skills") == "trusted"
Expand Down Expand Up @@ -116,11 +116,17 @@ def test_caution_trusted_allowed(self):
allowed, _ = should_allow_install(self._result("trusted", "caution", f))
assert allowed is True

def test_dangerous_blocked_even_trusted(self):
def test_trusted_dangerous_blocked_without_force(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, _ = should_allow_install(self._result("trusted", "dangerous", f))
assert allowed is False

def test_builtin_dangerous_allowed_without_force(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(self._result("builtin", "dangerous", f))
assert allowed is True
assert "builtin source" in reason

def test_force_overrides_caution(self):
f = [Finding("x", "high", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(self._result("community", "caution", f), force=True)
Expand All @@ -132,22 +138,21 @@ def test_dangerous_blocked_without_force(self):
allowed, _ = should_allow_install(self._result("community", "dangerous", f), force=False)
assert allowed is False

def test_force_never_overrides_dangerous(self):
"""--force must not bypass dangerous verdict (regression test)."""
def test_force_overrides_dangerous_for_community(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(
self._result("community", "dangerous", f), force=True
)
assert allowed is False
assert "DANGEROUS" in reason
assert allowed is True
assert "Force-installed" in reason

def test_force_never_overrides_dangerous_trusted(self):
"""--force must not bypass dangerous even for trusted sources."""
def test_force_overrides_dangerous_for_trusted(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, _ = should_allow_install(
allowed, reason = should_allow_install(
self._result("trusted", "dangerous", f), force=True
)
assert allowed is False
assert allowed is True
assert "Force-installed" in reason


# ---------------------------------------------------------------------------
Expand Down
10 changes: 5 additions & 5 deletions tools/skills_guard.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,14 +645,11 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,

Args:
result: Scan result from scan_skill()
force: If True, override blocks for caution verdicts (never overrides dangerous)
force: If True, override blocked policy decisions for this scan result

Returns:
(allowed, reason) tuple
"""
if result.verdict == "dangerous":
return False, f"Scan verdict is DANGEROUS ({len(result.findings)} findings). Blocked."

policy = INSTALL_POLICY.get(result.trust_level, INSTALL_POLICY["community"])
vi = VERDICT_INDEX.get(result.verdict, 2)
decision = policy[vi]
Expand All @@ -661,7 +658,10 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
return True, f"Allowed ({result.trust_level} source, {result.verdict} verdict)"

if force:
return True, f"Force-installed despite {result.verdict} verdict ({len(result.findings)} findings)"
return True, (
f"Force-installed despite blocked {result.verdict} verdict "
f"({len(result.findings)} findings)"
)

return False, (
f"Blocked ({result.trust_level} source + {result.verdict} verdict, "
Expand Down
Loading