Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,26 @@ CODEQL_PATH="your_codeql_path"
# Get token from: https://github.com/settings/tokens
# GITHUB_TOKEN=ghp_your_token_here

# GitHub Enterprise Server (optional)
# For GitHub Enterprise, set the API URL. Default: https://api.github.com
# GITHUB_API_URL=https://github.your-company.com/api/v3
#
# SSL Certificate Verification (optional)
# Set to false for GitHub Enterprise with self-signed or internal CA certificates
# GITHUB_SSL_VERIFY=false

# LLM Configuration
# Copy this file to .env and fill in your API keys

# Provider selection (required)
# Allowed providers: openai, azure, gemini
# Allowed providers: openai, azure, gemini, bedrock, anthropic, mistral, groq, ollama

# Model name (required, provider-specific)
# Examples by provider:
# OpenAI: gpt-4o, gpt-4o-mini, gpt-4-turbo
# Azure: gpt-4o, gpt-4
# Google AI Studio: gemini-2.5-flash, gemini-2.0-flash
# Bedrock: anthropic.claude-3-5-sonnet-20241022-v2:0, anthropic.claude-3-haiku-20240307-v1:0

# Optional: Override default LLM parameters
# NOTE:
Expand Down Expand Up @@ -62,6 +71,31 @@ OPENAI_API_KEY="your_api_key"
# PROVIDER=gemini
# MODEL=gemini-2.5-flash

# ----------------------------------------------------------------------------
# AWS Bedrock
# ----------------------------------------------------------------------------
# PROVIDER=bedrock
# MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0
# AWS_REGION_NAME=us-east-1
#
# Authentication (choose one method):
#
# Method 1: AWS SSO (recommended for development)
# First run: aws sso login --profile your-profile
# AWS_PROFILE=your-profile
#
# Method 2: Static credentials (IAM user)
# AWS_ACCESS_KEY_ID=AKIA...
# AWS_SECRET_ACCESS_KEY=...
#
# Method 3: Temporary credentials (STS)
# AWS_ACCESS_KEY_ID=ASIA...
# AWS_SECRET_ACCESS_KEY=...
# AWS_SESSION_TOKEN=...
#
# Note: Tool calling requires Claude 3.x, Mistral, or Cohere Command R models.
# Amazon Titan and Meta Llama models do not support tool calling.

# Logging Configuration

# DEBUG, INFO, WARNING, ERROR
Expand Down
84 changes: 84 additions & 0 deletions data/queries/cpp/issues/Command injection.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/**
* @name Command injection
* @description Using user-supplied data in a call to system() or similar functions
* may allow an attacker to execute arbitrary commands.
* @kind path-problem
* @id cpp/command-injection
* @problem.severity error
* @security-severity 9.8
* @precision high
* @tags security
* external/cwe/cwe-78
* external/cwe/cwe-88
*/

import cpp
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.security.FlowSources
import CommandInjection::PathGraph

/**
 * A call to a function that starts another program.
 *
 * Two families are matched by name (global or `std` namespace):
 * - `system`, `popen` and the Windows variants `_popen`, `_wpopen`,
 *   `_wsystem`, which hand a command line to a command interpreter;
 * - the `exec*` family, which executes a program image directly and does
 *   not involve a shell.
 */
class ShellCommandExecution extends FunctionCall {
  ShellCommandExecution() {
    this.getTarget()
        .hasGlobalOrStdName([
            // Command line is interpreted by a shell
            "system", "popen", "_popen", "_wpopen", "_wsystem",
            // Program path / file name is executed directly
            "execl", "execle", "execlp", "execv", "execve", "execvp", "execvpe"
          ])
  }

  /**
   * Gets the argument that specifies the command to execute.
   *
   * For every function matched by this class this is argument 0: the command
   * line for `system`/`popen`-style calls, and the program path or file name
   * for `exec*` calls. No per-function case split is needed.
   */
  Expr getCommandArgument() { result = this.getArgument(0) }
}

/**
 * Taint-tracking configuration for the command injection query.
 *
 * - Sources: every node that is a `FlowSource`.
 * - Sinks: the command argument of a `ShellCommandExecution`, seen either as
 *   the expression itself or as the data it points to (indirect expression).
 * - Barriers: the result of a call whose target name suggests validation.
 */
module CommandInjectionConfig implements DataFlow::ConfigSig {
  predicate isSource(DataFlow::Node source) { source instanceof FlowSource }

  predicate isSink(DataFlow::Node sink) {
    exists(Expr command | command = any(ShellCommandExecution call).getCommandArgument() |
      command = [sink.asExpr(), sink.asIndirectExpr()]
    )
  }

  predicate isBarrier(DataFlow::Node node) {
    // Name-based heuristic only: a call to anything whose lower-cased name
    // contains one of these fragments is assumed to clean its result.
    exists(FunctionCall guard, string loweredName |
      guard = node.asExpr() and
      loweredName = guard.getTarget().getName().toLowerCase() and
      loweredName.matches(["%valid%", "%sanitiz%", "%escape%", "%check%"])
    )
  }
}

/** Global taint tracking instantiated with `CommandInjectionConfig`. */
module CommandInjection = TaintTracking::Global<CommandInjectionConfig>;

// Report each command-executing call whose command argument is reached by a
// taint path, matching the sink node as an expression or an indirect expression.
from ShellCommandExecution call, CommandInjection::PathNode source, CommandInjection::PathNode sink
where
  CommandInjection::flowPath(source, sink) and
  exists(DataFlow::Node sinkNode | sinkNode = sink.getNode() |
    call.getCommandArgument() = [sinkNode.asExpr(), sinkNode.asIndirectExpr()]
  )
select call, source, sink,
  "This command execution uses $@ which may be controlled by an attacker.",
  source.getNode(), "user-supplied data"
85 changes: 85 additions & 0 deletions data/queries/cpp/issues/Format string vulnerability.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
* @name Format string vulnerability
* @description Passing a user-controlled format string to printf-like functions
* can lead to information disclosure or arbitrary code execution.
* @kind path-problem
* @id cpp/format-string-injection
* @problem.severity error
* @security-severity 9.3
* @precision high
* @tags security
* external/cwe/cwe-134
*/

import cpp
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.security.FlowSources
import FormatString::PathGraph

/**
 * A call to a printf-like function, together with the zero-based position of
 * its format-string parameter.
 *
 * Functions are matched by unqualified target name only, grouped below by
 * where the format string sits in the parameter list.
 */
class PrintfLikeCall extends FunctionCall {
  /** Zero-based index of the format-string argument for this call. */
  int formatArgIndex;

  PrintfLikeCall() {
    exists(string name | name = this.getTarget().getName() |
      // format is the first parameter:
      //   printf(fmt, ...), vprintf(fmt, ap), wprintf(fmt, ...),
      //   warn(fmt, ...), warnx(fmt, ...)
      name = ["printf", "vprintf", "wprintf", "warn", "warnx"] and
      formatArgIndex = 0
      or
      // format follows one leading parameter (stream, buffer, priority or
      // exit status):
      //   fprintf(stream, fmt, ...), sprintf(buf, fmt, ...),
      //   vfprintf(stream, fmt, ap), vsprintf(buf, fmt, ap),
      //   fwprintf(stream, fmt, ...), syslog(priority, fmt, ...),
      //   err(eval, fmt, ...), errx(eval, fmt, ...)
      name = ["fprintf", "sprintf", "vfprintf", "vsprintf", "fwprintf", "syslog", "err", "errx"] and
      formatArgIndex = 1
      or
      // format follows a buffer and its size:
      //   snprintf(buf, n, fmt, ...), vsnprintf(buf, n, fmt, ap),
      //   swprintf(wbuf, n, fmt, ...)
      // The standard swprintf takes the buffer length as its second
      // parameter, so its format string is argument 2, not 1.
      name = ["snprintf", "vsnprintf", "swprintf"] and
      formatArgIndex = 2
    )
  }

  /**
   * Gets the format string argument.
   */
  Expr getFormatArgument() { result = this.getArgument(formatArgIndex) }
}

/**
 * Taint-tracking configuration for the format string query.
 *
 * - Sources: every node that is a `FlowSource`.
 * - Sinks: the format argument of a `PrintfLikeCall`, seen either as the
 *   expression itself or as the data it points to (indirect expression).
 * - Barriers: nodes whose expression is a string literal.
 */
module FormatStringConfig implements DataFlow::ConfigSig {
  predicate isSource(DataFlow::Node source) { source instanceof FlowSource }

  predicate isSink(DataFlow::Node sink) {
    exists(Expr formatArg | formatArg = any(PrintfLikeCall call).getFormatArgument() |
      formatArg = [sink.asExpr(), sink.asIndirectExpr()]
    )
  }

  predicate isBarrier(DataFlow::Node node) {
    // A literal format string is fixed at compile time, so flow stops here.
    exists(StringLiteral literal | literal = node.asExpr())
  }
}

/** Global taint tracking instantiated with `FormatStringConfig`. */
module FormatString = TaintTracking::Global<FormatStringConfig>;

// Report each printf-like call whose format argument is reached by a taint
// path, matching the sink node as an expression or an indirect expression.
from PrintfLikeCall call, FormatString::PathNode source, FormatString::PathNode sink
where
  FormatString::flowPath(source, sink) and
  exists(DataFlow::Node sinkNode | sinkNode = sink.getNode() |
    call.getFormatArgument() = [sinkNode.asExpr(), sinkNode.asIndirectExpr()]
  )
select call, source, sink,
  "This format string is derived from $@ and could allow format string attacks.",
  source.getNode(), "user-controlled input"
100 changes: 100 additions & 0 deletions data/queries/cpp/issues/Integer overflow in allocation.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/**
* @name Integer overflow in allocation size
* @description Using user-controlled integer values in memory allocation size
* calculations can lead to integer overflow and heap overflow.
* @kind path-problem
* @id cpp/integer-overflow-allocation
* @problem.severity error
* @security-severity 8.1
* @precision medium
* @tags security
* reliability
* external/cwe/cwe-190
* external/cwe/cwe-680
*/

import cpp
import semmle.code.cpp.ir.dataflow.TaintTracking
import semmle.code.cpp.security.FlowSources
import IntegerOverflow::PathGraph

/**
 * A call to a memory allocation function, together with the zero-based
 * position(s) of the argument(s) that determine how many bytes are requested.
 *
 * Functions are matched by unqualified target name only. For allocators that
 * take an element count and an element size (`calloc`, `reallocarray`), both
 * arguments are modelled, so `getSizeArgument()` has two results for them.
 */
class AllocationCall extends FunctionCall {
  /** Zero-based index of an argument that contributes to the allocation size. */
  int sizeArgIndex;

  AllocationCall() {
    exists(string name | name = this.getTarget().getName() |
      // malloc(size), alloca(size), _alloca(size), _malloca(size)
      name = ["malloc", "alloca", "_alloca", "_malloca"] and
      sizeArgIndex = 0
      or
      // realloc(ptr, size), GlobalAlloc(flags, bytes), LocalAlloc(flags, bytes),
      // VirtualAlloc(address, size, type, protect)
      name = ["realloc", "GlobalAlloc", "LocalAlloc", "VirtualAlloc"] and
      sizeArgIndex = 1
      or
      // HeapAlloc(heap, flags, bytes)
      name = "HeapAlloc" and
      sizeArgIndex = 2
      or
      // calloc(count, size): either operand can make the total too large
      name = "calloc" and
      sizeArgIndex = [0, 1]
      or
      // reallocarray(ptr, count, size): same reasoning as calloc
      name = "reallocarray" and
      sizeArgIndex = [1, 2]
    )
  }

  /**
   * Gets a size-determining argument of the allocation.
   */
  Expr getSizeArgument() { result = this.getArgument(sizeArgIndex) }
}

/**
 * A multiplication, addition or left shift that is, or is nested inside, the
 * size argument of some `AllocationCall`.
 */
class ArithmeticInSize extends Expr {
  ArithmeticInSize() {
    // Located at or below an allocation's size argument ...
    exists(AllocationCall allocation | allocation.getSizeArgument() = this.getParent*()) and
    // ... and one of the operator kinds this query treats as overflow-prone.
    (
      this instanceof LShiftExpr or
      this instanceof AddExpr or
      this instanceof MulExpr
    )
  }
}

/**
 * Taint-tracking configuration for the allocation-size overflow query.
 *
 * - Sources: every node that is a `FlowSource`.
 * - Sinks: an allocation's size argument itself, or any expression at or
 *   below an `ArithmeticInSize` that feeds an allocation's size argument;
 *   each is matched as an expression or an indirect expression.
 * - Barriers: any operand of a relational comparison.
 */
module IntegerOverflowConfig implements DataFlow::ConfigSig {
  predicate isSource(DataFlow::Node source) { source instanceof FlowSource }

  predicate isSink(DataFlow::Node sink) {
    exists(Expr reached | reached = [sink.asExpr(), sink.asIndirectExpr()] |
      // The value is used directly as the allocation size.
      reached = any(AllocationCall alloc).getSizeArgument()
      or
      // The value takes part in arithmetic inside the allocation size.
      exists(ArithmeticInSize arith, AllocationCall alloc |
        alloc.getSizeArgument() = arith.getParent*() and
        reached = arith.getAChild*()
      )
    )
  }

  predicate isBarrier(DataFlow::Node node) {
    // Heuristic: a value that appears as an operand of <, <=, > or >= is
    // treated as having been bounds-checked.
    node.asExpr() = any(RelationalOperation comparison).getAnOperand()
  }
}

/** Global taint tracking instantiated with `IntegerOverflowConfig`. */
module IntegerOverflow = TaintTracking::Global<IntegerOverflowConfig>;

// Report each allocation whose size argument is, or contains, the expression
// at the end of a taint path (matched as an expression or indirect expression).
from AllocationCall alloc, IntegerOverflow::PathNode source, IntegerOverflow::PathNode sink
where
  IntegerOverflow::flowPath(source, sink) and
  exists(Expr reached |
    reached = [sink.getNode().asExpr(), sink.getNode().asIndirectExpr()]
  |
    reached.getParent*() = alloc.getSizeArgument()
  )
select alloc, source, sink,
  "The allocation size is derived from $@ and may overflow, leading to undersized allocation.",
  source.getNode(), "user-controlled input"
Loading