diff --git a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll index ba65e13dd611..767ebc97437b 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll @@ -47,6 +47,18 @@ module PolynomialRedosConfig implements DataFlow::ConfigSig { node instanceof SimpleTypeSanitizer or node.asExpr().(MethodCall).getMethod() instanceof LengthRestrictedMethod } + + predicate observeDiffInformedIncrementalMode() { any() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp | + regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp() + | + result = sink.getLocation() + or + result = regexp.getLocation() + ) + } } module PolynomialRedosFlow = TaintTracking::Global; diff --git a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll index 0e52764c1950..89aa4961e6ef 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll @@ -18,17 +18,7 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer } - // Diff-informed incremental mode is currently disabled for this query due to - // API limitations. The query exposes sink.getABacktrackingTerm() as an alert - // location, but there is no way to express that information through - // getASelectedSinkLocation() because there is no @location in the CodeQL - // database that corresponds to a term inside a regular expression. As a - // result, this query could miss alerts in diff-informed incremental mode. - // - // To address this problem, we need to have a version of - // getASelectedSinkLocation() that uses hasLocationInfo() instead of - // returning Location objects. - predicate observeDiffInformedIncrementalMode() { none() } + predicate observeDiffInformedIncrementalMode() { any() } Location getASelectedSinkLocation(DataFlow::Node sink) { result = sink.(Sink).getHighlight().getLocation() diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll index 98a42fcf5e7c..81179717e01e 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll @@ -18,6 +18,16 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node sink) { sink instanceof Sink } predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer } + + // Diff-informedness is disabled because of RegExpTerms having incorrect locations when + // the regexp is parsed from a string arising from constant folding. + predicate observeDiffInformedIncrementalMode() { none() } + + Location getASelectedSinkLocation(DataFlow::Node sink) { + result = sink.(Sink).getHighlight().getLocation() + or + result = sink.(Sink).getRegExp().getRootTerm().getLocation() + } } /** diff --git a/shared/util/codeql/util/AlertFiltering.qll b/shared/util/codeql/util/AlertFiltering.qll index 1bc366c0416d..bfc9d901b2fa 100644 --- a/shared/util/codeql/util/AlertFiltering.qll +++ b/shared/util/codeql/util/AlertFiltering.qll @@ -8,15 +8,14 @@ module; private import codeql.util.Location /** - * Holds if the query should produce alerts that match the given line ranges. + * Holds if the query may restrict its computation to only produce alerts that match the given line + * ranges. This predicate is used for implementing _diff-informed queries_ for pull requests in + * GitHub Code Scanning. * * This predicate is active if and only if it is nonempty. If this predicate is inactive, it has no - * effect. If it is active, it accepts any alert that has at least one matching location. - * - * Note that an alert that is not accepted by this filtering predicate may still be included in the - * query results if it is accepted by another active filtering predicate in this module. An alert is - * excluded from the query results if only if (1) there is at least one active filtering predicate, - * and (2) it is not accepted by any active filtering predicate. + * effect. If it is active, queries may omit alerts that don't have a _primary_ or _related_ + * location (in SARIF terminology) whose start line is within a specified range. Queries are allowed + * to produce alerts outside this range. * * An alert location is a match if it matches a row in this predicate. If `startLineStart` and * `startLineEnd` are both 0, the row specifies a whole-file match, and a location is a match if @@ -29,26 +28,24 @@ private import codeql.util.Location * - startLineStart: inclusive start of the range for alert location start line number (1-based). * - startLineEnd: inclusive end of the range for alert location start line number (1-based). * - * A query should either perform no alert filtering, or adhere to all the filtering rules in this - * module and return all and only the accepted alerts. - * - * This predicate is suitable for situations where we want to filter alerts at line granularity, - * such as based on the pull request diff. + * Note that an alert that is not accepted by this filtering predicate may still be included in the + * query results if it is accepted by another active filtering predicate in this module. An alert is + * excluded from the query results if only if (1) there is at least one active filtering predicate, + * and (2) it is not accepted by any active filtering predicate. * * See also: `restrictAlertsToExactLocation`. */ extensible predicate restrictAlertsTo(string filePath, int startLineStart, int startLineEnd); /** - * Holds if the query should produce alerts that match the given locations. + * Holds if the query may restrict its computation to only produce alerts that match the given + * character ranges. This predicate is suitable for testing, where we want to filter by the exact + * alert location, distinguishing between alerts on the same line. * * This predicate is active if and only if it is nonempty. If this predicate is inactive, it has no - * effect. If it is active, it accepts any alert that has at least one matching location. - * - * Note that an alert that is not accepted by this filtering predicate may still be included in the - * query results if it is accepted by another active filtering predicate in this module. An alert is - * excluded from the query results if only if (1) there is at least one active filtering predicate, - * and (2) it is not accepted by any active filtering predicate. + * effect. If it is active, queries may omit alerts that don't have a _primary_ or _related_ + * location (in SARIF terminology) whose location equals a tuple from this predicate. Queries are + * allowed to produce alerts outside this range. * * An alert location is a match if it matches a row in this predicate. Each row specifies an exact * location: an alert location is a match if its file path matches `filePath`, its start line and @@ -61,11 +58,10 @@ extensible predicate restrictAlertsTo(string filePath, int startLineStart, int s * - endLine: alert location end line number (1-based). * - endColumn: alert location end column number (1-based). * - * A query should either perform no alert filtering, or adhere to all the filtering rules in this - * module and return all and only the accepted alerts. - * - * This predicate is suitable for situations where we want to filter by the exact alert location, - * distinguishing between alerts on the same line. + * Note that an alert that is not accepted by this filtering predicate may still be included in the + * query results if it is accepted by another active filtering predicate in this module. An alert is + * excluded from the query results if only if (1) there is at least one active filtering predicate, + * and (2) it is not accepted by any active filtering predicate. * * See also: `restrictAlertsTo`. */ @@ -75,25 +71,64 @@ extensible predicate restrictAlertsToExactLocation( /** Module for applying alert location filtering. */ module AlertFilteringImpl { - /** Applies alert filtering to the given location. */ + pragma[nomagic] + private predicate restrictAlertsToEntireFile(string filePath) { restrictAlertsTo(filePath, 0, 0) } + + pragma[nomagic] + private predicate restrictAlertsToLine(string filePath, int line) { + exists(int startLineStart, int startLineEnd | + restrictAlertsTo(filePath, startLineStart, startLineEnd) and + line = [startLineStart .. startLineEnd] + ) + } + + /** + * Holds if the given location intersects the diff range. When that is the + * case, ensuring that alerts mentioning this location are included in the + * query results is a valid overapproximation of the requirements for + * _diff-informed queries_ as documented under `restrictAlertsTo`. + * + * Testing for full intersection rather than only matching the start line + * means that this predicate is more broadly useful than just checking whether + * a specific element is considered to be in the diff range of GitHub Code + * Scanning: + * - If it's inconvenient to pass the exact `Location` of the element of + * interest, it's valid to use a `Location` of an enclosing element. + * - This predicate could be useful for other systems of alert presentation + * where the rules don't exactly match GitHub Code Scanning. + * + * If there is no diff range, this predicate holds for all locations. Note + * that this predicate has a bindingset and will therefore be inlined; + * callers should include enough context to ensure efficient evaluation. + */ bindingset[location] predicate filterByLocation(Location location) { not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _) or - exists(string filePath, int startLineStart, int startLineEnd | - restrictAlertsTo(filePath, startLineStart, startLineEnd) - | - startLineStart = 0 and - startLineEnd = 0 and + exists(string filePath | + restrictAlertsToEntireFile(filePath) and location.hasLocationInfo(filePath, _, _, _, _) or - location.hasLocationInfo(filePath, [startLineStart .. startLineEnd], _, _, _) + exists(int locStartLine, int locEndLine | + location.hasLocationInfo(filePath, locStartLine, _, locEndLine, _) + | + restrictAlertsToLine(pragma[only_bind_into](filePath), [locStartLine .. locEndLine]) + ) ) or - exists(string filePath, int startLine, int startColumn, int endLine, int endColumn | - restrictAlertsToExactLocation(filePath, startLine, startColumn, endLine, endColumn) + // Check if an exact filter-location is fully contained in `location`. + // This is slow but only used for testing. + exists( + string filePath, int startLine, int startColumn, int endLine, int endColumn, + int filterStartLine, int filterStartColumn, int filterEndLine, int filterEndColumn | - location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) + location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) and + restrictAlertsToExactLocation(filePath, filterStartLine, filterStartColumn, filterEndLine, + filterEndColumn) and + startLine <= filterStartLine and + (startLine != filterStartLine or startColumn <= filterStartColumn) and + endLine >= filterEndLine and + (endLine != filterEndLine or endColumn >= filterEndColumn) ) } }