Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/ql/integration-tests/hello-project/summary.expected
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
| Macro calls - resolved | 2 |
| Macro calls - total | 2 |
| Macro calls - unresolved | 0 |
| Taint edges - number of edges | 1670 |
| Taint edges - number of edges | 1671 |
| Taint reach - nodes tainted | 0 |
| Taint reach - per million nodes | 0 |
| Taint sinks - cryptographic operations | 0 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
| Macro calls - resolved | 2 |
| Macro calls - total | 2 |
| Macro calls - unresolved | 0 |
| Taint edges - number of edges | 1670 |
| Taint edges - number of edges | 1671 |
| Taint reach - nodes tainted | 0 |
| Taint reach - per million nodes | 0 |
| Taint sinks - cryptographic operations | 0 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
| Macro calls - resolved | 2 |
| Macro calls - total | 2 |
| Macro calls - unresolved | 0 |
| Taint edges - number of edges | 1670 |
| Taint edges - number of edges | 1671 |
| Taint reach - nodes tainted | 0 |
| Taint reach - per million nodes | 0 |
| Taint sinks - cryptographic operations | 0 |
Expand Down
7 changes: 7 additions & 0 deletions rust/ql/lib/codeql/rust/frameworks/regex.model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Models for the `regex` crate.
extensions:
- addsTo:
pack: codeql/rust-all
extensible: summaryModel
data:
- ["repo:https://github.com/rust-lang/regex:regex", "crate::escape", "Argument[0].Reference", "ReturnValue", "taint", "manual"]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This model is accurate, but I mostly added it to make sure that the barrier actually worked.

Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* Provides classes and predicates to reason about regular expression injection
* vulnerabilities.
*/

private import codeql.util.Unit
private import rust
private import codeql.rust.dataflow.DataFlow
private import codeql.rust.controlflow.CfgNodes
private import codeql.rust.dataflow.FlowSink

/**
* A data flow sink for regular expression injection vulnerabilities.
*/
abstract class RegexInjectionSink extends DataFlow::Node { }

/**
* A barrier for regular expression injection vulnerabilities.
*/
abstract class RegexInjectionBarrier extends DataFlow::Node { }

/** A sink for `a` in `Regex::new(a)` when `a` is not a literal. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any benefit to defining this sink in QL rather than models-as-data?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to not consider cases where Regex::new is directly applied to a literal as sinks. I think most real-world uses of Regex::new is of that form, so it's simply as an optimization to not consider these clearly safe cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Note that we do not currently do this in the cleartext logging query, which has far more sinks the vast majority of which just take a literal. If you think it makes a difference we might consider doing it there as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was told it could make a difference. But it probably doesn't matter much and let's double check before we spend time rewriting a bunch of stuff.

private class NewRegexInjectionSink extends RegexInjectionSink {
NewRegexInjectionSink() {
exists(CallExprCfgNode call, PathExpr path |
path = call.getFunction().getExpr() and
path.getResolvedCrateOrigin() = "repo:https://github.com/rust-lang/regex:regex" and
path.getResolvedPath() = "<crate::regex::string::Regex>::new" and
this.asExpr() = call.getArgument(0) and
not this.asExpr() instanceof LiteralExprCfgNode
)
}
}

private class MadRegexInjectionSink extends RegexInjectionSink {
MadRegexInjectionSink() { sinkNode(this, "regex-use") }
}

/**
* A unit class for adding additional flow steps.
*/
class RegexInjectionAdditionalFlowStep extends Unit {
/**
* Holds if the step from `node1` to `node2` should be considered a flow
* step for paths related to regular expression injection vulnerabilities.
*/
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
}

/**
* An escape barrier for regular expression injection vulnerabilities.
*/
private class RegexInjectionDefaultBarrier extends RegexInjectionBarrier {
RegexInjectionDefaultBarrier() {
// A barrier is any call to a function named `escape`, in particular this
// makes calls to `regex::escape` a barrier.
this.asExpr()
.getExpr()
.(CallExpr)
.getFunction()
.(PathExpr)
.getPath()
.getPart()
.getNameRef()
.getText() = "escape"
}
}
56 changes: 56 additions & 0 deletions rust/ql/src/queries/security/CWE-020/RegexInjection.qhelp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>

<overview>
<p>
Constructing a regular expression with unsanitized user input can be dangerous.
A malicious user may be able to modify the meaning of the expression, causing it
to match unexpected strings and construct large regular expressions by using
counted repetitions.
</p>
</overview>

<recommendation>
<p>
Before embedding user input into a regular expression, escape the input string
using a function such as <a
href="https://docs.rs/regex/latest/regex/fn.escape.html">regex::escape</a> to
escape meta-characters that have special meaning.
</p>
<p>
If purposefully supporting user supplied regular expressions, then use <a
href="https://docs.rs/regex/latest/regex/struct.RegexBuilder.html#method.size_limit">RegexBuilder::size_limit</a>
to limit the pattern size so that it is no larger than necessary.
</p>
</recommendation>

<example>
<p>
The following example constructs a regular expressions from the user input
<code>key</code> without escaping it first.
</p>

<sample src="RegexInjectionBad.rs" />

<p>
The regular expression is intended to match strings starting with
<code>"property"</code> such as <code>"property:foo=bar"</code>. However, a
malicious user might inject the regular expression <code>".*^|key"</code> and
unexpectedly cause strings such as <code>"key=secret"</code> to match.
</p>
<p>
If user input is used to construct a regular expression, it should be escaped
first. This ensures that malicious users cannot insert characters that have special
meanings in regular expressions.
</p>
<sample src="RegexInjectionGood.rs" />
</example>

<references>
<li>
<code>regex</code> crate documentation: <a href="https://docs.rs/regex/latest/regex/index.html#untrusted-patterns">Untrusted patterns</a>.
</li>
</references>
</qhelp>
48 changes: 48 additions & 0 deletions rust/ql/src/queries/security/CWE-020/RegexInjection.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/**
* @name Regular expression injection
* @description User input should not be used in regular expressions without first being
* escaped, otherwise a malicious user may be able to inject an expression that
* could modify the meaning of the expression, causing it to match unexpected
* strings.
* @kind path-problem
* @problem.severity error
* @security-severity 7.8
* @precision high
* @id rust/regex-injection
* @tags security
* external/cwe/cwe-020
* external/cwe/cwe-074
*/

private import rust
private import codeql.rust.dataflow.DataFlow
private import codeql.rust.dataflow.TaintTracking
private import codeql.rust.Concepts
private import codeql.rust.security.regex.RegexInjectionExtensions

/**
* A taint configuration for detecting regular expression injection vulnerabilities.
*/
module RegexInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof ActiveThreatModelSource }

predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }

predicate isBarrier(DataFlow::Node barrier) { barrier instanceof RegexInjectionBarrier }

predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
any(RegexInjectionAdditionalFlowStep s).step(nodeFrom, nodeTo)
}
}

/**
* Detect taint flow of tainted data that reaches a regular expression sink.
*/
module RegexInjectionFlow = TaintTracking::Global<RegexInjectionConfig>;

private import RegexInjectionFlow::PathGraph

from RegexInjectionFlow::PathNode sourceNode, RegexInjectionFlow::PathNode sinkNode
where RegexInjectionFlow::flowPath(sourceNode, sinkNode)
select sinkNode.getNode(), sourceNode, sinkNode,
"This regular expression is constructed from a $@.", sourceNode.getNode(), "user-provided value"
8 changes: 8 additions & 0 deletions rust/ql/src/queries/security/CWE-020/RegexInjectionBad.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use regex::Regex;

fn get_value<'h>(key: &str, property: &'h str) -> Option<&'h str> {
// BAD: User provided `key` is interpolated into the regular expression.
let pattern = format!(r"^property:{key}=(.*)$");
let re = Regex::new(&pattern).unwrap();
re.captures(property)?.get(1).map(|m| m.as_str())
}
9 changes: 9 additions & 0 deletions rust/ql/src/queries/security/CWE-020/RegexInjectionGood.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use regex::{escape, Regex};

fn get_value<'h>(key: &str, property: &'h str) -> option<&'h str> {
// GOOD: User input is escaped before being used in the regular expression.
let escaped_key = escape(key);
let pattern = format!(r"^property:{escaped_key}=(.*)$");
let re = regex::new(&pattern).unwrap();
re.captures(property)?.get(1).map(|m| m.as_str())
}
6 changes: 6 additions & 0 deletions rust/ql/test/default-threat-models.model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
extensions:
- addsTo:
pack: codeql/threat-models
extensible: threatModelConfiguration
data:
- ["local", true, 0]
Original file line number Diff line number Diff line change
Expand Up @@ -2493,6 +2493,7 @@ readStep
| file://:0:0:0:0 | [summary param] 0 in lang:std::_::crate::sys_common::ignore_notfound | Err | file://:0:0:0:0 | [summary] read: Argument[0].Field[crate::result::Result::Err(0)] in lang:std::_::crate::sys_common::ignore_notfound |
| file://:0:0:0:0 | [summary param] 0 in lang:std::_::crate::thread::current::try_with_current | function return | file://:0:0:0:0 | [summary] read: Argument[0].ReturnValue in lang:std::_::crate::thread::current::try_with_current |
| file://:0:0:0:0 | [summary param] 0 in lang:std::_::crate::thread::with_current_name | function return | file://:0:0:0:0 | [summary] read: Argument[0].ReturnValue in lang:std::_::crate::thread::with_current_name |
| file://:0:0:0:0 | [summary param] 0 in repo:https://github.com/rust-lang/regex:regex::_::crate::escape | &ref | file://:0:0:0:0 | [summary] read: Argument[0].Reference in repo:https://github.com/rust-lang/regex:regex::_::crate::escape |
| file://:0:0:0:0 | [summary param] 1 in lang:alloc::_::<crate::vec::into_iter::IntoIter as crate::iter::traits::iterator::Iterator>::fold | function return | file://:0:0:0:0 | [summary] read: Argument[1].ReturnValue in lang:alloc::_::<crate::vec::into_iter::IntoIter as crate::iter::traits::iterator::Iterator>::fold |
| file://:0:0:0:0 | [summary param] 1 in lang:alloc::_::crate::collections::btree::mem::replace | function return | file://:0:0:0:0 | [summary] read: Argument[1].ReturnValue in lang:alloc::_::crate::collections::btree::mem::replace |
| file://:0:0:0:0 | [summary param] 1 in lang:alloc::_::crate::collections::btree::mem::take_mut | function return | file://:0:0:0:0 | [summary] read: Argument[1].ReturnValue in lang:alloc::_::crate::collections::btree::mem::take_mut |
Expand Down
32 changes: 16 additions & 16 deletions rust/ql/test/library-tests/dataflow/sources/TaintSources.expected
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
| test.rs:8:10:8:22 | ...::var | Flow source 'EnvironmentSource' of type environment. |
| test.rs:9:10:9:25 | ...::var_os | Flow source 'EnvironmentSource' of type environment. |
| test.rs:11:16:11:28 | ...::var | Flow source 'EnvironmentSource' of type environment. |
| test.rs:12:16:12:31 | ...::var_os | Flow source 'EnvironmentSource' of type environment. |
| test.rs:17:25:17:38 | ...::vars | Flow source 'EnvironmentSource' of type environment. |
| test.rs:22:25:22:41 | ...::vars_os | Flow source 'EnvironmentSource' of type environment. |
| test.rs:29:29:29:42 | ...::args | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:32:16:32:29 | ...::args | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:33:16:33:32 | ...::args_os | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:34:16:34:29 | ...::args | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:42:16:42:29 | ...::args | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:46:16:46:32 | ...::args_os | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:52:15:52:35 | ...::current_dir | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:53:15:53:35 | ...::current_exe | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:54:16:54:33 | ...::home_dir | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:8:10:8:22 | ...::var | Flow source 'EnvironmentSource' of type environment (DEFAULT). |
| test.rs:9:10:9:25 | ...::var_os | Flow source 'EnvironmentSource' of type environment (DEFAULT). |
| test.rs:11:16:11:28 | ...::var | Flow source 'EnvironmentSource' of type environment (DEFAULT). |
| test.rs:12:16:12:31 | ...::var_os | Flow source 'EnvironmentSource' of type environment (DEFAULT). |
| test.rs:17:25:17:38 | ...::vars | Flow source 'EnvironmentSource' of type environment (DEFAULT). |
| test.rs:22:25:22:41 | ...::vars_os | Flow source 'EnvironmentSource' of type environment (DEFAULT). |
| test.rs:29:29:29:42 | ...::args | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:32:16:32:29 | ...::args | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:33:16:33:32 | ...::args_os | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:34:16:34:29 | ...::args | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:42:16:42:29 | ...::args | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:46:16:46:32 | ...::args_os | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:52:15:52:35 | ...::current_dir | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:53:15:53:35 | ...::current_exe | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:54:16:54:33 | ...::home_dir | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
| test.rs:62:26:62:47 | ...::get | Flow source 'RemoteSource' of type remote (DEFAULT). |
| test.rs:65:26:65:47 | ...::get | Flow source 'RemoteSource' of type remote (DEFAULT). |
| test.rs:68:26:68:47 | ...::get | Flow source 'RemoteSource' of type remote (DEFAULT). |
Expand All @@ -22,4 +22,4 @@
| test.rs:80:24:80:35 | ...::get | Flow source 'RemoteSource' of type remote (DEFAULT). |
| test.rs:112:35:112:46 | send_request | Flow source 'RemoteSource' of type remote (DEFAULT). |
| test.rs:119:31:119:42 | send_request | Flow source 'RemoteSource' of type remote (DEFAULT). |
| test.rs:203:16:203:29 | ...::args | Flow source 'CommandLineArgs' of type commandargs. |
| test.rs:203:16:203:29 | ...::args | Flow source 'CommandLineArgs' of type commandargs (DEFAULT). |
2 changes: 2 additions & 0 deletions rust/ql/test/qlpack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ dependencies:
extractor: rust
tests: .
warnOnImplicitThis: true
dataExtensions:
- default-threat-models.model.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
| Macro calls - resolved | 8 |
| Macro calls - total | 9 |
| Macro calls - unresolved | 1 |
| Taint edges - number of edges | 1670 |
| Taint edges - number of edges | 1671 |
| Taint reach - nodes tainted | 0 |
| Taint reach - per million nodes | 0 |
| Taint sinks - cryptographic operations | 0 |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#select
| main.rs:6:25:6:30 | &regex | main.rs:4:20:4:32 | ...::var | main.rs:6:25:6:30 | &regex | This regular expression is constructed from a $@. | main.rs:4:20:4:32 | ...::var | user-provided value |
edges
| main.rs:4:9:4:16 | username | main.rs:5:25:5:44 | MacroExpr | provenance | |
| main.rs:4:20:4:32 | ...::var | main.rs:4:20:4:40 | ...::var(...) [Ok] | provenance | Src:MaD:60 |
| main.rs:4:20:4:40 | ...::var(...) [Ok] | main.rs:4:20:4:66 | ... .unwrap_or(...) | provenance | MaD:1573 |
| main.rs:4:20:4:66 | ... .unwrap_or(...) | main.rs:4:9:4:16 | username | provenance | |
| main.rs:5:9:5:13 | regex | main.rs:6:26:6:30 | regex | provenance | |
| main.rs:5:17:5:45 | res | main.rs:5:25:5:44 | { ... } | provenance | |
| main.rs:5:25:5:44 | ...::format(...) | main.rs:5:17:5:45 | res | provenance | |
| main.rs:5:25:5:44 | ...::must_use(...) | main.rs:5:9:5:13 | regex | provenance | |
| main.rs:5:25:5:44 | MacroExpr | main.rs:5:25:5:44 | ...::format(...) | provenance | MaD:64 |
| main.rs:5:25:5:44 | { ... } | main.rs:5:25:5:44 | ...::must_use(...) | provenance | MaD:2996 |
| main.rs:6:26:6:30 | regex | main.rs:6:25:6:30 | &regex | provenance | |
nodes
| main.rs:4:9:4:16 | username | semmle.label | username |
| main.rs:4:20:4:32 | ...::var | semmle.label | ...::var |
| main.rs:4:20:4:40 | ...::var(...) [Ok] | semmle.label | ...::var(...) [Ok] |
| main.rs:4:20:4:66 | ... .unwrap_or(...) | semmle.label | ... .unwrap_or(...) |
| main.rs:5:9:5:13 | regex | semmle.label | regex |
| main.rs:5:17:5:45 | res | semmle.label | res |
| main.rs:5:25:5:44 | ...::format(...) | semmle.label | ...::format(...) |
| main.rs:5:25:5:44 | ...::must_use(...) | semmle.label | ...::must_use(...) |
| main.rs:5:25:5:44 | MacroExpr | semmle.label | MacroExpr |
| main.rs:5:25:5:44 | { ... } | semmle.label | { ... } |
| main.rs:6:25:6:30 | &regex | semmle.label | &regex |
| main.rs:6:26:6:30 | regex | semmle.label | regex |
subpaths
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
query: queries/security/CWE-020/RegexInjection.ql
postprocess: utils/test/InlineExpectationsTestQuery.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
| main.rs:6:25:6:30 | &regex |
| main.rs:14:25:14:30 | &regex |
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
private import codeql.rust.dataflow.DataFlow
private import codeql.rust.security.regex.RegexInjectionExtensions

query predicate regexInjectionSink(DataFlow::Node node) { node instanceof RegexInjectionSink }
36 changes: 36 additions & 0 deletions rust/ql/test/query-tests/security/CWE-020/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use regex::Regex;

fn simple_bad(hay: &str) -> Option<bool> {
let username = std::env::var("USER").unwrap_or("".to_string()); // $ Source=env
let regex = format!("foo{}bar", username);
let re = Regex::new(&regex).unwrap(); // $ Alert[rust/regex-injection]=env
Some(re.is_match(hay))
}

fn simple_good(hay: &str) -> Option<bool> {
let username = std::env::var("USER").unwrap_or("".to_string());
let escaped = regex::escape(&username);
let regex = format!("foo{}bar", escaped);
let re = Regex::new(&regex).unwrap();
Some(re.is_match(hay))
}

fn not_a_sink_literal() -> Option<bool> {
let username = std::env::var("USER").unwrap_or("".to_string());
let re = Regex::new("literal string").unwrap();
Some(re.is_match(&username))
}

fn not_a_sink_raw_literal() -> Option<bool> {
let username = std::env::var("USER").unwrap_or("".to_string());
let re = Regex::new(r"literal string").unwrap();
Some(re.is_match(&username))
}

fn main() {
let hay = "a string";
simple_bad(hay);
simple_good(hay);
not_a_sink_literal();
not_a_sink_raw_literal();
}
3 changes: 3 additions & 0 deletions rust/ql/test/query-tests/security/CWE-020/options.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
qltest_cargo_check: true
qltest_dependencies:
- regex = { version = "1.11.1" }
Loading