-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Rust: Add regular expression injection query #18946
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
494f914
179ea04
344fea2
0e965f7
5c83644
b48fd99
1e0b78e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| # Models for the `regex` crate. | ||
| extensions: | ||
| - addsTo: | ||
| pack: codeql/rust-all | ||
| extensible: summaryModel | ||
| data: | ||
| - ["repo:https://github.com/rust-lang/regex:regex", "crate::escape", "Argument[0].Reference", "ReturnValue", "taint", "manual"] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| /** | ||
| * Provides classes and predicates to reason about regular expression injection | ||
| * vulnerabilities. | ||
| */ | ||
|
|
||
| private import codeql.util.Unit | ||
| private import rust | ||
| private import codeql.rust.dataflow.DataFlow | ||
| private import codeql.rust.controlflow.CfgNodes | ||
| private import codeql.rust.dataflow.FlowSink | ||
|
|
||
| /** | ||
| * A data flow sink for regular expression injection vulnerabilities. | ||
| */ | ||
| abstract class RegexInjectionSink extends DataFlow::Node { } | ||
|
|
||
| /** | ||
| * A barrier for regular expression injection vulnerabilities. | ||
| */ | ||
| abstract class RegexInjectionBarrier extends DataFlow::Node { } | ||
|
|
||
| /** A sink for `a` in `Regex::new(a)` when `a` is not a literal. */ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any benefit to defining this sink in QL rather than models-as-data?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is to not consider cases where
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Note that we do not currently do this in the cleartext logging query, which has far more sinks the vast majority of which just take a literal. If you think it makes a difference we might consider doing it there as well.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was told it could make a difference. But it probably doesn't matter much and let's double check before we spend time rewriting a bunch of stuff. |
||
| private class NewRegexInjectionSink extends RegexInjectionSink { | ||
| NewRegexInjectionSink() { | ||
| exists(CallExprCfgNode call, PathExpr path | | ||
| path = call.getFunction().getExpr() and | ||
| path.getResolvedCrateOrigin() = "repo:https://github.com/rust-lang/regex:regex" and | ||
| path.getResolvedPath() = "<crate::regex::string::Regex>::new" and | ||
| this.asExpr() = call.getArgument(0) and | ||
| not this.asExpr() instanceof LiteralExprCfgNode | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| private class MadRegexInjectionSink extends RegexInjectionSink { | ||
| MadRegexInjectionSink() { sinkNode(this, "regex-use") } | ||
| } | ||
|
|
||
| /** | ||
| * A unit class for adding additional flow steps. | ||
| */ | ||
| class RegexInjectionAdditionalFlowStep extends Unit { | ||
| /** | ||
| * Holds if the step from `node1` to `node2` should be considered a flow | ||
| * step for paths related to regular expression injection vulnerabilities. | ||
| */ | ||
| abstract predicate step(DataFlow::Node node1, DataFlow::Node node2); | ||
| } | ||
|
|
||
| /** | ||
| * An escape barrier for regular expression injection vulnerabilities. | ||
| */ | ||
| private class RegexInjectionDefaultBarrier extends RegexInjectionBarrier { | ||
| RegexInjectionDefaultBarrier() { | ||
| // A barrier is any call to a function named `escape`, in particular this | ||
| // makes calls to `regex::escape` a barrier. | ||
| this.asExpr() | ||
| .getExpr() | ||
| .(CallExpr) | ||
| .getFunction() | ||
| .(PathExpr) | ||
| .getPath() | ||
| .getPart() | ||
| .getNameRef() | ||
| .getText() = "escape" | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| <!DOCTYPE qhelp PUBLIC | ||
| "-//Semmle//qhelp//EN" | ||
| "qhelp.dtd"> | ||
| <qhelp> | ||
|
|
||
| <overview> | ||
| <p> | ||
| Constructing a regular expression with unsanitized user input can be dangerous. | ||
| A malicious user may be able to modify the meaning of the expression, causing it | ||
| to match unexpected strings and construct large regular expressions by using | ||
| counted repetitions. | ||
| </p> | ||
| </overview> | ||
|
|
||
| <recommendation> | ||
| <p> | ||
| Before embedding user input into a regular expression, escape the input string | ||
| using a function such as <a | ||
| href="https://docs.rs/regex/latest/regex/fn.escape.html">regex::escape</a> to | ||
| escape meta-characters that have special meaning. | ||
| </p> | ||
| <p> | ||
| If purposefully supporting user supplied regular expressions, then use <a | ||
| href="https://docs.rs/regex/latest/regex/struct.RegexBuilder.html#method.size_limit">RegexBuilder::size_limit</a> | ||
| to limit the pattern size so that it is no larger than necessary. | ||
| </p> | ||
| </recommendation> | ||
|
|
||
| <example> | ||
| <p> | ||
| The following example constructs a regular expressions from the user input | ||
| <code>key</code> without escaping it first. | ||
| </p> | ||
|
|
||
| <sample src="RegexInjectionBad.rs" /> | ||
|
|
||
| <p> | ||
| The regular expression is intended to match strings starting with | ||
| <code>"property"</code> such as <code>"property:foo=bar"</code>. However, a | ||
| malicious user might inject the regular expression <code>".*^|key"</code> and | ||
| unexpectedly cause strings such as <code>"key=secret"</code> to match. | ||
| </p> | ||
| <p> | ||
| If user input is used to construct a regular expression, it should be escaped | ||
| first. This ensures that malicious users cannot insert characters that have special | ||
| meanings in regular expressions. | ||
| </p> | ||
| <sample src="RegexInjectionGood.rs" /> | ||
| </example> | ||
|
|
||
| <references> | ||
| <li> | ||
| <code>regex</code> crate documentation: <a href="https://docs.rs/regex/latest/regex/index.html#untrusted-patterns">Untrusted patterns</a>. | ||
| </li> | ||
| </references> | ||
| </qhelp> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| /** | ||
| * @name Regular expression injection | ||
| * @description User input should not be used in regular expressions without first being | ||
| * escaped, otherwise a malicious user may be able to inject an expression that | ||
| * could modify the meaning of the expression, causing it to match unexpected | ||
| * strings. | ||
| * @kind path-problem | ||
| * @problem.severity error | ||
| * @security-severity 7.8 | ||
geoffw0 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| * @precision high | ||
| * @id rust/regex-injection | ||
| * @tags security | ||
| * external/cwe/cwe-020 | ||
| * external/cwe/cwe-074 | ||
| */ | ||
|
|
||
| private import rust | ||
| private import codeql.rust.dataflow.DataFlow | ||
| private import codeql.rust.dataflow.TaintTracking | ||
| private import codeql.rust.Concepts | ||
| private import codeql.rust.security.regex.RegexInjectionExtensions | ||
|
|
||
| /** | ||
| * A taint configuration for detecting regular expression injection vulnerabilities. | ||
| */ | ||
| module RegexInjectionConfig implements DataFlow::ConfigSig { | ||
| predicate isSource(DataFlow::Node source) { source instanceof ActiveThreatModelSource } | ||
|
|
||
| predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink } | ||
|
|
||
| predicate isBarrier(DataFlow::Node barrier) { barrier instanceof RegexInjectionBarrier } | ||
|
|
||
| predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { | ||
| any(RegexInjectionAdditionalFlowStep s).step(nodeFrom, nodeTo) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Detect taint flow of tainted data that reaches a regular expression sink. | ||
| */ | ||
| module RegexInjectionFlow = TaintTracking::Global<RegexInjectionConfig>; | ||
|
|
||
| private import RegexInjectionFlow::PathGraph | ||
|
|
||
| from RegexInjectionFlow::PathNode sourceNode, RegexInjectionFlow::PathNode sinkNode | ||
| where RegexInjectionFlow::flowPath(sourceNode, sinkNode) | ||
| select sinkNode.getNode(), sourceNode, sinkNode, | ||
| "This regular expression is constructed from a $@.", sourceNode.getNode(), "user-provided value" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| use regex::Regex; | ||
|
|
||
| fn get_value<'h>(key: &str, property: &'h str) -> Option<&'h str> { | ||
| // BAD: User provided `key` is interpolated into the regular expression. | ||
| let pattern = format!(r"^property:{key}=(.*)$"); | ||
| let re = Regex::new(&pattern).unwrap(); | ||
| re.captures(property)?.get(1).map(|m| m.as_str()) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| use regex::{escape, Regex}; | ||
|
|
||
| fn get_value<'h>(key: &str, property: &'h str) -> option<&'h str> { | ||
| // GOOD: User input is escaped before being used in the regular expression. | ||
| let escaped_key = escape(key); | ||
| let pattern = format!(r"^property:{escaped_key}=(.*)$"); | ||
| let re = regex::new(&pattern).unwrap(); | ||
| re.captures(property)?.get(1).map(|m| m.as_str()) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| extensions: | ||
| - addsTo: | ||
| pack: codeql/threat-models | ||
| extensible: threatModelConfiguration | ||
| data: | ||
| - ["local", true, 0] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,3 +6,5 @@ dependencies: | |
| extractor: rust | ||
| tests: . | ||
| warnOnImplicitThis: true | ||
| dataExtensions: | ||
| - default-threat-models.model.yml | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| #select | ||
| | main.rs:6:25:6:30 | ®ex | main.rs:4:20:4:32 | ...::var | main.rs:6:25:6:30 | ®ex | This regular expression is constructed from a $@. | main.rs:4:20:4:32 | ...::var | user-provided value | | ||
| edges | ||
| | main.rs:4:9:4:16 | username | main.rs:5:25:5:44 | MacroExpr | provenance | | | ||
| | main.rs:4:20:4:32 | ...::var | main.rs:4:20:4:40 | ...::var(...) [Ok] | provenance | Src:MaD:60 | | ||
| | main.rs:4:20:4:40 | ...::var(...) [Ok] | main.rs:4:20:4:66 | ... .unwrap_or(...) | provenance | MaD:1573 | | ||
| | main.rs:4:20:4:66 | ... .unwrap_or(...) | main.rs:4:9:4:16 | username | provenance | | | ||
| | main.rs:5:9:5:13 | regex | main.rs:6:26:6:30 | regex | provenance | | | ||
| | main.rs:5:17:5:45 | res | main.rs:5:25:5:44 | { ... } | provenance | | | ||
| | main.rs:5:25:5:44 | ...::format(...) | main.rs:5:17:5:45 | res | provenance | | | ||
| | main.rs:5:25:5:44 | ...::must_use(...) | main.rs:5:9:5:13 | regex | provenance | | | ||
| | main.rs:5:25:5:44 | MacroExpr | main.rs:5:25:5:44 | ...::format(...) | provenance | MaD:64 | | ||
| | main.rs:5:25:5:44 | { ... } | main.rs:5:25:5:44 | ...::must_use(...) | provenance | MaD:2996 | | ||
| | main.rs:6:26:6:30 | regex | main.rs:6:25:6:30 | ®ex | provenance | | | ||
| nodes | ||
| | main.rs:4:9:4:16 | username | semmle.label | username | | ||
| | main.rs:4:20:4:32 | ...::var | semmle.label | ...::var | | ||
| | main.rs:4:20:4:40 | ...::var(...) [Ok] | semmle.label | ...::var(...) [Ok] | | ||
| | main.rs:4:20:4:66 | ... .unwrap_or(...) | semmle.label | ... .unwrap_or(...) | | ||
| | main.rs:5:9:5:13 | regex | semmle.label | regex | | ||
| | main.rs:5:17:5:45 | res | semmle.label | res | | ||
| | main.rs:5:25:5:44 | ...::format(...) | semmle.label | ...::format(...) | | ||
| | main.rs:5:25:5:44 | ...::must_use(...) | semmle.label | ...::must_use(...) | | ||
| | main.rs:5:25:5:44 | MacroExpr | semmle.label | MacroExpr | | ||
| | main.rs:5:25:5:44 | { ... } | semmle.label | { ... } | | ||
| | main.rs:6:25:6:30 | ®ex | semmle.label | ®ex | | ||
| | main.rs:6:26:6:30 | regex | semmle.label | regex | | ||
| subpaths |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| query: queries/security/CWE-020/RegexInjection.ql | ||
| postprocess: utils/test/InlineExpectationsTestQuery.ql |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| | main.rs:6:25:6:30 | ®ex | | ||
| | main.rs:14:25:14:30 | ®ex | |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| private import codeql.rust.dataflow.DataFlow | ||
| private import codeql.rust.security.regex.RegexInjectionExtensions | ||
|
|
||
| query predicate regexInjectionSink(DataFlow::Node node) { node instanceof RegexInjectionSink } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| use regex::Regex; | ||
|
|
||
| fn simple_bad(hay: &str) -> Option<bool> { | ||
| let username = std::env::var("USER").unwrap_or("".to_string()); // $ Source=env | ||
| let regex = format!("foo{}bar", username); | ||
| let re = Regex::new(®ex).unwrap(); // $ Alert[rust/regex-injection]=env | ||
| Some(re.is_match(hay)) | ||
| } | ||
|
|
||
| fn simple_good(hay: &str) -> Option<bool> { | ||
| let username = std::env::var("USER").unwrap_or("".to_string()); | ||
| let escaped = regex::escape(&username); | ||
| let regex = format!("foo{}bar", escaped); | ||
| let re = Regex::new(®ex).unwrap(); | ||
| Some(re.is_match(hay)) | ||
| } | ||
|
|
||
| fn not_a_sink_literal() -> Option<bool> { | ||
| let username = std::env::var("USER").unwrap_or("".to_string()); | ||
| let re = Regex::new("literal string").unwrap(); | ||
| Some(re.is_match(&username)) | ||
| } | ||
|
|
||
| fn not_a_sink_raw_literal() -> Option<bool> { | ||
| let username = std::env::var("USER").unwrap_or("".to_string()); | ||
| let re = Regex::new(r"literal string").unwrap(); | ||
| Some(re.is_match(&username)) | ||
| } | ||
|
|
||
| fn main() { | ||
| let hay = "a string"; | ||
| simple_bad(hay); | ||
| simple_good(hay); | ||
| not_a_sink_literal(); | ||
| not_a_sink_raw_literal(); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| qltest_cargo_check: true | ||
| qltest_dependencies: | ||
| - regex = { version = "1.11.1" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This model is accurate, but I mostly added it to make sure that the barrier actually worked.