Skip to content

Commit

Permalink
feat: add a max-rule-time flag, and use it in the rule execution
Browse files Browse the repository at this point in the history
  • Loading branch information
amaanq committed Nov 11, 2024
1 parent e5d55ae commit bb40982
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 13 deletions.
15 changes: 15 additions & 0 deletions crates/bins/src/bin/datadog-static-analyzer-git-hook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ fn main() -> Result<()> {
opts.optflag("t", "include-testing-rules", "include testing rules");
opts.optflag("", "secrets", "enable secrets detection (BETA)");
opts.optflag("", "static-analysis", "enable static-analysis");
opts.optopt(
"",
"max-rule-time",
"how long a rule can run before being killed, in milliseconds",
"1000",
);

let matches = match opts.parse(&args[1..]) {
Ok(m) => m,
Expand Down Expand Up @@ -329,10 +335,19 @@ fn main() -> Result<()> {
print_configuration(&configuration);
}

let timeout = matches
.opt_str("max-rule-time")
.map(|val| {
val.parse::<u64>()
.context("unable to parse `max-rule-time` flag as integer")
})
.transpose()?;

let analysis_options = AnalysisOptions {
log_output: true,
use_debug,
ignore_generated_files,
timeout,
};

if should_verify_checksum {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ fn test_rule(rule: &Rule, test: &RuleTest) -> Result<String> {
log_output: true,
use_debug: true,
ignore_generated_files: false,
timeout: None,
};
let rules = vec![rule_internal];
let analyze_result = analyze(
Expand Down
15 changes: 15 additions & 0 deletions crates/bins/src/bin/datadog-static-analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ fn main() -> Result<()> {
);
// TODO (JF): Remove this when releasing 0.3.8
opts.optflag("", "ddsa-runtime", "(deprecated)");
opts.optopt(
"",
"max-rule-time",
"how long a rule can run before being killed, in milliseconds",
"1000",
);

let matches = match opts.parse(&args[1..]) {
Ok(m) => m,
Expand Down Expand Up @@ -368,10 +374,19 @@ fn main() -> Result<()> {
if matches.opt_present("ddsa-runtime") {
println!("[WARNING] the --ddsa-runtime flag is deprecated and will be removed in the next version");
}
let timeout = matches
.opt_str("max-rule-time")
.map(|val| {
val.parse::<u64>()
.context("unable to parse `max-rule-time` flag as integer")
})
.transpose()?;

let analysis_options = AnalysisOptions {
log_output: true,
use_debug,
ignore_generated_files,
timeout,
};

if should_verify_checksum {
Expand Down
2 changes: 2 additions & 0 deletions crates/common/src/analysis_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub struct AnalysisOptions {
pub log_output: bool,
pub use_debug: bool,
pub ignore_generated_files: bool,
pub timeout: Option<u64>,
}

impl Default for AnalysisOptions {
Expand All @@ -14,6 +15,7 @@ impl Default for AnalysisOptions {
log_output: false,
use_debug: false,
ignore_generated_files: true,
timeout: None,
}
}
}
18 changes: 14 additions & 4 deletions crates/static-analysis-kernel/src/analysis/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};

/// The duration an individual execution of `v8` may run before it will be forcefully halted.
const JAVASCRIPT_EXECUTION_TIMEOUT: Duration = Duration::from_millis(5000);
/// The duration an individual execution of a rule may run before it will be forcefully halted.
/// This includes the time it takes for the tree-sitter query to collect its matches, as well as
/// the time it takes for the JavaScript rule to execute.
const RULE_EXECUTION_TIMEOUT: Duration = Duration::from_millis(2000);

thread_local! {
/// A thread-local `JsRuntime`
Expand Down Expand Up @@ -200,6 +202,12 @@ where
let tree = Arc::new(tree);
let cst_parsing_time = now.elapsed();

let timeout = if let Some(timeout) = analysis_option.timeout {
Some(Duration::from_millis(timeout))
} else {
Some(RULE_EXECUTION_TIMEOUT)
};

rules
.into_iter()
.filter(|rule| rule_config.rule_is_enabled(&rule.borrow().name))
Expand All @@ -215,7 +223,7 @@ where
filename,
rule,
&rule_config.get_arguments(&rule.name),
Some(JAVASCRIPT_EXECUTION_TIMEOUT),
timeout,
);

// NOTE: This is a translation layer to map Result<T, E> to a `RuleResult` struct.
Expand Down Expand Up @@ -258,7 +266,8 @@ where
Err(err) => {
let r_f = format!("{}:{}", rule.name, filename);
let (err_kind, execution_error) = match err {
DDSAJsRuntimeError::JavaScriptTimeout { timeout } => {
DDSAJsRuntimeError::JavaScriptTimeout { timeout }
| DDSAJsRuntimeError::TreeSitterTimeout { timeout } => {
if analysis_option.use_debug {
eprintln!(
"rule:file {} TIMED OUT ({} ms)",
Expand Down Expand Up @@ -1216,6 +1225,7 @@ function visit(node, filename, code) {
log_output: true,
use_debug: false,
ignore_generated_files: false,
timeout: None,
};
let rule_config_provider = RuleConfigProvider::from_config(
&parse_config_file(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ const ghi = 'hello' + ' world';
let query = TSQuery::try_new(&tree.language(), query).unwrap();
let matches = query
.cursor()
.matches(tree.root_node(), text)
.matches(tree.root_node(), text, None)
.collect::<Vec<_>>();
assert!(query_match_bridge.is_empty());
assert!(ts_node_bridge.is_empty());
Expand All @@ -152,7 +152,7 @@ const alpha = 'bravo';
let tree = get_tree(text, &Language::JavaScript).unwrap();
let matches = query
.cursor()
.matches(tree.root_node(), text)
.matches(tree.root_node(), text, None)
.collect::<Vec<_>>();
query_match_bridge.set_data(scope, matches, &mut ts_node_bridge);
assert_eq!(get_node_id_at_idx(&query_match_bridge, 0), 3);
Expand Down
2 changes: 2 additions & 0 deletions crates/static-analysis-kernel/src/analysis/ddsa_lib/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ pub type NodeId = u32;
pub enum DDSAJsRuntimeError {
#[error("{error}")]
Execution { error: JsError },
#[error("Tree-sitter query execution timeout")]
TreeSitterTimeout { timeout: Duration },
#[error("JavaScript execution timeout")]
JavaScriptTimeout { timeout: Duration },
#[error("expected `{name}` to exist within the v8 context")]
Expand Down
107 changes: 101 additions & 6 deletions crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ impl JsRuntime {
file_name: &Arc<str>,
rule: &RuleInternal,
rule_arguments: &HashMap<String, String>,
timeout: Option<Duration>,
mut timeout: Option<Duration>,
) -> Result<ExecutionResult, DDSAJsRuntimeError> {
let script_cache = Rc::clone(&self.script_cache);
let mut script_cache_ref = script_cache.borrow_mut();
Expand All @@ -162,11 +162,24 @@ impl JsRuntime {
let mut ts_qc = ts_query_cursor.borrow_mut();
let mut query_cursor = rule.tree_sitter_query.with_cursor(&mut ts_qc);
let query_matches = query_cursor
.matches(source_tree.root_node(), source_text.as_ref())
.matches(source_tree.root_node(), source_text.as_ref(), timeout)
.filter(|captures| !captures.is_empty())
.collect::<Vec<_>>();

let ts_query_time = now.elapsed();

// It's possible that the TS query took about as long as the timeout itself, and since
// we compute the time just a little bit before matches are run, `ts_query_time` could be
// larger than the timeout. In this case, we assume that execution timed out.
// Otherwise, we pass the remaining time left to the rule execution.
timeout = timeout.map(|t| t.checked_sub(ts_query_time).unwrap_or_default());
if timeout == Some(Duration::ZERO) {
return Err(DDSAJsRuntimeError::TreeSitterTimeout {
timeout: timeout
.expect("timeout should exist if we had tree-sitter query execution timeout"),
});
}

let now = Instant::now();

let js_violations = self.execute_rule_internal(
Expand Down Expand Up @@ -612,7 +625,7 @@ mod tests {

let mut curs = ts_query.cursor();
let q_matches = curs
.matches(tree.root_node(), source_text.as_ref())
.matches(tree.root_node(), source_text.as_ref(), None)
.collect::<Vec<_>>();
runtime.execute_rule_internal(
source_text,
Expand All @@ -633,7 +646,7 @@ mod tests {
filename: &str,
ts_query: &str,
rule_code: &str,
timeout: Option<Duration>,
mut timeout: Option<Duration>,
) -> Result<Vec<js::Violation<Instance>>, DDSAJsRuntimeError> {
let source_text: Arc<str> = Arc::from(source_text);
let filename: Arc<str> = Arc::from(filename);
Expand All @@ -646,10 +659,24 @@ mod tests {

let ts_query = crate::analysis::tree_sitter::TSQuery::try_new(&ts_lang, ts_query).unwrap();

let now = Instant::now();
let mut curs = ts_query.cursor();
let q_matches = curs
.matches(tree.root_node(), source_text.as_ref())
.matches(tree.root_node(), source_text.as_ref(), timeout)
.collect::<Vec<_>>();
let ts_query_time = now.elapsed();

// It's possible that the TS query took about as long as the timeout itself, and since
// we compute the time just a little bit before matches are run, `ts_query_time` could be
// larger than the timeout. In this case, we assume that execution timed out.
// Otherwise, we pass the remaining time left to the rule execution.
timeout = timeout.map(|t| t.checked_sub(ts_query_time).unwrap_or_default());
if timeout == Some(Duration::ZERO) {
return Err(DDSAJsRuntimeError::TreeSitterTimeout {
timeout: timeout
.expect("timeout should exist if we had tree-sitter query execution timeout"),
});
}

runtime.execute_rule_internal(
&source_text,
Expand Down Expand Up @@ -831,6 +858,74 @@ function visit(captures) {
.contains("ReferenceError: abc is not defined"));
}

#[test]
fn query_execute_timeout() {
let mut runtime = JsRuntime::try_new().unwrap();
let timeout = Duration::from_millis(1000);
let code = "function foo() { const baz = 1; }".repeat(100000);
let filename = "some_filename.js";
// This query is expensive, because it's trying to check three items in succession.
// Because they do not have to be strictly after each other, it will try every single
// combination of the 1000 foo's with each other, so this query has O(n^3) time complexity by nature.
let ts_query = r#"
(
(function_declaration
body: (statement_block
(lexical_declaration
(variable_declarator
name: (identifier)
value: (number)
)
)
)
) @foo
(function_declaration
body: (statement_block
(lexical_declaration
(variable_declarator
name: (identifier)
value: (number)
)
)
)
) @foo
(function_declaration
body: (statement_block
(lexical_declaration
(variable_declarator
name: (identifier)
value: (number)
)
)
)
) @foo
)"#;
let rule_code = r#"
function visit(captures) {
const node = captures.get("foo");
const error = buildError(
node.start.line,
node.start.col,
node.end.line,
node.end.col,
"Function `foo` is too long"
);
addError(error);
}
"#;
let err = shorthand_execute_rule_internal(
&mut runtime,
&code,
filename,
ts_query,
rule_code,
Some(timeout),
)
.expect_err("Expected a timeout error");

assert!(matches!(err, DDSAJsRuntimeError::TreeSitterTimeout { .. }));
}

/// `scoped_execute` can terminate JavaScript execution that goes on for too long.
#[test]
fn scoped_execute_timeout() {
Expand Down Expand Up @@ -1026,7 +1121,7 @@ function visit(captures) {
let ts_query = TSQuery::try_new(&ts_lang, ts_query).unwrap();
let captures = ts_query
.cursor()
.matches(tree.root_node(), text.as_ref())
.matches(tree.root_node(), text.as_ref(), None)
.filter(|captures| !captures.is_empty())
.collect::<Vec<_>>();
let _ = rt.execute_rule_internal(
Expand Down
5 changes: 4 additions & 1 deletion crates/static-analysis-kernel/src/analysis/tree_sitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use common::model::position::Position;
use indexmap::IndexMap;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use streaming_iterator::StreamingIterator;
use tree_sitter::CaptureQuantifier;

Expand Down Expand Up @@ -171,11 +172,13 @@ impl<'a, 'tree> TSQueryCursor<'a, 'tree> {
&'a mut self,
node: tree_sitter::Node<'tree>,
text: &'tree str,
timeout: Option<Duration>,
) -> impl Iterator<Item = QueryMatch<tree_sitter::Node<'tree>>> + 'a {
let cursor = match &mut self.cursor {
MaybeOwnedMut::Borrowed(cursor) => cursor,
MaybeOwnedMut::Owned(cursor) => cursor,
};
cursor.set_timeout_micros(timeout.map(|t| t.as_micros()).unwrap_or_default() as u64);
let matches = cursor.matches(self.query, node, text.as_bytes());
matches.map_deref(|q_match| {
for capture in q_match.captures {
Expand Down Expand Up @@ -277,7 +280,7 @@ pub fn get_query_nodes(
) -> Vec<MatchNode> {
let mut match_nodes: Vec<MatchNode> = vec![];

for query_match in query.cursor().matches(tree.root_node(), code) {
for query_match in query.cursor().matches(tree.root_node(), code, None) {
let mut captures: HashMap<String, TreeSitterNode> = HashMap::new();
let mut captures_list: HashMap<String, Vec<TreeSitterNode>> = HashMap::new();
for capture in query_match {
Expand Down
Loading

0 comments on commit bb40982

Please sign in to comment.