From e0e835db3bf9652beb305d3a84d428a5c0e689e3 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Mon, 11 Nov 2024 14:59:40 -0500 Subject: [PATCH] feat: add a `max-rule-time` flag, and use it in the rule execution --- .../bin/datadog-static-analyzer-git-hook.rs | 15 +++ .../datadog-static-analyzer-test-ruleset.rs | 1 + .../bins/src/bin/datadog-static-analyzer.rs | 15 +++ crates/common/src/analysis_options.rs | 2 + .../src/analysis/analyze.rs | 18 +++- .../analysis/ddsa_lib/bridge/query_match.rs | 4 +- .../src/analysis/ddsa_lib/common.rs | 2 + .../src/analysis/ddsa_lib/runtime.rs | 97 +++++++++++++++++-- .../src/analysis/tree_sitter.rs | 5 +- crates/static-analysis-server/src/request.rs | 42 ++++++++ 10 files changed, 188 insertions(+), 13 deletions(-) diff --git a/crates/bins/src/bin/datadog-static-analyzer-git-hook.rs b/crates/bins/src/bin/datadog-static-analyzer-git-hook.rs index 21c798c7..0fffa217 100644 --- a/crates/bins/src/bin/datadog-static-analyzer-git-hook.rs +++ b/crates/bins/src/bin/datadog-static-analyzer-git-hook.rs @@ -159,6 +159,12 @@ fn main() -> Result<()> { opts.optflag("t", "include-testing-rules", "include testing rules"); opts.optflag("", "secrets", "enable secrets detection (BETA)"); opts.optflag("", "static-analysis", "enable static-analysis"); + opts.optopt( + "", + "max-rule-time", + "how long a rule can run before being killed, in milliseconds", + "1000", + ); let matches = match opts.parse(&args[1..]) { Ok(m) => m, @@ -329,10 +335,19 @@ fn main() -> Result<()> { print_configuration(&configuration); } + let timeout = matches + .opt_str("max-rule-time") + .map(|val| { + val.parse::() + .context("unable to parse `max-rule-time` flag as integer") + }) + .transpose()?; + let analysis_options = AnalysisOptions { log_output: true, use_debug, ignore_generated_files, + timeout, }; if should_verify_checksum { diff --git a/crates/bins/src/bin/datadog-static-analyzer-test-ruleset.rs b/crates/bins/src/bin/datadog-static-analyzer-test-ruleset.rs index f5d7e9da..aeb4f307 100644 --- a/crates/bins/src/bin/datadog-static-analyzer-test-ruleset.rs +++ b/crates/bins/src/bin/datadog-static-analyzer-test-ruleset.rs @@ -25,6 +25,7 @@ fn test_rule(rule: &Rule, test: &RuleTest) -> Result { log_output: true, use_debug: true, ignore_generated_files: false, + timeout: None, }; let rules = vec![rule_internal]; let analyze_result = analyze( diff --git a/crates/bins/src/bin/datadog-static-analyzer.rs b/crates/bins/src/bin/datadog-static-analyzer.rs index ab1ee46e..2e11b5a6 100644 --- a/crates/bins/src/bin/datadog-static-analyzer.rs +++ b/crates/bins/src/bin/datadog-static-analyzer.rs @@ -131,6 +131,12 @@ fn main() -> Result<()> { ); // TODO (JF): Remove this when releasing 0.3.8 opts.optflag("", "ddsa-runtime", "(deprecated)"); + opts.optopt( + "", + "max-rule-time", + "how long a rule can run before being killed, in milliseconds", + "1000", + ); let matches = match opts.parse(&args[1..]) { Ok(m) => m, @@ -368,10 +374,19 @@ fn main() -> Result<()> { if matches.opt_present("ddsa-runtime") { println!("[WARNING] the --ddsa-runtime flag is deprecated and will be removed in the next version"); } + let timeout = matches + .opt_str("max-rule-time") + .map(|val| { + val.parse::() + .context("unable to parse `max-rule-time` flag as integer") + }) + .transpose()?; + let analysis_options = AnalysisOptions { log_output: true, use_debug, ignore_generated_files, + timeout, }; if should_verify_checksum { diff --git a/crates/common/src/analysis_options.rs b/crates/common/src/analysis_options.rs index 4b99a9bb..0ec139b1 100644 --- a/crates/common/src/analysis_options.rs +++ b/crates/common/src/analysis_options.rs @@ -6,6 +6,7 @@ pub struct AnalysisOptions { pub log_output: bool, pub use_debug: bool, pub ignore_generated_files: bool, + pub timeout: Option, } impl Default for AnalysisOptions { @@ -14,6 +15,7 @@ impl Default for AnalysisOptions { log_output: false, use_debug: false, ignore_generated_files: true, + timeout: None, } } } diff --git a/crates/static-analysis-kernel/src/analysis/analyze.rs b/crates/static-analysis-kernel/src/analysis/analyze.rs index 061ab8ab..5fd1633b 100644 --- a/crates/static-analysis-kernel/src/analysis/analyze.rs +++ b/crates/static-analysis-kernel/src/analysis/analyze.rs @@ -16,8 +16,10 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::{Duration, Instant}; -/// The duration an individual execution of `v8` may run before it will be forcefully halted. -const JAVASCRIPT_EXECUTION_TIMEOUT: Duration = Duration::from_millis(5000); +/// The duration an individual execution of a rule may run before it will be forcefully halted. +/// This includes the time it takes for the tree-sitter query to collect its matches, as well as +/// the time it takes for the JavaScript rule to execute. +const RULE_EXECUTION_TIMEOUT: Duration = Duration::from_millis(2000); thread_local! { /// A thread-local `JsRuntime` @@ -200,6 +202,12 @@ where let tree = Arc::new(tree); let cst_parsing_time = now.elapsed(); + let timeout = if let Some(timeout) = analysis_option.timeout { + Some(Duration::from_millis(timeout)) + } else { + Some(RULE_EXECUTION_TIMEOUT) + }; + rules .into_iter() .filter(|rule| rule_config.rule_is_enabled(&rule.borrow().name)) @@ -215,7 +223,7 @@ where filename, rule, &rule_config.get_arguments(&rule.name), - Some(JAVASCRIPT_EXECUTION_TIMEOUT), + timeout, ); // NOTE: This is a translation layer to map Result to a `RuleResult` struct. @@ -258,7 +266,8 @@ where Err(err) => { let r_f = format!("{}:{}", rule.name, filename); let (err_kind, execution_error) = match err { - DDSAJsRuntimeError::JavaScriptTimeout { timeout } => { + DDSAJsRuntimeError::JavaScriptTimeout { timeout } + | DDSAJsRuntimeError::TreeSitterTimeout { timeout } => { if analysis_option.use_debug { eprintln!( "rule:file {} TIMED OUT ({} ms)", @@ -1216,6 +1225,7 @@ function visit(node, filename, code) { log_output: true, use_debug: false, ignore_generated_files: false, + timeout: None, }; let rule_config_provider = RuleConfigProvider::from_config( &parse_config_file( diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs index b7ca41bf..ce443d86 100644 --- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs +++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/bridge/query_match.rs @@ -126,7 +126,7 @@ const ghi = 'hello' + ' world'; let query = TSQuery::try_new(&tree.language(), query).unwrap(); let matches = query .cursor() - .matches(tree.root_node(), text) + .matches(tree.root_node(), text, None) .collect::>(); assert!(query_match_bridge.is_empty()); assert!(ts_node_bridge.is_empty()); @@ -152,7 +152,7 @@ const alpha = 'bravo'; let tree = get_tree(text, &Language::JavaScript).unwrap(); let matches = query .cursor() - .matches(tree.root_node(), text) + .matches(tree.root_node(), text, None) .collect::>(); query_match_bridge.set_data(scope, matches, &mut ts_node_bridge); assert_eq!(get_node_id_at_idx(&query_match_bridge, 0), 3); diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/common.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/common.rs index d5c0dd25..9c96a961 100644 --- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/common.rs +++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/common.rs @@ -15,6 +15,8 @@ pub type NodeId = u32; pub enum DDSAJsRuntimeError { #[error("{error}")] Execution { error: JsError }, + #[error("Tree-sitter query execution timeout")] + TreeSitterTimeout { timeout: Duration }, #[error("JavaScript execution timeout")] JavaScriptTimeout { timeout: Duration }, #[error("expected `{name}` to exist within the v8 context")] diff --git a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs index a4b50d73..dd3a23ad 100644 --- a/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs +++ b/crates/static-analysis-kernel/src/analysis/ddsa_lib/runtime.rs @@ -143,7 +143,7 @@ impl JsRuntime { file_name: &Arc, rule: &RuleInternal, rule_arguments: &HashMap, - timeout: Option, + mut timeout: Option, ) -> Result { let script_cache = Rc::clone(&self.script_cache); let mut script_cache_ref = script_cache.borrow_mut(); @@ -162,11 +162,24 @@ impl JsRuntime { let mut ts_qc = ts_query_cursor.borrow_mut(); let mut query_cursor = rule.tree_sitter_query.with_cursor(&mut ts_qc); let query_matches = query_cursor - .matches(source_tree.root_node(), source_text.as_ref()) + .matches(source_tree.root_node(), source_text.as_ref(), timeout) .filter(|captures| !captures.is_empty()) .collect::>(); let ts_query_time = now.elapsed(); + + // It's possible that the TS query took about as long as the timeout itself, and since + // we compute the time just a little bit before matches are run, `ts_query_time` could be + // larger than the timeout. In this case, we assume that execution timed out. + // Otherwise, we pass the remaining time left to the rule execution. + timeout = timeout.map(|t| t.checked_sub(ts_query_time).unwrap_or_default()); + if timeout == Some(Duration::ZERO) { + return Err(DDSAJsRuntimeError::TreeSitterTimeout { + timeout: timeout + .expect("timeout should exist if we had tree-sitter query execution timeout"), + }); + } + let now = Instant::now(); let js_violations = self.execute_rule_internal( @@ -612,7 +625,7 @@ mod tests { let mut curs = ts_query.cursor(); let q_matches = curs - .matches(tree.root_node(), source_text.as_ref()) + .matches(tree.root_node(), source_text.as_ref(), None) .collect::>(); runtime.execute_rule_internal( source_text, @@ -633,7 +646,7 @@ mod tests { filename: &str, ts_query: &str, rule_code: &str, - timeout: Option, + mut timeout: Option, ) -> Result>, DDSAJsRuntimeError> { let source_text: Arc = Arc::from(source_text); let filename: Arc = Arc::from(filename); @@ -646,10 +659,24 @@ mod tests { let ts_query = crate::analysis::tree_sitter::TSQuery::try_new(&ts_lang, ts_query).unwrap(); + let now = Instant::now(); let mut curs = ts_query.cursor(); let q_matches = curs - .matches(tree.root_node(), source_text.as_ref()) + .matches(tree.root_node(), source_text.as_ref(), timeout) .collect::>(); + let ts_query_time = now.elapsed(); + + // It's possible that the TS query took about as long as the timeout itself, and since + // we compute the time just a little bit before matches are run, `ts_query_time` could be + // larger than the timeout. In this case, we assume that execution timed out. + // Otherwise, we pass the remaining time left to the rule execution. + timeout = timeout.map(|t| t.checked_sub(ts_query_time).unwrap_or_default()); + if timeout == Some(Duration::ZERO) { + return Err(DDSAJsRuntimeError::TreeSitterTimeout { + timeout: timeout + .expect("timeout should exist if we had tree-sitter query execution timeout"), + }); + } runtime.execute_rule_internal( &source_text, @@ -831,6 +858,64 @@ function visit(captures) { .contains("ReferenceError: abc is not defined")); } + #[test] + fn query_execute_timeout() { + let mut runtime = JsRuntime::try_new().unwrap(); + let timeout = Duration::from_millis(100); + let code = "function foo() { const baz = 1; }".repeat(10000); + let filename = "some_filename.js"; + // This query is expensive, because it's trying to check two items in succession. + // Because they do not have to be strictly after each other, it will try every single + // combination of the 1000 foo's with each other, so this query is quadratic by nature. + let ts_query = r#" +( + (function_declaration + body: (statement_block + (lexical_declaration + (variable_declarator + name: (identifier) + value: (number) + ) + ) + ) + ) @foo + (function_declaration + body: (statement_block + (lexical_declaration + (variable_declarator + name: (identifier) + value: (number) + ) + ) + ) + ) @foo +)"#; + let rule_code = r#" +function visit(captures) { + const node = captures.get("foo"); + const error = buildError( + node.start.line, + node.start.col, + node.end.line, + node.end.col, + "Function `foo` is too long" + ); + addError(error); +} +"#; + let err = shorthand_execute_rule_internal( + &mut runtime, + &code, + filename, + ts_query, + rule_code, + Some(timeout), + ) + .expect_err("Expected a timeout error"); + + assert!(matches!(err, DDSAJsRuntimeError::TreeSitterTimeout { .. })); + } + /// `scoped_execute` can terminate JavaScript execution that goes on for too long. #[test] fn scoped_execute_timeout() { @@ -1026,7 +1111,7 @@ function visit(captures) { let ts_query = TSQuery::try_new(&ts_lang, ts_query).unwrap(); let captures = ts_query .cursor() - .matches(tree.root_node(), text.as_ref()) + .matches(tree.root_node(), text.as_ref(), None) .filter(|captures| !captures.is_empty()) .collect::>(); let _ = rt.execute_rule_internal( diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs index 9fa909d1..a92f2301 100644 --- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs +++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs @@ -4,6 +4,7 @@ use common::model::position::Position; use indexmap::IndexMap; use std::collections::HashMap; use std::sync::Arc; +use std::time::Duration; use streaming_iterator::StreamingIterator; use tree_sitter::CaptureQuantifier; @@ -171,11 +172,13 @@ impl<'a, 'tree> TSQueryCursor<'a, 'tree> { &'a mut self, node: tree_sitter::Node<'tree>, text: &'tree str, + timeout: Option, ) -> impl Iterator>> + 'a { let cursor = match &mut self.cursor { MaybeOwnedMut::Borrowed(cursor) => cursor, MaybeOwnedMut::Owned(cursor) => cursor, }; + cursor.set_timeout_micros(timeout.map(|t| t.as_micros()).unwrap_or_default() as u64); let matches = cursor.matches(self.query, node, text.as_bytes()); matches.map_deref(|q_match| { for capture in q_match.captures { @@ -277,7 +280,7 @@ pub fn get_query_nodes( ) -> Vec { let mut match_nodes: Vec = vec![]; - for query_match in query.cursor().matches(tree.root_node(), code) { + for query_match in query.cursor().matches(tree.root_node(), code, None) { let mut captures: HashMap = HashMap::new(); let mut captures_list: HashMap> = HashMap::new(); for capture in query_match { diff --git a/crates/static-analysis-server/src/request.rs b/crates/static-analysis-server/src/request.rs index 50ee0b10..1fbfd067 100644 --- a/crates/static-analysis-server/src/request.rs +++ b/crates/static-analysis-server/src/request.rs @@ -203,6 +203,7 @@ pub fn process_analysis_request(request: AnalysisRequest) -> AnalysisResponse { .map(|o| o.log_output.unwrap_or(false)) .unwrap_or(false), ignore_generated_files: false, + timeout: None, }, ); @@ -234,6 +235,7 @@ pub fn process_analysis_request(request: AnalysisRequest) -> AnalysisResponse { mod tests { use crate::model::analysis_request::{AnalysisRequestOptions, ServerRule}; use kernel::model::{ + analysis::ERROR_RULE_TIMEOUT, common::Language, rule::{RuleCategory, RuleSeverity, RuleType}, }; @@ -922,4 +924,44 @@ rulesets: // We should've logged the `int_node` assert_eq!(response.rule_responses[0].output, Some("123".to_string())); } + + #[test] + fn test_query_execution_timeout() { + let base_rule = + ServerRule{ + name: "ruleset/rule-name".to_string(), + short_description_base64: None, + description_base64: None, + category: Some(RuleCategory::BestPractices), + severity: Some(RuleSeverity::Warning), + language: Language::JavaScript, + rule_type: RuleType::TreeSitterQuery, + entity_checked: None, + code_base64: "ZnVuY3Rpb24gdmlzaXQobm9kZSwgZmlsZW5hbWUsIGNvZGUpIHsKICAgIGNvbnN0IGNhcHR1cmVkID0gbm9kZS5jYXB0dXJlc1siaWRfbm9kZSJdOwoJY29uc29sZS5sb2coZ2V0Q29kZUZvck5vZGUoY2FwdHVyZWQsIGNvZGUpKTsKfQ==".to_string(), + checksum: Some("f7e512c599b80f91b3e483f40c63192156cc3ad8cf53efae87315d0db22755c4".to_string()), + pattern: None, + tree_sitter_query_base64: Some("KAogIChmdW5jdGlvbl9kZWNsYXJhdGlvbgogICAgYm9keTogKHN0YXRlbWVudF9ibG9jawogICAgICAobGV4aWNhbF9kZWNsYXJhdGlvbgogICAgICAgICh2YXJpYWJsZV9kZWNsYXJhdG9yCiAgICAgICAgICBuYW1lOiAoaWRlbnRpZmllcikKICAgICAgICAgIHZhbHVlOiAobnVtYmVyKQogICAgICAgICkKICAgICAgKQogICAgKQogICkgQGZvbwogIChmdW5jdGlvbl9kZWNsYXJhdGlvbgogICAgYm9keTogKHN0YXRlbWVudF9ibG9jawogICAgICAobGV4aWNhbF9kZWNsYXJhdGlvbgogICAgICAgICh2YXJpYWJsZV9kZWNsYXJhdG9yCiAgICAgICAgICBuYW1lOiAoaWRlbnRpZmllcikKICAgICAgICAgIHZhbHVlOiAobnVtYmVyKQogICAgICAgICkKICAgICAgKQogICAgKQogICkgQGZvbwop".to_string()), + arguments: vec![], + }; + + let request = AnalysisRequest { + filename: "myfile.js".to_string(), + language: Language::JavaScript, + file_encoding: "utf-8".to_string(), + code_base64: encode_base64_string("function foo() { const baz = 1; }=".repeat(10000)), + configuration_base64: None, + options: Some(AnalysisRequestOptions { + use_tree_sitter: None, + log_output: Some(true), + }), + rules: vec![base_rule.clone()], + }; + let response = process_analysis_request(request.clone()); + assert_eq!(response.rule_responses.len(), 1); + assert_eq!(response.rule_responses[0].errors.len(), 1); + assert_eq!( + response.rule_responses[0].errors[0], + ERROR_RULE_TIMEOUT.to_string() + ); + } }