Crest Infosolutions Git Repository

Commit b666c93f authored by Nathan Brahms
Browse files

Make Semgrep work with subshell targets

E.g.

    semgrep -e 'a' --lang js <(echo 'a')
parent c3c48840
Showing with 106 additions and 70 deletions
+106 -70
......@@ -340,14 +340,14 @@ let cache_computation file cache_file_of_file f =
res)
else
let res = f () in
(try Common2.write_value (Version.version, file, res) file_cache
with Sys_error err ->
(* We must ignore SIGXFSZ to get this exception, see
* note "SIGXFSZ (file size limit exceeded)". *)
logger#error "Could not write cache file for %s (%s): %s" file
file_cache err;
(* Make sure we don't leave corrupt cache files behind us. *)
if Sys.file_exists file_cache then Sys.remove file_cache);
(try Common2.write_value (Version.version, file, res) file_cache with
| Sys_error err ->
(* We must ignore SIGXFSZ to get this exception, see
* note "SIGXFSZ (file size limit exceeded)". *)
logger#error "Could not write cache file for %s (%s): %s" file
file_cache err;
(* Make sure we don't leave corrupt cache files behind us. *)
if Sys.file_exists file_cache then Sys.remove file_cache);
res)
let cache_file_of_file filename =
......@@ -511,7 +511,8 @@ let parse_generic lang file =
* However this introduces some weird regressions in CI so we focus on
* just Timeout for now.
*)
with Main_timeout _ as e -> Right e)
with
| Main_timeout _ as e -> Right e)
in
match v with
| Left x -> x
......@@ -531,15 +532,16 @@ let parse_pattern lang_pattern str =
Parse_pattern.parse_pattern lang_pattern ~print_errors:false str
in
res)
with exn ->
raise
(Rule.InvalidPattern
( "no-id",
str,
Rule.L (lang_pattern, []),
Common.exn_to_s exn,
Parse_info.unsafe_fake_info "no loc",
[] ))
with
| exn ->
raise
(Rule.InvalidPattern
( "no-id",
str,
Rule.L (lang_pattern, []),
Common.exn_to_s exn,
Parse_info.unsafe_fake_info "no loc",
[] ))
[@@profiling]
(*****************************************************************************)
......@@ -615,7 +617,7 @@ let xlang_files_of_dirs_or_files xlang files_or_dirs =
* Anyway right now the Semgrep python wrapper is
* calling -config with an explicit list of files.
*)
(files_or_dirs, [])
(files_or_dirs, [], fun () -> ())
| R.L (lang, _) -> Find_target.files_of_dirs_or_files lang files_or_dirs
(*****************************************************************************)
......@@ -693,29 +695,33 @@ let semgrep_with_patterns lang (rules, rule_parse_time) files skipped =
pr s
(* Parse the mini-rules in [rules_file] and run them, via
 * [semgrep_with_patterns], on the [lang] targets found under [roots].
 *
 * Any exception raised along the way is converted into a JSON error report
 * printed with [pr], after which the process exits with status 2.
 *
 * The cleanup hook returned by [Find_target.files_of_dirs_or_files] is run
 * on the success path and on the error path.
 *)
let semgrep_with_patterns_file lang rules_file roots =
  let targets, skipped, cleanup_hook =
    Find_target.files_of_dirs_or_files lang roots
  in
  try
    logger#info "Parsing %s" rules_file;
    let timed_rules =
      Common.with_time (fun () -> Parse_mini_rule.parse rules_file)
    in
    semgrep_with_patterns lang timed_rules targets skipped;
    if !profile then save_rules_file_in_tmp ();
    cleanup_hook ()
  with
  | exn ->
      logger#debug "exn before exit %s" (Common.exn_to_s exn);
      (* [exit] below never returns, so this is the last chance to run the
       * hook. A failure inside the hook is only logged, so that [exn]
       * remains the error that gets reported. *)
      (try cleanup_hook () with
      | cleanup_exn ->
          logger#debug "exn in cleanup hook %s"
            (Common.exn_to_s cleanup_exn));
      (* if !Flag.debug then save_rules_file_in_tmp (); *)
      let res =
        {
          RP.matches = [];
          errors = [ E.exn_to_error "" exn ];
          skipped = [];
          rule_profiling = None;
        }
      in
      let json = JSON_report.match_results_of_matches_and_errors [] res in
      let s = SJ.string_of_match_results json in
      pr s;
      exit 2
(*****************************************************************************)
(* Semgrep -config *)
......@@ -729,7 +735,9 @@ let semgrep_with_rules (rules, rule_parse_time) files_or_dirs =
* For now python wrapper passes down all files that should be scanned
*)
let xlang = R.xlang_of_string !lang in
let files, skipped = xlang_files_of_dirs_or_files xlang files_or_dirs in
let files, skipped, cleanup_hook =
xlang_files_of_dirs_or_files xlang files_or_dirs
in
logger#info "processing %d files, skipping %d files" (List.length files)
(List.length skipped);
......@@ -798,7 +806,8 @@ let semgrep_with_rules (rules, rule_parse_time) files_or_dirs =
| Text ->
(* the match has already been printed above. We just print errors here *)
(* pr (spf "number of errors: %d" (List.length errs)); *)
errors |> List.iter (fun err -> pr (E.string_of_error err))
errors |> List.iter (fun err -> pr (E.string_of_error err));
cleanup_hook ()
let semgrep_with_rules_file rules_file files_or_dirs =
try
......@@ -807,20 +816,21 @@ let semgrep_with_rules_file rules_file files_or_dirs =
Common.with_time (fun () -> Parse_rule.parse rules_file)
in
semgrep_with_rules timed_rules files_or_dirs
with exn when !output_format = Json ->
logger#debug "exn before exit %s" (Common.exn_to_s exn);
let res =
{
RP.matches = [];
errors = [ E.exn_to_error "" exn ];
skipped = [];
rule_profiling = None;
}
in
let json = JSON_report.match_results_of_matches_and_errors [] res in
let s = SJ.string_of_match_results json in
pr s;
exit 2
with
| exn when !output_format = Json ->
logger#debug "exn before exit %s" (Common.exn_to_s exn);
let res =
{
RP.matches = [];
errors = [ E.exn_to_error "" exn ];
skipped = [];
rule_profiling = None;
}
in
let json = JSON_report.match_results_of_matches_and_errors [] res in
let s = SJ.string_of_match_results json in
pr s;
exit 2
(*****************************************************************************)
(* Semgrep -e/-f *)
......@@ -866,7 +876,9 @@ let semgrep_with_one_pattern lang roots =
Common.with_time (fun () -> [ rule_of_pattern lang pattern_string pattern ])
in
let targets, skipped = Find_target.files_of_dirs_or_files lang roots in
let targets, skipped, cleanup_hook =
Find_target.files_of_dirs_or_files lang roots
in
match !output_format with
| Json ->
(* closer to -rules_file, but no incremental match output *)
......@@ -897,7 +909,8 @@ let semgrep_with_one_pattern lang roots =
let n = List.length !E.g_errors in
if n > 0 then pr2 (spf "error count: %d" n);
(* TODO: what's that? *)
Experiments.gen_layer_maybe _matching_tokens pattern_string targets
Experiments.gen_layer_maybe _matching_tokens pattern_string targets;
cleanup_hook ()
(*****************************************************************************)
(* Checker *)
......@@ -923,7 +936,8 @@ let validate_pattern () =
let lang = lang_of_string !lang in
let _ = parse_pattern lang s in
exit 0
with _exn -> exit 1
with
| _exn -> exit 1
(* See also Check_rule.check_files *)
......
......@@ -28,6 +28,8 @@ let min_whitespace_frequency = 0.07
*)
let min_line_frequency = 0.001
(* Callback to run once the caller is done with the targets returned by
 * files_of_dirs_or_files. The hook built there unlinks the paths reported
 * by Common.copy_fifos_to_temp. *)
type cleanup_hook = unit -> unit
type whitespace_stat = {
sample_size : int;
(* size of the block; possibly the whole file *)
......@@ -160,6 +162,9 @@ let files_of_dirs_or_files ?(keep_root_files = true)
Sys.file_exists path && not (Sys.is_directory path))
else (roots, [])
in
let explicit_targets, copied_fifos =
Common.copy_fifos_to_temp explicit_targets
in
let paths = Common.files_of_dir_or_files_no_vcs_nofilter paths in
let paths, skipped1 = exclude_files_in_skip_lists paths in
let paths, skipped2 = Guess_lang.inspect_files lang paths in
......@@ -176,4 +181,5 @@ let files_of_dirs_or_files ?(keep_root_files = true)
(fun (a : Resp.skipped_target) b -> String.compare a.path b.path)
skipped
in
(sorted_paths, sorted_skipped)
let cleanup_hook () = List.iter Unix.unlink copied_fifos in
(sorted_paths, sorted_skipped, cleanup_hook)
......@@ -3,6 +3,8 @@
language.
*)
(* Callback returned alongside the target list by files_of_dirs_or_files;
 * takes unit and returns unit. *)
type cleanup_hook = unit -> unit
(*
Scan a list of folders or files recursively and return a list of files
in the requested language. This takes care of ignoring undesirable
......@@ -27,7 +29,9 @@ val files_of_dirs_or_files :
?sort_by_decr_size:bool ->
Lang.t ->
Common.path list ->
Common.filename list * Semgrep_core_response_t.skipped_target list
Common.filename list
* Semgrep_core_response_t.skipped_target list
* cleanup_hook
(*
Sort files by decreasing size. This is meant for optimizing
......
......@@ -123,7 +123,9 @@ let dump_tree_sitter_cst lang file =
let test_parse_tree_sitter lang root_paths =
let paths = List.map Common.fullpath root_paths in
let paths, _skipped_paths = Find_target.files_of_dirs_or_files lang paths in
let paths, _skipped_paths, cleanup_hook =
Find_target.files_of_dirs_or_files lang paths
in
let stat_list = ref [] in
paths
|> Console.progress (fun k ->
......@@ -173,13 +175,14 @@ let test_parse_tree_sitter lang root_paths =
(spf "lang %s not supported with tree-sitter"
(Lang.string_of_lang lang)));
PI.correct_stat file
with exn ->
pr2 (spf "%s: exn = %s" file (Common.exn_to_s exn));
PI.bad_stat file
with
| exn ->
pr2 (spf "%s: exn = %s" file (Common.exn_to_s exn));
PI.bad_stat file
in
Common.push stat stat_list));
Parse_info.print_parsing_stat_list !stat_list;
()
cleanup_hook ()
(*****************************************************************************)
(* Pfff and tree-sitter parsing *)
......@@ -234,7 +237,9 @@ let parsing_common ?(verbose = true) lang files_or_dirs =
(* = absolute paths *)
List.map Common.fullpath files_or_dirs
in
let paths, skipped = Find_target.files_of_dirs_or_files lang paths in
let paths, skipped, cleanup_hook =
Find_target.files_of_dirs_or_files lang paths
in
let stats =
paths
|> List.rev_map (fun file ->
......@@ -268,6 +273,7 @@ let parsing_common ?(verbose = true) lang files_or_dirs =
in
stat)
in
cleanup_hook ();
(stats, skipped)
(*
......@@ -430,7 +436,7 @@ let diff_pfff_tree_sitter xs =
(*****************************************************************************)
let test_parse_rules roots =
let targets, _skipped_paths =
let targets, _skipped_paths, cleanup_hook =
Find_target.files_of_dirs_or_files Lang.Yaml roots
in
targets
......@@ -438,4 +444,5 @@ let test_parse_rules roots =
logger#info "processing %s" file;
let _r = Parse_rule.parse file in
());
cleanup_hook ();
logger#info "done test_parse_rules"
Subproject commit 174c4b07d5556929f4bb44fbe9e5766da25f6c44
Subproject commit 933257b10cf2073b8fba8f30eb1167d94659973f
......@@ -92,7 +92,9 @@ class TargetManager:
globbing or by 'git ls-files' e.g. submodules), and files missing
the read permission.
"""
return TargetManager._is_valid_file_or_dir(path) and path.is_file()
return TargetManager._is_valid_file_or_dir(path) and (
path.is_file() or path.is_fifo()
)
@staticmethod
def _filter_valid_files(paths: FrozenSet[Path]) -> FrozenSet[Path]:
......@@ -299,7 +301,9 @@ class TargetManager:
files, directories = partition_set(lambda p: not p.is_dir(), targets)
# Error on non-existent files
explicit_files, nonexistent_files = partition_set(lambda p: p.is_file(), files)
explicit_files, nonexistent_files = partition_set(
lambda p: p.is_file() or p.is_fifo(), files
)
if nonexistent_files:
self.output_handler.handle_semgrep_error(
FilesNotFoundError(tuple(nonexistent_files))
......
......@@ -82,10 +82,11 @@ def partition(pred: Callable, iterable: Iterable) -> Tuple[List, List]:
return list(filter(pred, i1)), list(itertools.filterfalse(pred, i2))
def partition_set(
    pred: Callable[[T], bool], iterable: Iterable[T]
) -> Tuple[Set[T], Set[T]]:
    """Split ``iterable`` into two sets according to ``pred``.

    E.g. partition_set(is_odd, range(10)) -> {1, 3, 5, 7, 9} and {0, 2, 4, 6, 8}

    :param pred: Predicate called once per element.
    :param iterable: Elements to split; may be a one-shot iterator.
    :return: ``(matching, rest)`` where ``matching`` holds the elements for
        which ``pred`` is truthy and ``rest`` holds all the others.
    """
    matching = set()
    rest = set()
    # Single pass: iterating twice would leave the second set empty whenever
    # ``iterable`` is a generator or other one-shot iterator.
    for item in iterable:
        if pred(item):
            matching.add(item)
        else:
            rest.add(item)
    return matching, rest
# cf. https://docs.python.org/3/library/itertools.html#itertools-recipes
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment