diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml index d6746bc56a..9cdbbfe6ea 100644 --- a/docs/content/manual/manual.yml +++ b/docs/content/manual/manual.yml @@ -3271,6 +3271,13 @@ sections: builtin outputs its input in raw mode to stder with no additional decoration, not even a newline. + Additionally, jq has support for executing "external filters" provided by + other executables. This functionality is provided by `exec` and related + filters such as `system`, but may not be available on all platforms, and does + not necessarily integrate well with other jq features. It is intended for + small processing tasks that are otherwise impossible to perform in jq + itself. + Most jq builtins are referentially transparent, and yield constant and repeatable value streams when applied to constant inputs. This is not true of I/O builtins. @@ -3344,6 +3351,23 @@ sections: Returns the line number of the input currently being filtered. + - title: "`exec(path)`, `exec(path; [args…])`" + body: | + + Spawns a new process of path with arguments args. Its input is written + to the stdin of the process: a string as-is, null as nothing, and any + other value as JSON text. Outputs an object like + `{"out": "", "err": "", "status": 0}`, containing the stdout, stderr, + and exit code of the process respectively. If the process is terminated + by an unhandled signal, the status is `-1` and a "signal" key gives the + numeric value of the signal that caused the process to terminate. + + - title: "`system(path)`, `system(path; [args…])`" + body: | + + Exactly equivalent to an `exec` call with the same arguments, followed + by `| .out | rtrim`. + - title: 'Streaming' body: | diff --git a/jq.1.prebuilt b/jq.1.prebuilt index 7239e87d16..eab5355cd4 100644 --- a/jq.1.prebuilt +++ b/jq.1.prebuilt @@ -3652,6 +3652,9 @@ At this time jq has minimal support for I/O, mostly in the form of control over Two builtins provide minimal output capabilities, \fBdebug\fR, and \fBstderr\fR\. 
(Recall that a jq program\'s output values are always output as JSON texts on \fBstdout\fR\.) The \fBdebug\fR builtin can have application\-specific behavior, such as for executables that use the libjq C API but aren\'t the jq executable itself\. The \fBstderr\fR builtin outputs its input in raw mode to stder with no additional decoration, not even a newline\. . .P +Additionally, jq has support for executing "external filters" provided by other executables\. This functionality is provided by \fBexec\fR and related filters such as \fBsystem\fR, but may not be available on all platforms, and does not necessarily integrate well with other jq features\. It is intended for small processing tasks that are otherwise impossible to perform in jq itself\. +. +.P Most jq builtins are referentially transparent, and yield constant and repeatable value streams when applied to constant inputs\. This is not true of I/O builtins\. . .SS "input" @@ -3744,6 +3747,12 @@ Returns the name of the file whose input is currently being filtered\. Note that .SS "input_line_number" Returns the line number of the input currently being filtered\. . +.SS "exec(path), exec(path; [args…])" +Spawns a new process of path with arguments args\. Its input is written to the stdin of the process: a string as\-is, null as nothing, and any other value as JSON text\. Outputs an object like \fB{"out": "", "err": "", "status": 0}\fR, containing the stdout, stderr, and exit code of the process respectively\. If the process is terminated by an unhandled signal, the status is \fB\-1\fR and a "signal" key gives the numeric value of the signal that caused the process to terminate\. +. +.SS "system(path), system(path; [args…])" +Exactly equivalent to an \fBexec\fR call with the same arguments, followed by \fB| \.out | rtrim\fR\. +. .SH "STREAMING" With the \fB\-\-stream\fR option jq can parse input texts in a streaming fashion, allowing jq programs to start processing large JSON texts immediately rather than after the parse completes\. 
If you have a single JSON text that is 1GB in size, streaming it will allow you to process it much more quickly\.
 .
diff --git a/src/builtin.c b/src/builtin.c
index 7d21bfb111..ec75e22858 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -34,6 +34,11 @@ void *alloca (size_t);
 #include
 #ifdef WIN32
 #include
+#else
+#include
+#include
+#include
+#include
 #endif
 #include "builtin.h"
 #include "compile.h"
@@ -1750,6 +1755,270 @@ static jv f_have_decnum(jq_state *jq, jv a) {
 #endif
 }
 
+#ifdef WIN32
+/* exec/2 is not implemented on Windows: consume the arguments and fail. */
+static jv f_exec(jq_state *jq, jv input, jv path, jv args) {
+  jv_free(input), jv_free(path), jv_free(args);
+  return jv_invalid_with_msg(jv_string("exec not supported on this platform"));
+}
+#else
+/* Error value for a failed pipe(); err is the errno saved right after it. */
+static jv exec_pipe_error(int err) {
+  switch (err) {
+  case EMFILE:
+    return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe() because of a per-process limit"));
+  case ENFILE:
+    return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe() because of a system-wide limit"));
+  default:
+    return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe()"));
+  }
+}
+
+/* Error value for a failed posix_spawn_file_actions_addclose(); err is its return value. */
+static jv exec_addclose_error(int err) {
+  switch (err) {
+  case EBADF:
+    return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close an invalid file descriptor"));
+  case EINVAL:
+    return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object"));
+  case ENOMEM:
+    return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to close a file descriptor"));
+  default:
+    return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close a file descriptor"));
+  }
+}
+
+/* Error value for a failed posix_spawn_file_actions_adddup2(); err is its return value. */
+static jv exec_adddup2_error(int err) {
+  switch (err) {
+  case EBADF:
+    return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to dup2 an invalid file descriptor"));
+  case ENOMEM:
+    return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to dup2 a file descriptor"));
+  case EINVAL:
+    return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object"));
+  default:
+    return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to dup2 a file descriptor"));
+  }
+}
+
+/* Closes *fd if it is open and marks it closed, so cleanup can run unconditionally. */
+static void exec_close_fd(int *fd) {
+  if (*fd != -1) {
+    close(*fd);
+    *fd = -1;
+  }
+}
+
+/*
+ * Creates a pipe in fds and registers the child-side file actions for it:
+ * close the parent's end, dup2 the child's end (fds[child_end]) onto child_fd,
+ * then close the original descriptor of the child's end. Returns 0 on success.
+ * On failure returns -1 and stores the error value in *error; descriptors
+ * opened so far are left for the caller to close.
+ */
+static int exec_setup_pipe(posix_spawn_file_actions_t *fda, int fds[2],
+                           int child_end, int child_fd, jv *error) {
+  int err;
+  if (pipe(fds)) {
+    *error = exec_pipe_error(errno);
+    return -1;
+  }
+  // posix_spawn_file_actions_*() return the error number; they don't set errno
+  if ((err = posix_spawn_file_actions_addclose(fda, fds[!child_end]))) {
+    *error = exec_addclose_error(err);
+    return -1;
+  }
+  if ((err = posix_spawn_file_actions_adddup2(fda, fds[child_end], child_fd))) {
+    *error = exec_adddup2_error(err);
+    return -1;
+  }
+  if ((err = posix_spawn_file_actions_addclose(fda, fds[child_end]))) {
+    *error = exec_addclose_error(err);
+    return -1;
+  }
+  return 0;
+}
+
+/* Writes all len bytes of data to fd, retrying after short writes and EINTR. */
+static void exec_write_all(int fd, const char *data, size_t len) {
+  while (len > 0) {
+    ssize_t n = write(fd, data, len);
+    if (n < 0) {
+      if (errno == EINTR) {
+        continue;
+      }
+      // NOTE: any other write error is non-fatal; the child just gets less input
+      return;
+    }
+    data += n;
+    len -= (size_t)n;
+  }
+}
+
+/* Appends everything read from fd, up to EOF or a read error, to acc and returns it. */
+static jv exec_read_all(int fd, jv acc) {
+  char buf[1024];
+  for (;;) {
+    ssize_t n = read(fd, buf, sizeof(buf));
+    if (n > 0) {
+      acc = jv_string_append_buf(acc, buf, n);
+    } else if (n == 0 || errno != EINTR) {
+      return acc; // EOF; a read error is treated like EOF
+    }
+  }
+}
+
+/*
+ * exec(path; args): spawns path (resolved by posix_spawnp) with the strings in
+ * args as its arguments, writes input to its stdin and captures its stdout
+ * and stderr.
+ *
+ * Input: null/invalid writes nothing, a string is written as-is, any other
+ * value is written as its JSON text.
+ *
+ * Returns {"status", "out", "err"}; if the child did not exit normally, status
+ * is -1 and "signal" holds WTERMSIG. Returns an error value for bad argument
+ * types or a failed system call. Consumes input, path and args.
+ *
+ * NOTE(review): all input is written before any output is read, and stdout is
+ * drained before stderr, so a child that fills a pipe the parent is not yet
+ * reading can block both processes.
+ */
+static jv f_exec(jq_state *jq, jv input, jv path, jv args) {
+  /* argument validation */
+  // type_error() (defined earlier in this file) consumes the value it is given,
+  // so only the other two arguments are freed before calling it
+  if (jv_get_kind(path) != JV_KIND_STRING) {
+    jv_free(input), jv_free(args);
+    return type_error(path, "exec/2 requires a string path");
+  }
+  if (jv_get_kind(args) != JV_KIND_ARRAY) {
+    jv_free(input), jv_free(path);
+    return type_error(args, "exec/2 requires an array of arguments");
+  }
+
+  // validate the whole array first so argv never needs partial cleanup
+  int bad = 0;
+  jv_array_foreach(args, i, s) {
+    if (jv_get_kind(s) != JV_KIND_STRING) {
+      bad++;
+    }
+    jv_free(s);
+  }
+  if (bad) {
+    jv_free(input), jv_free(path);
+    return type_error(args, "exec/2 only supports string arguments");
+  }
+
+  // argv is heap-allocated: its length comes from user input, so no VLA
+  const size_t argc = jv_array_length(jv_copy(args)) + 1;
+  char **argv = jv_mem_alloc(sizeof(*argv) * (argc + 1));
+  argv[0] = jv_mem_strdup(jv_string_value(path));
+  jv_array_foreach(args, i, s) {
+    argv[i + 1] = jv_mem_strdup(jv_string_value(s));
+    jv_free(s);
+  }
+  argv[argc] = NULL;
+  jv_free(path), jv_free(args);
+
+  jv result, sout, serr;
+  int fin[2] = {-1, -1}, fout[2] = {-1, -1}, ferr[2] = {-1, -1};
+  int ret, status;
+  pid_t pid;
+  posix_spawn_file_actions_t fda;
+
+  /* setting up pipes */
+  if ((ret = posix_spawn_file_actions_init(&fda))) {
+    result = jv_invalid_with_msg(jv_string("exec/2 could not initialize fd actions"));
+    goto free_argv;
+  }
+  if (exec_setup_pipe(&fda, fin, 0, 0, &result) ||  // child reads its stdin
+      exec_setup_pipe(&fda, fout, 1, 1, &result) || // child writes its stdout
+      exec_setup_pipe(&fda, ferr, 1, 2, &result)) { // child writes its stderr
+    goto close_fds;
+  }
+
+  /* execute */
+  // posix_spawnp() reports failure through its return value, not errno
+  if ((ret = posix_spawnp(&pid, argv[0], &fda, NULL, argv, NULL))) {
+    if (ret == EINVAL) {
+      result = jv_invalid_with_msg(jv_string("exec/2 failed to run posix_spawn due to an invalid file_actions object"));
+    } else {
+      result = jv_invalid_with_msg(jv_string("exec/2 failed to run posix_spawn"));
+    }
+    goto close_fds;
+  }
+  // the child has its own copies of these ends; ours must go so EOF can be seen
+  exec_close_fd(&fin[0]), exec_close_fd(&fout[1]), exec_close_fd(&ferr[1]);
+
+  /* send and receive data */
+  switch (jv_get_kind(input)) {
+  case JV_KIND_INVALID:
+  case JV_KIND_NULL:
+    break; // do not pipe invalid / null
+  case JV_KIND_STRING:
+    exec_write_all(fin[1], jv_string_value(input),
+                   (size_t)jv_string_length_bytes(jv_copy(input)));
+    break;
+  default: {
+    jv s = jv_dump_string(jv_copy(input), 0);
+    exec_write_all(fin[1], jv_string_value(s),
+                   (size_t)jv_string_length_bytes(jv_copy(s)));
+    jv_free(s);
+    break;
+  }
+  }
+  exec_close_fd(&fin[1]); // EOF on the child's stdin
+  sout = exec_read_all(fout[0], jv_string_empty(0));
+  serr = exec_read_all(ferr[0], jv_string_empty(0));
+
+  if (waitpid(pid, &status, 0) == -1) {
+    const int err = errno; // saved before the frees below can disturb it
+    jv_free(sout), jv_free(serr);
+    switch (err) {
+    case EINTR:
+      result = jv_invalid_with_msg(jv_string("exec/2 was interrupted by a signal while waiting on the child process"));
+      break;
+    case EINVAL:
+      result = jv_invalid_with_msg(jv_string("exec/2 passed invalid options to waitpid"));
+      break;
+    // we do not expect ECHILD here, so it's a generic failure
+    case ECHILD:
+    default:
+      result = jv_invalid_with_msg(jv_string("exec/2 failed in waitpid"));
+      break;
+    }
+    goto close_fds;
+  }
+
+  result = jv_object();
+  if (WIFEXITED(status)) {
+    result = jv_object_set(result, jv_string("status"), jv_number(WEXITSTATUS(status)));
+  } else {
+    // POSIX guarantees that this is WIFSIGNALED(status)
+    result = jv_object_set(result, jv_string("status"), jv_number(-1));
+    result = jv_object_set(result, jv_string("signal"), jv_number(WTERMSIG(status)));
+  }
+  result = jv_object_set(result, jv_string("out"), sout);
+  result = jv_object_set(result, jv_string("err"), serr);
+
+close_fds:
+  exec_close_fd(&fin[0]), exec_close_fd(&fin[1]);
+  exec_close_fd(&fout[0]), exec_close_fd(&fout[1]);
+  exec_close_fd(&ferr[0]), exec_close_fd(&ferr[1]);
+  // NOTE: the result of the destroy call is ignored because a failure is non-fatal
+  posix_spawn_file_actions_destroy(&fda);
+free_argv:
+  for (size_t i = 0; i < argc; i++) {
+    jv_mem_free(argv[i]);
+  }
+  jv_mem_free(argv);
+  jv_free(input);
+  return result;
+}
+#endif
+
 #define LIBM_DD(name) \
   {f_ ## name, #name, 1},
 #define LIBM_DD_NO(name) LIBM_DD(name)
@@ -1829,6 +2098,7 @@ BINOPS
   {f_current_line, "input_line_number", 1},
   {f_have_decnum, "have_decnum", 1},
   {f_have_decnum, "have_literal_numbers", 1},
+  {f_exec, "exec", 3},
 };
 #undef LIBM_DDDD_NO
 #undef LIBM_DDD_NO
diff --git a/src/builtin.jq b/src/builtin.jq
index 802595bafd..904dfcd326 100644
--- a/src/builtin.jq
+++ b/src/builtin.jq
@@ -278,3 +278,7 @@ def JOIN($idx; stream; idx_expr; join_expr):
   stream | [., $idx[idx_expr]] | join_expr;
 def IN(s): any(s == .; .);
 def IN(src; s): any(src == s; .);
+
+def exec(path): exec(path; []);
+def system(path; args): exec(path; args) | .out | rtrim ;
+def system(path): system(path; []);
diff --git a/tests/shtest b/tests/shtest
index 03fbf665e5..5b5a7b7df2 100755
--- a/tests/shtest
+++ b/tests/shtest
@@ -689,4 +689,19 @@ $VALGRIND $Q $JQ . <<\NUM
 -10E-1000000001
 NUM
 
+# exec is tested by executing jq
+if ! $msys && ! $mingw; then
+  now=$(date +%s)
+  if ! r=$($VALGRIND $Q $JQ -rn 'exec("'"$JQ_NO_B"'"; ["-rn", "'"$now"'"]) | .out | rtrim') ||
+     [ "$r" != "$now" ] ; then
+    echo "exec didn't pipe stdout correctly: expected $now but got $r"
+    exit 1
+  fi
+  if ! r=$(echo "$now" | $VALGRIND $Q $JQ -r 'system("'"$JQ_NO_B"'"; ["-r", "."])') ||
+     [ "$r" != "$now" ]; then
+    echo "exec didn't pipe input correctly: expected $now but got $r"
+    exit 1
+  fi
+fi
+
 exit 0