Skip to content

Commit

Permalink
refactor: move bsearch function to C-code
Browse files Browse the repository at this point in the history
This commit fixes issue #527 and moves the bsearch function to native
C code.

The performance is a bit better:

Testing script:
```bash
# Benchmark: time the same bsearch query with the system jq (/usr/bin/jq,
# labelled "JQ code bsearch") and with the locally built ./jq (labelled
# "C code bsearch").
clear
# Unit label printed after the max-memory figure (%M) in the report below:
# KB on Darwin, MB everywhere else.
if [[ `uname` == Darwin ]]; then
    MAX_MEMORY_UNITS=KB
else
    MAX_MEMORY_UNITS=MB
fi

# Report layout used by `time`: one summary line followed by memory and
# page-fault figures, one per line.
# NOTE(review): TIMEFMT and these % escapes look like zsh's time format;
# confirm the script is meant to be run under zsh rather than bash.
export TIMEFMT='%J   %U  user %S system %P cpu %*E total'$'\n'\
'avg shared (code):         %X KB'$'\n'\
'avg unshared (data/stack): %D KB'$'\n'\
'total (sum):               %K KB'$'\n'\
'max memory:                %M '$MAX_MEMORY_UNITS''$'\n'\
'page faults from disk:     %F'$'\n'\
'other page faults:         %R'

# Both runs build a 30,000,000-element array with range/1 and search it for 3000.
echo "JQ code bsearch"
time /usr/bin/jq -n '[range(30000000)] | bsearch(3000)'

echo "C code bsearch"
time ./jq -n '[range(30000000)] | bsearch(3000)'
```

Results:

```
JQ code bsearch
3000
/usr/bin/jq -n '[range(30000000)] | bsearch(3000)'   8.63s  user 0.77s system 98% cpu 9.542 total
avg shared (code):         0 KB
avg unshared (data/stack): 0 KB
total (sum):               0 KB
max memory:                823 MB
page faults from disk:     1
other page faults:         432828
C code bsearch
3000
./jq -n '[range(30000000)] | bsearch(3000)'   8.44s  user 0.74s system 99% cpu 9.249 total
avg shared (code):         0 KB
avg unshared (data/stack): 0 KB
total (sum):               0 KB
max memory:                824 MB
page faults from disk:     0
other page faults:         432766
```

The results might be better if we could use jvp_array_read, so that there
would be no need to copy/free the input array in each iteration. I guess it
is designed that way for API purposes, for when libjq is used with multiple
threads.

Signed-off-by: Eloy Coto <eloy.coto@acalustra.com>
  • Loading branch information
eloycoto committed Nov 16, 2023
1 parent 6c03513 commit 29c1f4e
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 32 deletions.
59 changes: 58 additions & 1 deletion src/builtin.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#endif
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
Expand Down Expand Up @@ -46,7 +47,6 @@ void *alloca (size_t);
#include "jv_private.h"
#include "util.h"


#define BINOP(name) \
static jv f_ ## name(jq_state *jq, jv input, jv a, jv b) { \
jv_free(input); \
Expand Down Expand Up @@ -807,6 +807,62 @@ static jv f_sort_by_impl(jq_state *jq, jv input, jv keys) {
}
}

/*
 * bsearch/1: binary search for `target` in `input`.
 *
 * `input` is assumed to be an array sorted according to the ordering
 * implemented by jv_cmp(); `target` may be a value of any kind.
 *
 * Returns a jv number:
 *   - the index of an element equal to `target`, if one is found;
 *   - otherwise (-1 - ix), where ix is the insertion point that would
 *     leave the array sorted.
 * If the input is not sorted, the search still terminates but the result
 * is irrelevant.  If the input is not an array, an error value is returned
 * instead of aborting the process.
 *
 * Both `input` and `target` are released on every path.
 */
static jv f_bsearch(jq_state *jq, jv input, jv target) {
  jv_kind input_kind = jv_get_kind(input);
  if (input_kind != JV_KIND_ARRAY) {
    /* Remember the kind before freeing so it can be named in the error. */
    jv_free(input);
    jv_free(target);
    return jv_invalid_with_msg(jv_string_fmt("%s cannot be searched from",
                                             jv_kind_name(input_kind)));
  }

  /*
   * Search the half-open range [lo, hi).  Invariant: every element before
   * `lo` is smaller than the target and every element from `hi` onwards is
   * greater, so when the range becomes empty `lo` is the insertion point.
   * This form also covers empty and one-element arrays without special
   * cases, and tests every candidate element for equality.
   */
  int lo = 0;
  int hi = jv_array_length(jv_copy(input));
  int found = -1;

  while (lo < hi) {
    /* Written this way so the midpoint cannot overflow for large indices. */
    int mid = lo + (hi - lo) / 2;
    /* jv_cmp and jv_array_get consume their arguments, hence the copies. */
    int cmp = jv_cmp(jv_copy(target), jv_array_get(jv_copy(input), mid));
    if (cmp == 0) {
      found = mid;
      break;
    }
    if (cmp < 0) {
      hi = mid;
    } else {
      lo = mid + 1;
    }
  }

  jv_free(input);
  jv_free(target);
  return jv_number(found >= 0 ? found : -1 - lo);
}

static jv f_group_by_impl(jq_state *jq, jv input, jv keys) {
if (jv_get_kind(input) == JV_KIND_ARRAY &&
jv_get_kind(keys) == JV_KIND_ARRAY &&
Expand Down Expand Up @@ -1754,6 +1810,7 @@ BINOPS
{f_sort, "sort", 1},
{f_sort_by_impl, "_sort_by_impl", 2},
{f_group_by_impl, "_group_by_impl", 2},
{f_bsearch, "bsearch", 2},
{f_min, "min", 1},
{f_max, "max", 1},
{f_min_by_impl, "_min_by_impl", 2},
Expand Down
31 changes: 0 additions & 31 deletions src/builtin.jq
Original file line number Diff line number Diff line change
Expand Up @@ -213,37 +213,6 @@ def tostream:
getpath($p) |
reduce path(.[]?) as $q ([$p, .]; [$p+$q]);

# Assuming the input array is sorted, bsearch/1 returns
# the index of the target if the target is in the input array; and otherwise
# (-1 - ix), where ix is the insertion point that would leave the array sorted.
# If the input is not sorted, bsearch will terminate but with irrelevant results.
def bsearch($target):
# Empty input: the insertion point is 0, so the answer is -1 - 0.
if length == 0 then -1
# Single element: found at 0, or insert before it (-1) or after it (-2).
elif length == 1 then
if $target == .[0] then 0 elif $target < .[0] then -1 else -2 end
else . as $in
# state variable: [start, end, answer]
# where start and end are the lower and upper (inclusive) offsets still
# to be searched, and answer is the index found so far (null if none).
| [0, length-1, null]
# Loop until the range is empty (start > end).
| until( .[0] > .[1] ;
# An answer was found on the previous step: force end below start to break.
if .[2] != null then (.[1] = -1) # i.e. break
else
( ( (.[1] + .[0]) / 2 ) | floor ) as $mid
| $in[$mid] as $monkey
| if $monkey == $target then (.[2] = $mid) # success
# Range has one element and it did not match: stop, keeping start as is.
elif .[0] == .[1] then (.[1] = -1) # failure
# Middle element is too small: continue in the upper half.
elif $monkey < $target then (.[0] = ($mid + 1))
# Middle element is too large: continue in the lower half.
else (.[1] = ($mid - 1))
end
end )
| if .[2] == null then # compute the insertion point
# Not found: insert after the element at start if it is smaller than the
# target (insertion point start + 1), otherwise at start itself.
if $in[ .[0] ] < $target then (-2 -.[0])
else (-1 -.[0])
end
else .[2]
end
end;

# Apply f to composite entities recursively, and to atoms
def walk(f):
def w:
Expand Down

0 comments on commit 29c1f4e

Please sign in to comment.