Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement borrowing #3044

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions src/jq_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,4 +490,77 @@ static void jv_test() {
//jv_dump(jv_copy(o2), 0); printf("\n");
jv_free(o2);
}

/// Borrowing
{
jv d = jv_string("b");
jv a = JV_ARRAY(jv_string("a"), jv_borrow(d));
jv_free(d);

jv i = jv_invalid_with_msg(jv_borrow(d));
jv m = jv_invalid_get_msg(i);
assert(!jv_is_borrowed(m));
jv_free(m);

jv c = jv_array_get(jv_borrow(a), 1);
assert(!jv_is_borrowed(c));

jv c2 = jv_getpath(jv_borrow(a), JV_ARRAY(jv_number(1)));
assert(!jv_is_borrowed(c2));
assert(jv_cmp(jv_borrow(c), jv_borrow(c2)) == 0);
jv_free(c);
jv_free(c2);

jv b = jv_borrow(a);
assert(jv_is_borrowed(b));
jv_free(b);
jv_free(b);

assert(jv_equal(jv_copy(a), b));

jv e = jv_array_concat(b, JV_ARRAY(jv_number(5)));
assert(!jv_is_borrowed(e));
jv_free(e);

jv_array_length(b);
jv k = jv_keys(b);
assert(!jv_is_borrowed(k));
jv_free(k);

jv f = jv_array_append(b, jv_number(5));
assert(!jv_is_borrowed(f));
jv_free(f);

jv g = jv_array_append(jv_array(), b);
assert(!jv_is_borrowed(g));
jv_free(g);

jv a1 = jv_parse("{\"key\":\"value\"}");
jv p = jv_set(jv_borrow(a1), jv_string("key"), jv_string("other"));
assert(!jv_is_borrowed(p));
jv_free(p);

jv q = jv_get(jv_borrow(a1), jv_string("key"));
assert(!jv_is_borrowed(q));
jv_free(q);

jv_free(a1);

jv res = jv_array_get(b, 1);
assert(!jv_is_borrowed(res));
jv_free(res);

jv h = jv_setpath(b, JV_ARRAY(jv_number(1)), jv_string("some"));
assert(!jv_is_borrowed(h));
jv_free(h);


b = jv_unborrow(b);

jv_free(a);

assert(jv_get_refcnt(a) == 1);

jv_free(b);
}
}
63 changes: 37 additions & 26 deletions src/jv.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,10 @@ const char* jv_kind_name(jv_kind k) {
return "<unknown>";
}

const jv JV_NULL = {JVP_FLAGS_NULL, 0, 0, 0, {0}};
const jv JV_INVALID = {JVP_FLAGS_INVALID, 0, 0, 0, {0}};
const jv JV_FALSE = {JVP_FLAGS_FALSE, 0, 0, 0, {0}};
const jv JV_TRUE = {JVP_FLAGS_TRUE, 0, 0, 0, {0}};
const jv JV_NULL = {JVP_FLAGS_NULL, 0, 0, 0, 0, {0}};
const jv JV_INVALID = {JVP_FLAGS_INVALID, 0, 0, 0, 0, {0}};
const jv JV_FALSE = {JVP_FLAGS_FALSE, 0, 0, 0, 0, {0}};
const jv JV_TRUE = {JVP_FLAGS_TRUE, 0, 0, 0, 0, {0}};

jv jv_true() {
return JV_TRUE;
Expand Down Expand Up @@ -151,7 +151,7 @@ jv jv_invalid_with_msg(jv err) {
i->refcnt = JV_REFCNT_INIT;
i->errmsg = err;

jv x = {JVP_FLAGS_INVALID_MSG, 0, 0, 0, {&i->refcnt}};
jv x = {JVP_FLAGS_INVALID_MSG, 0, 0, 0, 0, {&i->refcnt}};
return x;
}

Expand All @@ -164,7 +164,7 @@ jv jv_invalid_get_msg(jv inv) {

jv x;
if (JVP_HAS_FLAGS(inv, JVP_FLAGS_INVALID_MSG)) {
x = jv_copy(((jvp_invalid*)inv.u.ptr)->errmsg);
x = jv_unborrow(((jvp_invalid*)inv.u.ptr)->errmsg);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
}
else {
x = jv_null();
Expand Down Expand Up @@ -590,7 +590,7 @@ static jv jvp_literal_number_new(const char * literal) {
return JV_INVALID;
}

jv r = {JVP_FLAGS_NUMBER_LITERAL, 0, 0, JV_NUMBER_SIZE_INIT, {&n->refcnt}};
jv r = {JVP_FLAGS_NUMBER_LITERAL, 0, 0, 0, JV_NUMBER_SIZE_INIT, {&n->refcnt}};
return r;
}

Expand Down Expand Up @@ -674,7 +674,7 @@ jv jv_number(double x) {
#else
JV_KIND_NUMBER,
#endif
0, 0, 0, {.number = x}
0, 0, 0, 0, {.number = x}
};
return j;
}
Expand Down Expand Up @@ -806,7 +806,7 @@ static jvp_array* jvp_array_alloc(unsigned size) {
}

static jv jvp_array_new(unsigned size) {
jv r = {JVP_FLAGS_ARRAY, 0, 0, 0, {&jvp_array_alloc(size)->refcnt}};
jv r = {JVP_FLAGS_ARRAY, 0, 0, 0, 0, {&jvp_array_alloc(size)->refcnt}};
return r;
}

Expand Down Expand Up @@ -862,14 +862,14 @@ static jv* jvp_array_write(jv* a, int i) {
int j;
for (j = 0; j < jvp_array_length(*a); j++) {
new_array->elements[j] =
jv_copy(array->elements[j + jvp_array_offset(*a)]);
jv_unborrow(array->elements[j + jvp_array_offset(*a)]);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
}
for (; j < new_length; j++) {
new_array->elements[j] = JV_NULL;
}
new_array->length = new_length;
jvp_array_free(*a);
jv new_jv = {JVP_FLAGS_ARRAY, 0, 0, new_length, {&new_array->refcnt}};
jv new_jv = {JVP_FLAGS_ARRAY, 0, 0, 0, new_length, {&new_array->refcnt}};
*a = new_jv;
return &new_array->elements[i];
}
Expand Down Expand Up @@ -974,7 +974,7 @@ jv jv_array_get(jv j, int idx) {
jv* slot = jvp_array_read(j, idx);
jv val;
if (slot) {
val = jv_copy(*slot);
val = jv_unborrow(*slot);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
} else {
val = jv_invalid();
}
Expand All @@ -995,7 +995,12 @@ jv jv_array_set(jv j, int idx, jv val) {
// copy/free of val,j coalesced
jv* slot = jvp_array_write(&j, idx);
jv_free(*slot);
*slot = val;
if(jv_is_borrowed(val)){
*slot = jv_unborrow(val);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
}else{
*slot = val;
}
if(jv_is_borrowed(j)) return jv_unborrow(j);
return j;
}

Expand All @@ -1013,6 +1018,7 @@ jv jv_array_concat(jv a, jv b) {
a = jv_array_append(a, elem);
}
jv_free(b);
if(jv_is_borrowed(a)) return jv_unborrow(a);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
return a;
}

Expand Down Expand Up @@ -1091,7 +1097,7 @@ static jv jvp_string_copy_replace_bad(const char* data, uint32_t length) {
length = out - s->data;
s->data[length] = 0;
s->length_hashed = length << 1;
jv r = {JVP_FLAGS_STRING, 0, 0, 0, {&s->refcnt}};
jv r = {JVP_FLAGS_STRING, 0, 0, 0, 0, {&s->refcnt}};
return r;
}

Expand All @@ -1102,15 +1108,15 @@ static jv jvp_string_new(const char* data, uint32_t length) {
if (data != NULL)
memcpy(s->data, data, length);
s->data[length] = 0;
jv r = {JVP_FLAGS_STRING, 0, 0, 0, {&s->refcnt}};
jv r = {JVP_FLAGS_STRING, 0, 0, 0, 0, {&s->refcnt}};
return r;
}

static jv jvp_string_empty_new(uint32_t length) {
jvp_string* s = jvp_string_alloc(length);
s->length_hashed = 0;
memset(s->data, 0, length);
jv r = {JVP_FLAGS_STRING, 0, 0, 0, {&s->refcnt}};
jv r = {JVP_FLAGS_STRING, 0, 0, 0, 0, {&s->refcnt}};
return r;
}

Expand Down Expand Up @@ -1153,7 +1159,7 @@ static jv jvp_string_append(jv string, const char* data, uint32_t len) {
memcpy(news->data + currlen, data, len);
news->data[currlen + len] = 0;
jvp_string_free(string);
jv r = {JVP_FLAGS_STRING, 0, 0, 0, {&news->refcnt}};
jv r = {JVP_FLAGS_STRING, 0, 0, 0, 0, {&news->refcnt}};
return r;
}
}
Expand Down Expand Up @@ -1521,7 +1527,7 @@ static jv jvp_object_new(int size) {
for (int i=0; i<size*2; i++) {
hashbuckets[i] = -1;
}
jv r = {JVP_FLAGS_OBJECT, 0, 0, size, {&obj->refcnt}};
jv r = {JVP_FLAGS_OBJECT, 0, 0, 0, size, {&obj->refcnt}};
return r;
}

Expand Down Expand Up @@ -1636,8 +1642,8 @@ static jv jvp_object_unshare(jv object) {
struct object_slot* new_slot = jvp_object_get_slot(new_object, i);
*new_slot = *old_slot;
if (jv_get_kind(old_slot->string) != JV_KIND_NULL) {
new_slot->string = jv_copy(old_slot->string);
new_slot->value = jv_copy(old_slot->value);
new_slot->string = jv_unborrow(old_slot->string);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
new_slot->value = jv_unborrow(old_slot->value);
}
}

Expand Down Expand Up @@ -1711,8 +1717,7 @@ static int jvp_object_equal(jv o1, jv o2) {
if (jv_get_kind(slot->string) == JV_KIND_NULL) continue;
jv* slot2 = jvp_object_read(o2, slot->string);
if (!slot2) return 0;
// FIXME: do less refcounting here
if (!jv_equal(jv_copy(slot->value), jv_copy(*slot2))) return 0;
if (!jv_equal(jv_borrow(slot->value), jv_borrow(*slot2))) return 0;
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
len1++;
}
return len1 == len2;
Expand Down Expand Up @@ -1772,7 +1777,12 @@ jv jv_object_set(jv object, jv key, jv value) {
// copy/free of object, key, value coalesced
jv* slot = jvp_object_write(&object, key);
jv_free(*slot);
*slot = value;
if(jv_is_borrowed(value)){
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
*slot = jv_unborrow(value);
}else{
*slot = value;
}
if(jv_is_borrowed(object)) return jv_unborrow(object);
return object;
}

Expand All @@ -1781,6 +1791,7 @@ jv jv_object_delete(jv object, jv key) {
assert(JVP_HAS_KIND(key, JV_KIND_STRING));
jvp_object_delete(&object, key);
jv_free(key);
if(jv_is_borrowed(object)) return jv_unborrow(object);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
return object;
}

Expand Down Expand Up @@ -1862,14 +1873,14 @@ jv jv_object_iter_value(jv object, int iter) {
/*
* Memory management
*/
jv jv_copy(jv j) {
if (JVP_IS_ALLOCATED(j)) {
jv jv__copy(jv j) {
if (JVP_IS_ALLOCATED(j) && !j.borrowed) {
jvp_refcnt_inc(j.u.ptr);
}
return j;
}

void jv_free(jv j) {
void jv__free(jv j) {
switch(JVP_KIND(j)) {
case JV_KIND_ARRAY:
jvp_array_free(j);
Expand Down
16 changes: 13 additions & 3 deletions src/jv.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ struct jv_refcnt;
Really. Do not play with them. */
typedef struct {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You cannot use bitfields in a public struct; their order is not defined; the compiler can order however it wants.

Copy link
Member

@emanuele6 emanuele6 Feb 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If libjq and the program that uses libjq are compiled with different compilers there are no guarantees of what bits will be used, so this is likely to not work.
Even if they are compiled with the same compiler, it may still not work because the order of bit-fields needs to only be consistent in individual compilation units; the compiler may feel like ordering them differently for optimisation reasons.

At most you can have a public struct with bit-fields, and required that the user does not touch those fields manually, so they are only used by libjq functions, but that is an error-prone, and confusing way to define the layout of a public struct in my opinion. (Also you would have to make sure that all the functions that use those bit-fields are in the same compilation unit.)
And in this case it is not possible since there are static functions defined in jv.h that access bit-fields directly without calling to libjq.

Bit-fields don't just subdivide the bits of a struct in the specified order, like regular fields do for bytes: they are different, and their order is not enforced or stable. Only really suitable for internal structs. They are not a very useful C feature because of this.
The only way to do something similar to this with a public struct is with manual masking, maybe facilitated by macros.

Copy link
Member

@emanuele6 emanuele6 Feb 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed that all compilers I tried were suspiciously all using the same layout for bit-fields.
I just learned that apparently the SysV ABI (3.1.2; for amd64) specifies how bit-fields must be layed out in memory for public structs; they need to be layed out in order like for regular fields dividing the byte from most significant bit to least significant bit. Interesting!
Still you probably should avoid using bit fields in a public struct since standard C does not support them, and libjq does not only target amd64 on UNIX-like systems (I didn't bother to check whether other standard ABIs support bit-fields in structs).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You cannot use bitfields in a public struct; their order is not defined; the compiler can order however it wants.

C99 says, § 6.7.2.1, constraint 10:

  1. An implementation may allocate any addressable storage unit large enough to hold a bit-
    field. If enough space remains, a bit-field that immediately follows another bit-field in a
    structure shall be packed into adjacent bits of the same unit. If insufficient space remains,
    whether a bit-field that does not fit is put into the next unit or overlaps adjacent units is
    implementation-defined. The order of allocation of bit-fields within a unit (high-order to
    low-order or low-order to high-order) is implementation-defined. The alignment of the
    addressable storage unit is unspecified.

So, yes, we can't have bit-fields in public structures when either those bits are meant to be accessed by the application or by inline functions or macros exported by the header. But, as you note, platforms often add their own specifications around some of these issues in their ABIs. SysV, for example, does, and so does Windows:

Bit fields defined as int are treated as signed. A Microsoft extension to the ANSI C standard allows char and long types (both signed and unsigned) for bit fields. Unnamed bit fields with base type long, short, or char (signed or unsigned) force alignment to a boundary appropriate to the base type.

Bit fields are allocated within an integer from least-significant to most-significant bit.

The Windows ABI convention packs bit fields into single storage integers, and doesn't straddle storage units.

Which takes care of all concerns here for Windows. I believe we'll find the same is true for Solaris/Illumos, Linux, and OS X. I believe all of those follow SVID to some degree in their ABIs, except maybe OS X.

That said, I've no problem with using a bit of pad_ w/o using bit fields just in case.

Copy link
Member

@emanuele6 emanuele6 Feb 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, but what about non-amd64 architectures?

Anyway, those bit-fields should be replaced with masking with get/set macros, yes.
Also, I suggest using the most signficant bit for the boolean flag and the least significant bits for the integer instead of the other way around.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's get the rest of this PR in good enough shape to rebase-and-merge and then we can resolve this comment.

Copy link
Author

@MaxBrandtner MaxBrandtner Feb 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If bitfields are an issue we could use mod operations instead and perhaps rename the pad_ var to bitfield

I would like to avoid that solution though as a.borrowed is more intuitive than a.bitfield % 2

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just saw that mongoose uses bitfields as well. Seeing as mongoose is quite widely used and they showcase it being used on different architectures, I do not think bitfields will be an issue.

unsigned char kind_flags;
unsigned char pad_;
unsigned char pad_:7;
unsigned char borrowed:1;
unsigned short offset; /* array offsets */
int size;
union {
Expand All @@ -51,8 +52,15 @@ jv_kind jv_get_kind(jv);
const char* jv_kind_name(jv_kind);
static int jv_is_valid(jv x) { return jv_get_kind(x) != JV_KIND_INVALID; }

jv jv_copy(jv);
void jv_free(jv);
void jv__free(jv);
jv jv__copy(jv);

static inline jv jv_copy(jv a){if(a.borrowed) return a; return jv__copy(a);}
static inline void jv_free(jv a){if(!a.borrowed) jv__free(a);}
Comment on lines +58 to +59
Copy link
Member

@emanuele6 emanuele6 Feb 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making those functions static wrechs the api, now the jv_copy and jv_free symbols don't exist anymore.
Also likely to be broken since a static function imported into different compilation units is accessing a bitfield without calling libjq to let it do it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making these static inline was my suggestion. We can still export the original jv_copy()/jv_free() from libjq using a bit of pre-processor conditionals.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need to address this. When this PR is good shape I'll propose a patch.


static inline jv jv_borrow(jv a){a.borrowed = 1; return a;}
static inline jv jv_unborrow(jv a){a.borrowed = 0; return jv_copy(a);}
static inline jv jv_return(jv a){if(a.borrowed) return jv_unborrow(a); return a;}

int jv_get_refcnt(jv);

Expand All @@ -70,6 +78,8 @@ jv jv_true(void);
jv jv_false(void);
jv jv_bool(int);

static inline int jv_is_borrowed(jv a){return a.borrowed;}

jv jv_number(double);
jv jv_number_with_literal(const char*);
double jv_number_value(jv);
Expand Down
6 changes: 6 additions & 0 deletions src/jv_aux.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,16 @@ jv jv_get(jv t, jv k) {
jv_free(t);
jv_free(k);
}

if(jv_is_borrowed(v)) return jv_unborrow(v);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
return v;
}

jv jv_set(jv t, jv k, jv v) {
if (!jv_is_valid(v)) {
jv_free(t);
jv_free(k);
if(jv_is_borrowed(v)) return jv_unborrow(v);
MaxBrandtner marked this conversation as resolved.
Show resolved Hide resolved
return v;
}
int isnull = jv_get_kind(t) == JV_KIND_NULL;
Expand Down Expand Up @@ -233,6 +236,7 @@ jv jv_set(jv t, jv k, jv v) {
jv_free(v);
t = err;
emanuele6 marked this conversation as resolved.
Show resolved Hide resolved
}
if(jv_is_borrowed(t)) return jv_unborrow(t);
return t;
}

Expand Down Expand Up @@ -385,11 +389,13 @@ jv jv_setpath(jv root, jv path, jv value) {
if (!jv_is_valid(root)){
jv_free(value);
jv_free(path);
if(jv_is_borrowed(root)) return jv_unborrow(root);
return root;
}
if (jv_array_length(jv_copy(path)) == 0) {
jv_free(path);
jv_free(root);
if(jv_is_borrowed(value)) return jv_unborrow(value);
return value;
}
jv pathcurr = jv_array_get(jv_copy(path), 0);
Expand Down