Skip to content

Commit

Permalink
Update f1_c_gen.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
20urc3 authored Jun 28, 2024
1 parent 95a6857 commit 56f0eba
Showing 1 changed file with 7 additions and 14 deletions.
21 changes: 7 additions & 14 deletions grammars/f1_c_gen.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -78,22 +78,13 @@ def to_bytes(self):
# subnode_count
subnode_count = len(self)
ret += subnode_count.to_bytes(4, byteorder='little', signed=False)
# val_len
val_len = len(self.val)

# Encode the value as UTF-8
val_bytes = self.val.encode('utf-8')
# val_len (now stores the byte length of the UTF-8 encoded string)
val_len = len(val_bytes)
ret += val_len.to_bytes(4, byteorder='little', signed=False)
# val
# Latin-1 is an 8-bit character set. The first 128 characters of its
# set are identical to the US ASCII standard. By encoding the string as
# Latin-1, we can handle all hex characters from \u0000 to \u00ff
# Refs:
# - https://stackoverflow.com/questions/66601743/python3-str-to-bytes-convertation-problem
# - https://kb.iu.edu/d/aepu
val_bytes = bytes(self.val, 'latin-1')
if val_len != len(val_bytes):
print(f'The length of `val` should be {val_len}, but found {len(val_bytes)}.')
print(f'`val` bytes in UTF-8 encoding: {val_bytes}')
print('Please check your grammar file!')
sys.exit(1)
ret += val_bytes

# subnodes
Expand All @@ -103,6 +94,7 @@ def to_bytes(self):
return ret

@staticmethod

def from_bytes(data: bytes):
node = TreeNode()
consumed = 0
Expand Down Expand Up @@ -133,6 +125,7 @@ def from_bytes(data: bytes):

return node, consumed


def __str__(self):
ret = ''
if len(self) == 0:
Expand Down

0 comments on commit 56f0eba

Please sign in to comment.