-
Notifications
You must be signed in to change notification settings - Fork 3
/
StringOps.bas
484 lines (378 loc) · 17 KB
/
StringOps.bas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
'-----------------------------------------------------------------------------------------------------------------------
' String related routines
' Copyright (c) 2024 Samuel Gomes
'-----------------------------------------------------------------------------------------------------------------------
$INCLUDEONCE
'$INCLUDE:'StringOps.bi'
'-----------------------------------------------------------------------------------------------------------------------
' Test code for debugging the library
'-----------------------------------------------------------------------------------------------------------------------
'$DEBUG
'$CONSOLE:ONLY
'PRINT STRING_NULL
'PRINT String_ToCStr("abcd") + "<END", LEN(String_ToCStr("abcd"))
'PRINT String_Filter("Hello, -234.234world!", "+-01234567890.", FALSE) + "<END"
'PRINT String_Filter("Hello,-234.234 world!", "+-01234567890.", TRUE) + "<END"
'PRINT String_FormatBoolean(TRUE, 20)
'PRINT String_FormatBoolean(FALSE, 20)
'PRINT String_FormatLong(&HBE, "%.4X")
'PRINT String_FormatInteger64(&HBE, "%.10llu")
'PRINT String_FormatSingle(25.78, "%f")
'PRINT String_FormatDouble(18.4455, "%f")
'PRINT String_FormatOffset(&HDEADBEEFBEEFDEAD, "%p")
'PRINT String_FormatString("hello", "----%s----")
'PRINT String_FormatLong(0, "%.4X")
'PRINT String_RemoveEnclosingPair("(hello + (2 * 5) - world)", "()")
'PRINT STRING_QUOTE + "Hello, world" + STRING_QUOTE + STRING_LF + "Greetings!"
'DIM AS STRING myStr1, myStr2
'myStr1 = "Toolbox64"
'myStr2 = "Shadow Warrior"
'PRINT String_Reverse(myStr1)
'String_Reverse myStr2
'PRINT myStr2
'PRINT myStr1
'PRINT String_IsAlphaNumeric(ASC("9"))
'PRINT String_IsAlphabetic(ASC("x"))
'PRINT String_IsLowerCase(ASC("x"))
'PRINT String_IsUpperCase(ASC("X"))
'PRINT String_IsDigit(ASC("1"))
'PRINT String_IsHexadecimalDigit(ASC("f"))
'PRINT String_IsControlCharacter(13)
'PRINT String_IsGraphicalCharacter(126)
'PRINT String_IsWhiteSpace(9)
'PRINT String_IsBlank(9)
'PRINT String_IsPrintable(32)
'PRINT String_IsPunctuation(ASC("!"))
'DIM r AS _UNSIGNED _OFFSET: r = String_RegExCompile("[Hh]ello [Ww]orld\s*[!]?")
'DIM AS LONG l, n: n = String_RegExSearchCompiled(r, "ahem.. 'hello world !' ..", 1, l)
'IF n > 0 THEN
' PRINT "Match at"; n; ","; l; "chars long"
'END IF
'String_RegExFree r
'n = 1
'DO
' n = String_RegExSearch("b[aeiou]b", "bub bob bib bab", n, l)
' IF n > 0 THEN
' PRINT "Match at"; n; ","; l; "chars long"
' n = n + l
' END IF
'LOOP UNTIL n = 0
'myStr1 = String_GetToken("x = sin(0.9) + cos(0.5)", " ")
'DO
' PRINT myStr1
' myStr1 = String_GetToken(STRING_EMPTY, " ")
'LOOP UNTIL LEN(myStr1) = NULL
'END
'-----------------------------------------------------------------------------------------------------------------------
' Converts a NULL terminated C string (cstring) to a BASIC string (bstring).
' Everything from the first CHR$(NULL) onwards is dropped. If s holds no terminator, s is returned unchanged.
FUNCTION String_ToBStr$ (s AS STRING)
    $CHECKING:OFF
    DIM terminatorAt AS LONG: terminatorAt = INSTR(s, CHR$(NULL))

    IF terminatorAt > NULL THEN
        ' Keep only the bytes that precede the terminator
        String_ToBStr = LEFT$(s, terminatorAt - 1)
    ELSE
        String_ToBStr = s
    END IF
    $CHECKING:ON
END FUNCTION
' Returns a copy of s with CHR$(NULL) appended to it.
' Just a convenience function for use when calling external libraries
FUNCTION String_ToCStr$ (s AS STRING)
$CHECKING:OFF
String_ToCStr = s + CHR$(NULL) ' s itself is not modified; the terminator only exists in the returned string
$CHECKING:ON
END FUNCTION
' Cleans text of control characters and makes it printable.
' The result has the same length as text: every byte greater than KEY_SPACE is copied as-is and
' every other byte is returned as a space (KEY_SPACE is defined outside this file).
FUNCTION String_MakePrintable$ (text AS STRING)
    DIM textLength AS _UNSIGNED LONG: textLength = LEN(text)
    DIM cleaned AS STRING: cleaned = SPACE$(textLength) ' starts out as all spaces

    $CHECKING:OFF
    DIM offset AS _UNSIGNED LONG ' zero-based byte offset into text and cleaned
    DIM byteValue AS _UNSIGNED _BYTE

    DO UNTIL offset >= textLength
        byteValue = PeekStringByte(text, offset)

        ' Only overwrite the pre-filled space when the byte is above KEY_SPACE
        IF byteValue > KEY_SPACE THEN PokeStringByte cleaned, offset, byteValue

        offset = offset + 1
    LOOP
    $CHECKING:ON

    String_MakePrintable = cleaned
END FUNCTION
' Returns true if text starts with the first character of pair and ends with the second one, like 'hello'
' text - the string to check (strings shorter than 2 characters never match)
' pair - the opening character followed by the closing character (only the first 2 characters are used)
FUNCTION String_HasEnclosingPair%% (text AS STRING, pair AS STRING)
    $CHECKING:OFF
    ' Both strings need at least two characters; the default return value (false) is used otherwise
    IF LEN(text) < 2 OR LEN(pair) < 2 THEN EXIT FUNCTION

    ' The opening character must match the first byte of text
    IF PeekStringByte(pair, 0) <> PeekStringByte(text, 0) THEN EXIT FUNCTION

    ' The closing character must match the last byte of text
    String_HasEnclosingPair = (PeekStringByte(pair, 1) = PeekStringByte(text, LEN(text) - 1))
    $CHECKING:ON
END FUNCTION
' Strips the enclosing pair from a string when it has one. So, 'hello world' is returned as just hello world
' text - is the string to process
' pair - is the enclosing pair. E.g. "''", "()", "[]" etc.
' Returns: text without its first and last character if String_HasEnclosingPair reports a match; text unchanged otherwise
FUNCTION String_RemoveEnclosingPair$ (text AS STRING, pair AS STRING)
    DIM stripped AS STRING: stripped = text

    ' Drop exactly one character from each end
    IF String_HasEnclosingPair(text, pair) THEN stripped = MID$(text, 2, LEN(text) - 2)

    String_RemoveEnclosingPair = stripped
END FUNCTION
' Tokenizes a string to a dynamic string array
' text - is the input string
' delims - is a list of delimiters (multiple delimiters can be specified)
' quoteChars - is the string containing the opening and closing "quote" characters. Should be 2 chars only or nothing
' returnDelims - if True, then the routine will also return the delimiters in the correct position in the tokens array
' tokens() - is the array that will hold the tokens
' Returns: the number of tokens parsed
' Notes:
' - Tokens are written starting at LBOUND(tokens). The array is grown with REDIM _PRESERVE in chunks of 512
'   and trimmed to the exact size at the end, so it must be a dynamic array.
' - Empty tokens are never stored (see add_token), so consecutive delimiters do not produce empty entries.
' - While inside a quote every character except the closing quote character is appended to the token, including delimiters.
' - When returnDelims is true, the returned count includes the delimiter / quote entries that were stored.
' - If text is empty, or nothing was stored, tokens() is left as it was.
FUNCTION String_Tokenize& (text AS STRING, delims AS STRING, quoteChars AS STRING, returnDelims AS _BYTE, tokens() AS STRING)
IF LEN(text) = NULL THEN EXIT FUNCTION ' nothing to be done
DIM arrIdx AS LONG: arrIdx = LBOUND(tokens) ' we'll always start from the array lower bound - whatever it is
DIM insideQuote AS _BYTE ' flag to track if currently inside a quote
DIM token AS STRING ' holds a token until it is ready to be added to the array
DIM char AS STRING * 1 ' this is a single char from text we are iterating through
DIM AS LONG i, count
' Iterate through the characters in the text string
FOR i = 1 TO LEN(text)
char = CHR$(ASC(text, i))
IF insideQuote THEN
' The closing quote character is the last character of quoteChars
IF char = RIGHT$(quoteChars, 1) THEN
' Closing quote char encountered, resume delimiting
insideQuote = FALSE
GOSUB add_token ' add the token to the array
IF returnDelims THEN GOSUB add_delim ' add the closing quote char as delimiter if required
ELSE
token = token + char ' add the character to the current token
END IF
ELSE
' The opening quote character is the first character of quoteChars
IF char = LEFT$(quoteChars, 1) THEN
' Opening quote char encountered, temporarily stop delimiting
insideQuote = TRUE
GOSUB add_token ' add the token to the array
IF returnDelims THEN GOSUB add_delim ' add the opening quote char as delimiter if required
ELSEIF INSTR(delims, char) = NULL THEN
token = token + char ' add the character to the current token
ELSE
GOSUB add_token ' found a delimiter, add the token to the array
IF returnDelims THEN GOSUB add_delim ' found a delimiter, add it to the array if required
END IF
END IF
NEXT
' Whatever is still pending (including the contents of an unterminated quote) becomes the last token
GOSUB add_token ' add the final token if there is any
IF count > NULL THEN REDIM _PRESERVE tokens(LBOUND(tokens) TO arrIdx - 1) AS STRING ' resize the array to the exact size
String_Tokenize = count
EXIT FUNCTION ' keeps execution from falling into the GOSUB routines below
' Add the token to the array if there is any
add_token:
IF LEN(token) > NULL THEN
tokens(arrIdx) = token ' add the token to the token array
token = STRING_EMPTY ' clear the current token
GOSUB increment_counters_and_resize_array
END IF
RETURN
' Add delimiter to array if required
' (char still holds the delimiter or quote character that triggered the call)
add_delim:
tokens(arrIdx) = char ' add delimiter to array
GOSUB increment_counters_and_resize_array
RETURN
' Increment the count and array index and resize the array if needed
increment_counters_and_resize_array:
count = count + 1 ' increment the token count
arrIdx = arrIdx + 1 ' move to next position
IF arrIdx > UBOUND(tokens) THEN REDIM _PRESERVE tokens(LBOUND(tokens) TO UBOUND(tokens) + 512) AS STRING ' resize in 512 chunks
RETURN
END FUNCTION
' Extracts tokens from a string, one token per call. A token is a word that is
' surrounded by separators, such as spaces or commas. Tokens are extracted and
' analyzed when parsing sentences or commands. To use the String_GetToken
' function, pass the string to be parsed on the first call, then pass
' an empty string on subsequent calls until the function returns an empty string
' to indicate that the entire string has been parsed.
' sourceString - the string to parse on the first call; an empty string continues with the string from the previous call
' delimiters - every character in this string is treated as a separator
' Returns: the next token, or an empty string when there are no more tokens
' Note: the parse state is kept in STATIC variables, so only one string can be tokenized at a time
FUNCTION String_GetToken$ (sourceString AS STRING, delimiters AS STRING)
    STATIC startPosition AS _UNSIGNED LONG, originalString AS STRING

    ' A non-empty sourceString starts a new parse: keep a copy of the string and rewind
    IF LEN(sourceString) <> NULL THEN
        startPosition = 1
        originalString = sourceString
    END IF

    ' startPosition is still 0 when we are called with an empty string before any parse was started
    IF startPosition < 1 THEN startPosition = 1

    DIM sourceLength AS _UNSIGNED LONG: sourceLength = LEN(originalString)

    ' Find the start of the next token (character that isn't a delimiter).
    ' The bounds test and the ASC() read are deliberately separate statements: AND evaluates both of its
    ' operands, so a combined condition would call ASC() with a position outside of the string
    DIM tokenStart AS _UNSIGNED LONG: tokenStart = startPosition
    DO WHILE tokenStart <= sourceLength
        IF INSTR(delimiters, CHR$(ASC(originalString, tokenStart))) = NULL THEN EXIT DO
        tokenStart = tokenStart + 1
    LOOP

    ' Only delimiters (or nothing) were left
    IF tokenStart > sourceLength THEN
        String_GetToken = STRING_EMPTY ' no more tokens, return an empty string
        EXIT FUNCTION
    END IF

    ' Find the end of the token (first delimiter after tokenStart, or one past the end of the string)
    DIM tokenEnd AS _UNSIGNED LONG: tokenEnd = tokenStart
    DO WHILE tokenEnd <= sourceLength
        IF INSTR(delimiters, CHR$(ASC(originalString, tokenEnd))) <> NULL THEN EXIT DO
        tokenEnd = tokenEnd + 1
    LOOP

    String_GetToken = MID$(originalString, tokenStart, tokenEnd - tokenStart)

    ' Set the starting point for the search for the next token
    startPosition = tokenEnd
END FUNCTION
' Takes unwanted characters out of a string by comparing them with a filter string
' txtToFilter - the text to process
' filter - the set of characters to test each character of txtToFilter against
' inclusiveFilter - if true, only characters found in filter are kept; if false, only characters NOT found in filter are kept
' Returns: the kept characters in their original order
FUNCTION String_Filter$ (txtToFilter AS STRING, filter AS STRING, inclusiveFilter AS _BYTE)
    DIM sourceLength AS _UNSIGNED LONG: sourceLength = LEN(txtToFilter)
    DIM kept AS STRING: kept = SPACE$(sourceLength) ' worst case: every character is kept
    DIM AS _UNSIGNED LONG readPos, writePos ' zero-based offsets into txtToFilter and kept
    DIM byteValue AS _UNSIGNED _BYTE
    DIM AS _BYTE foundInFilter, keepByte

    $CHECKING:OFF
    DO WHILE readPos < sourceLength
        byteValue = PeekStringByte(txtToFilter, readPos)
        foundInFilter = (INSTR(filter, CHR$(byteValue)) <> NULL)

        ' Inclusive filtering keeps what is in the filter, exclusive filtering keeps what is not
        IF inclusiveFilter THEN keepByte = foundInFilter ELSE keepByte = NOT foundInFilter

        IF keepByte THEN
            PokeStringByte kept, writePos, byteValue
            writePos = writePos + 1
        END IF

        readPos = readPos + 1
    LOOP
    $CHECKING:ON

    ' Only the first writePos characters were filled in; drop the unused padding
    String_Filter = LEFT$(kept, writePos)
END FUNCTION
' Replaces occurrences of substringToFind in originalString with replacement
' originalString - the text to search (it is not modified)
' substringToFind - the text to look for; if it is empty, nothing is replaced and originalString is returned unchanged
' replacement - the text that is substituted for each occurrence that is found
' startPosition - the 1-based position where searching starts (values below 1 are treated as 1)
' replaceCount - the maximum number of replacements to make; zero or a negative value replaces every occurrence
' Returns: a copy of originalString with the replacements applied
FUNCTION String_Replace$ (originalString AS STRING, substringToFind AS STRING, replacement AS STRING, startPosition AS _UNSIGNED LONG, replaceCount AS LONG)
    ' Initialize the result string
    DIM resultString AS STRING: resultString = originalString

    DIM findLength AS _UNSIGNED LONG: findLength = LEN(substringToFind)

    ' INSTR reports a match at the start position for an empty search string without consuming any input.
    ' Searching for an empty string would therefore never terminate, so it is treated as "nothing to replace"
    IF findLength > NULL THEN
        ' Ensure a valid starting position
        DIM position AS _UNSIGNED LONG: position = startPosition
        IF position < 1 THEN position = 1

        DIM occurrencesReplaced AS _UNSIGNED LONG
        DIM findPosition AS _UNSIGNED LONG

        ' Loop until the specified count is reached or all occurrences are replaced
        DO
            ' Stop once the requested number of replacements has been made (only if a limit was given)
            IF replaceCount > NULL AND occurrencesReplaced >= replaceCount THEN EXIT DO

            findPosition = INSTR(position, resultString, substringToFind)
            IF findPosition = NULL THEN EXIT DO ' no more occurrences

            ' Replace the found occurrence with the new substring
            resultString = LEFT$(resultString, findPosition - 1) + replacement + MID$(resultString, findPosition + findLength)

            ' Continue after the inserted text so that the replacement itself is never searched again
            position = findPosition + LEN(replacement)
            occurrencesReplaced = occurrencesReplaced + 1
        LOOP
    END IF

    String_Replace = resultString
END FUNCTION
' Joins a bunch of strings in sourceArray using delimiter
' sourceArray() - the strings to join; every element from LBOUND to UBOUND is used
' delimiter - the text that is inserted between two consecutive elements
' Returns: the joined string (just the single element if the array has only one)
FUNCTION String_Join$ (sourceArray() AS STRING, delimiter AS STRING)
    ' Array bounds can be negative (e.g. DIM a(-5 TO 5)), so they must be kept in signed variables
    DIM AS LONG lowerBound, upperBound, i
    lowerBound = LBOUND(sourceArray)
    upperBound = UBOUND(sourceArray)

    ' Ensure the array is not empty.
    IF upperBound < lowerBound THEN EXIT FUNCTION

    ' Initialize the result string with the first element.
    DIM resultString AS STRING: resultString = sourceArray(lowerBound)

    ' Concatenate the remaining elements with the delimiter.
    FOR i = lowerBound + 1 TO upperBound
        resultString = resultString + delimiter + sourceArray(i)
    NEXT

    String_Join = resultString
END FUNCTION
''' @brief Sorts a range of a string array in ascending _STRCMP order (in-place quicksort)
''' @param strArr The string array to sort
''' @param l The lower index of the range to sort
''' @param u The upper index of the range to sort
SUB String_SortArray (strArr() AS STRING, l AS _UNSIGNED LONG, u AS _UNSIGNED LONG)
    ' A range with fewer than two elements is already sorted
    IF l >= u THEN EXIT SUB

    ' The cursors are signed and wider than the parameters on purpose: j has to be able to step to l - 1
    ' (which is -1 when l is 0) without wrapping around to a huge unsigned value
    DIM i AS _INTEGER64: i = l
    DIM j AS _INTEGER64: j = u
    DIM pivot AS STRING: pivot = strArr(l + (u - l) \ 2)

    ' Partition: move everything smaller than the pivot to the left and everything bigger to the right
    WHILE i <= j
        WHILE _STRCMP(strArr(i), pivot) < 0
            i = i + 1
        WEND

        WHILE _STRCMP(strArr(j), pivot) > 0
            j = j - 1
        WEND

        IF i <= j THEN
            SWAP strArr(i), strArr(j)
            i = i + 1
            j = j - 1
        END IF
    WEND

    ' Recursively sort the partitions (bounds are copied into variables that match the parameter type)
    DIM AS _UNSIGNED LONG partitionLow, partitionHigh

    IF j > l THEN
        partitionHigh = j
        String_SortArray strArr(), l, partitionHigh
    END IF

    IF i < u THEN
        partitionLow = i
        String_SortArray strArr(), partitionLow, u
    END IF
END SUB
' Reverses and returns the characters of a string
' s - the string to reverse; it is not modified because the reversal is done on a copy
' Returns: the copy after ReverseMemory has processed it
FUNCTION String_Reverse$ (s AS STRING)
$CHECKING:OFF
DIM tmp AS STRING: tmp = s ' work on a copy so that the caller's string stays intact
ReverseMemory _OFFSET(tmp), LEN(tmp) ' ReverseMemory is declared outside this file; it gets the address and byte count of the copy
String_Reverse = tmp
$CHECKING:ON
END FUNCTION
' Reverses the characters of a string in-place
' s - the string to reverse; its own memory is handed to ReverseMemory, so the caller's variable is changed
SUB String_Reverse (s AS STRING)
$CHECKING:OFF
ReverseMemory _OFFSET(s), LEN(s) ' ReverseMemory is declared outside this file; it gets the address and byte count of s
$CHECKING:ON
END SUB
' Formats a string using C's printf() format specifier
' s - the string value to format
' fmt - the printf() style format specifier (e.g. "----%s----")
' Returns: the result of __String_FormatString, which is declared outside this file
FUNCTION String_FormatString$ (s AS STRING, fmt AS STRING)
$CHECKING:OFF
String_FormatString = __String_FormatString(String_ToCStr(s), String_ToCStr(fmt)) ' both strings are passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION
' Formats a long using C's printf() format specifier
' n - the value to format
' fmt - the printf() style format specifier (e.g. "%.4X")
' Returns: the result of __String_FormatLong, which is declared outside this file
FUNCTION String_FormatLong$ (n AS LONG, fmt AS STRING)
$CHECKING:OFF
String_FormatLong = __String_FormatLong(n, String_ToCStr(fmt)) ' fmt is passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION
' Formats an integer64 using C's printf() format specifier
' n - the value to format
' fmt - the printf() style format specifier (e.g. "%.10llu")
' Returns: the result of __String_FormatInteger64, which is declared outside this file
FUNCTION String_FormatInteger64$ (n AS _INTEGER64, fmt AS STRING)
$CHECKING:OFF
String_FormatInteger64 = __String_FormatInteger64(n, String_ToCStr(fmt)) ' fmt is passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION
' Formats a single using C's printf() format specifier
' n - the value to format
' fmt - the printf() style format specifier (e.g. "%f")
' Returns: the result of __String_FormatSingle, which is declared outside this file
FUNCTION String_FormatSingle$ (n AS SINGLE, fmt AS STRING)
$CHECKING:OFF
String_FormatSingle = __String_FormatSingle(n, String_ToCStr(fmt)) ' fmt is passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION
' Formats a double using C's printf() format specifier
' n - the value to format
' fmt - the printf() style format specifier (e.g. "%f")
' Returns: the result of __String_FormatDouble, which is declared outside this file
FUNCTION String_FormatDouble$ (n AS DOUBLE, fmt AS STRING)
$CHECKING:OFF
String_FormatDouble = __String_FormatDouble(n, String_ToCStr(fmt)) ' fmt is passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION
' Formats an offset using C's printf() format specifier
' n - the value to format
' fmt - the printf() style format specifier (e.g. "%p")
' Returns: the result of __String_FormatOffset, which is declared outside this file
FUNCTION String_FormatOffset$ (n AS _UNSIGNED _OFFSET, fmt AS STRING)
$CHECKING:OFF
String_FormatOffset = __String_FormatOffset(n, String_ToCStr(fmt)) ' fmt is passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION
' Compiles a regex for quick usage with different strings via String_RegExSearchCompiled() and String_RegExMatchCompiled()
' pattern - the regular expression to compile
' Returns: the value returned by __String_RegExCompile (declared outside this file) that identifies the compiled pattern
' NOTE(review): the test code at the top of this file releases the returned value with String_RegExFree - confirm that this is required
FUNCTION String_RegExCompile~%& (pattern AS STRING)
$CHECKING:OFF
String_RegExCompile = __String_RegExCompile(String_ToCStr(pattern)) ' pattern is passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION
' Does a regex search for a string using a compiled pattern
' pattern - a compiled pattern (see String_RegExCompile)
' text - the text to search
' startPos - the position in text handed to the search; must be between 1 and LEN(text)
' matchLength - passed through to __String_RegExSearchCompiled (presumably receives the length of the match - confirm)
' Returns: 0 if startPos is out of range; a negative search result unchanged; any other search result with startPos added to it
FUNCTION String_RegExSearchCompiled& (pattern AS _UNSIGNED _OFFSET, text AS STRING, startPos AS LONG, matchLength AS LONG)
    $CHECKING:OFF
    ' Reject start positions outside of the text; the default return value (0) is used in that case
    IF startPos < 1 OR startPos > LEN(text) THEN EXIT FUNCTION

    DIM matchOffset AS LONG
    matchOffset = __String_RegExSearchCompiled(pattern, String_ToCStr(text), startPos, matchLength)

    IF matchOffset < 0 THEN
        String_RegExSearchCompiled = matchOffset ' hand a negative result back as it is
    ELSE
        String_RegExSearchCompiled = matchOffset + startPos
    END IF
    $CHECKING:ON
END FUNCTION
' Does a regex search for a string using a pattern string
' pattern - the regular expression to search for
' text - the text to search
' startPos - the position in text handed to the search; must be between 1 and LEN(text)
' matchLength - passed through to __String_RegExSearch (presumably receives the length of the match - confirm)
' Returns: 0 if startPos is out of range; a negative search result unchanged; any other search result with startPos added to it
FUNCTION String_RegExSearch& (pattern AS STRING, text AS STRING, startPos AS LONG, matchLength AS LONG)
    $CHECKING:OFF
    ' Reject start positions outside of the text; the default return value (0) is used in that case
    IF startPos < 1 OR startPos > LEN(text) THEN EXIT FUNCTION

    DIM matchOffset AS LONG
    matchOffset = __String_RegExSearch(String_ToCStr(pattern), String_ToCStr(text), startPos, matchLength)

    IF matchOffset < 0 THEN
        String_RegExSearch = matchOffset ' hand a negative result back as it is
    ELSE
        String_RegExSearch = matchOffset + startPos
    END IF
    $CHECKING:ON
END FUNCTION
' Checks if `text` is a RegEx match using a compiled pattern
' pattern - a compiled pattern (see String_RegExCompile)
' text - the text to test
' Returns: the result of __String_RegExMatchCompiled, which is declared outside this file
FUNCTION String_RegExMatchCompiled%% (pattern AS _UNSIGNED _OFFSET, text AS STRING)
$CHECKING:OFF
String_RegExMatchCompiled = __String_RegExMatchCompiled(pattern, String_ToCStr(text)) ' text is passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION
' Checks if `text` is a RegEx match using a pattern string
' pattern - the regular expression to test against
' text - the text to test
' Returns: the result of __String_RegExMatch, which is declared outside this file
FUNCTION String_RegExMatch%% (pattern AS STRING, text AS STRING)
$CHECKING:OFF
String_RegExMatch = __String_RegExMatch(String_ToCStr(pattern), String_ToCStr(text)) ' both strings are passed with a CHR$(NULL) terminator appended
$CHECKING:ON
END FUNCTION