Skip to content

Commit

Permalink
🐛 fix quote crlf
Browse files (browse the repository at this point in the history)
  • Loading branch information
RF-Tar-Railt committed Nov 14, 2024
1 parent f6a9886 commit c5eb49f
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
9 changes: 7 additions & 2 deletions src/tarina/_string_c.pyx
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,6 +9,7 @@ from cpython.unicode cimport (
PyUnicode_Join,
PyUnicode_Split,
PyUnicode_Substring,
PyUnicode_Concat,
)


Expand All @@ -28,6 +29,8 @@ cdef dict QUOTES = {'"': '"', "'": "'"}
cdef unicode CRLF = "\n\r"

cpdef inline list split(str text, str separator, bint crlf=True):
if crlf:
separator = PyUnicode_Concat(separator, CRLF)
text = str_strip(text, BOTHSTRIP, separator)
cdef:
bint escape = 0
Expand Down Expand Up @@ -55,7 +58,7 @@ cpdef inline list split(str text, str separator, bint crlf=True):
PyList_Append(result, ch)
if escape:
result[PyList_GET_SIZE(result)-1] = ch
elif str_contains(separator, ch) or (crlf and str_contains(CRLF, ch)):
elif str_contains(separator, ch):
if quotation:
PyList_Append(quoted_sep_index, PyList_GET_SIZE(result) + 1)
PyList_Append(result, ch)
Expand All @@ -79,6 +82,8 @@ cpdef inline list split(str text, str separator, bint crlf=True):


cpdef inline tuple split_once(str text, str separator, bint crlf=True):
if crlf:
separator = PyUnicode_Concat(separator, CRLF)
text = str_strip(text, LEFTSTRIP, separator)
cdef:
Py_ssize_t index = 0
Expand All @@ -93,7 +98,7 @@ cpdef inline tuple split_once(str text, str separator, bint crlf=True):
while index < length:
ch = PyUnicode_READ_CHAR(text, index)
index += 1
if str_contains(separator, ch) or (crlf and str_contains(CRLF, ch)):
if str_contains(separator, ch):
if quotation == 0:
sep = 1
continue
Expand Down
8 changes: 6 additions & 2 deletions src/tarina/_string_py.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,14 +15,16 @@ def split_once(text: str, separator: str, crlf: bool = True):
Returns:
Tuple[str, str]: 切割后的字符串, 可能含有空格
"""
if crlf:
separator += CRLF
index, out_text, quotation, escape, sep = 0, "", "", False, False
text = text.lstrip()
first_quoted_sep_index = -1
last_quote_index = 0
tlen = len(text)
for char in text:
index += 1
if char in separator or (crlf and char in CRLF):
if char in separator:
if not quotation:
sep = True
continue
Expand Down Expand Up @@ -63,6 +65,8 @@ def split(text: str, separator: str, crlf: bool = True):
Returns:
List[str]: 切割后的字符串, 可能含有空格
"""
if crlf:
separator += CRLF
text = text.strip(separator)
result, quotation, escape = [], "", False
quoted_sep_index = []
Expand All @@ -81,7 +85,7 @@ def split(text: str, separator: str, crlf: bool = True):
result.append(char)
if escape:
result[-1] = char
elif char in separator or (crlf and char in CRLF):
elif char in separator:
if quotation:
quoted_sep_index.append(len(result) + 1)
result.append(char)
Expand Down

0 comments on commit c5eb49f

Please sign in to comment.