diff --git a/src/tarina/_string_c.pyx b/src/tarina/_string_c.pyx index 07501d7..cc978fd 100644 --- a/src/tarina/_string_c.pyx +++ b/src/tarina/_string_c.pyx @@ -9,6 +9,7 @@ from cpython.unicode cimport ( PyUnicode_Join, PyUnicode_Split, PyUnicode_Substring, + PyUnicode_Concat, ) @@ -28,6 +29,8 @@ cdef dict QUOTES = {'"': '"', "'": "'"} cdef unicode CRLF = "\n\r" cpdef inline list split(str text, str separator, bint crlf=True): + if crlf: + separator = PyUnicode_Concat(separator, CRLF) text = str_strip(text, BOTHSTRIP, separator) cdef: bint escape = 0 @@ -55,7 +58,7 @@ cpdef inline list split(str text, str separator, bint crlf=True): PyList_Append(result, ch) if escape: result[PyList_GET_SIZE(result)-1] = ch - elif str_contains(separator, ch) or (crlf and str_contains(CRLF, ch)): + elif str_contains(separator, ch): if quotation: PyList_Append(quoted_sep_index, PyList_GET_SIZE(result) + 1) PyList_Append(result, ch) @@ -79,6 +82,8 @@ cpdef inline list split(str text, str separator, bint crlf=True): cpdef inline tuple split_once(str text, str separator, bint crlf=True): + if crlf: + separator = PyUnicode_Concat(separator, CRLF) text = str_strip(text, LEFTSTRIP, separator) cdef: Py_ssize_t index = 0 @@ -93,7 +98,7 @@ cpdef inline tuple split_once(str text, str separator, bint crlf=True): while index < length: ch = PyUnicode_READ_CHAR(text, index) index += 1 - if str_contains(separator, ch) or (crlf and str_contains(CRLF, ch)): + if str_contains(separator, ch): if quotation == 0: sep = 1 continue diff --git a/src/tarina/_string_py.py b/src/tarina/_string_py.py index c1fb182..59e1ee3 100644 --- a/src/tarina/_string_py.py +++ b/src/tarina/_string_py.py @@ -15,6 +15,8 @@ def split_once(text: str, separator: str, crlf: bool = True): Returns: Tuple[str, str]: 切割后的字符串, 可能含有空格 """ + if crlf: + separator += CRLF index, out_text, quotation, escape, sep = 0, "", "", False, False text = text.lstrip() first_quoted_sep_index = -1 @@ -22,7 +24,7 @@ def split_once(text: str, separator: str, crlf: bool = True): tlen = len(text) for char in text: index += 1 - if char in separator or (crlf and char in CRLF): + if char in separator: if not quotation: sep = True continue @@ -63,6 +65,8 @@ def split(text: str, separator: str, crlf: bool = True): Returns: List[str]: 切割后的字符串, 可能含有空格 """ + if crlf: + separator += CRLF text = text.strip(separator) result, quotation, escape = [], "", False quoted_sep_index = [] @@ -81,7 +85,7 @@ def split(text: str, separator: str, crlf: bool = True): result.append(char) if escape: result[-1] = char - elif char in separator or (crlf and char in CRLF): + elif char in separator: if quotation: quoted_sep_index.append(len(result) + 1) result.append(char)