diff options
author | CF Bolz-Tereick <cfbolz@gmx.de> | 2024-03-02 11:41:50 +0100 |
---|---|---|
committer | CF Bolz-Tereick <cfbolz@gmx.de> | 2024-03-02 11:41:50 +0100 |
commit | 14c41dca28db62c88f4e6fb13467fcbf87913d34 (patch) | |
tree | 40f71d62b91313816e9952932d6b13ab256bf6dc /pypy | |
parent | make expandtabs do the right thing about unicode characters (diff) | |
download | pypy-14c41dca28db62c88f4e6fb13467fcbf87913d34.tar.gz pypy-14c41dca28db62c88f4e6fb13467fcbf87913d34.tar.bz2 pypy-14c41dca28db62c88f4e6fb13467fcbf87913d34.zip |
use a string builder for computing the result
(so far expandtabs is quadratic?!)
Diffstat (limited to 'pypy')
-rw-r--r-- | pypy/objspace/std/test/test_unicodeobject.py | 2 |
-rw-r--r-- | pypy/objspace/std/unicodeobject.py | 14 |
2 files changed, 9 insertions, 7 deletions
```diff
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
index f6c9912197..cb5ae00ddf 100644
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -761,8 +761,6 @@ class AppTestUnicodeString:
 
     def test_expandtabs_overflows_gracefully(self):
         import sys
-        if sys.maxint > (1 << 32):
-            skip("Wrong platform")
         raises((OverflowError, MemoryError), u't\tt\t'.expandtabs, sys.maxint)
 
     def test_expandtabs_0(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index 7653ed1f61..69662b230f 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -469,16 +469,20 @@ class W_UnicodeObject(W_Root):
                 ovfcheck(len(splitted) * tabsize)
         except OverflowError:
             raise oefmt(space.w_OverflowError, "new string is too long")
-        expanded = oldtoken = splitted.pop(0)
-        newlen = self._len() - len(splitted)
+        newlen = self._len() - len(splitted) + 1
+        builder = StringBuilder(len(value))
+        oldtoken = splitted[0]
+        builder.append(oldtoken)
 
-        for token in splitted:
+        for index in range(1, len(splitted)):
+            token = splitted[index]
             dist = self._tabindent(oldtoken, tabsize)
-            expanded += ' ' * dist + token
+            builder.append_multiple_char(' ', dist)
+            builder.append(token)
             newlen += dist
             oldtoken = token
 
-        return W_UnicodeObject(expanded, newlen)
+        return W_UnicodeObject(builder.build(), newlen)
 
     def _tabindent(self, token, tabsize):
         if tabsize <= 0:
```