about | summary | refs | log | tree | commit | diff
path: root/pypy
diff options
context:
space:
mode:
author: CF Bolz-Tereick <cfbolz@gmx.de> 2024-03-02 11:41:50 +0100
committer: CF Bolz-Tereick <cfbolz@gmx.de> 2024-03-02 11:41:50 +0100
commit: 14c41dca28db62c88f4e6fb13467fcbf87913d34 (patch)
tree: 40f71d62b91313816e9952932d6b13ab256bf6dc /pypy
parent: make expandtabs do the right thing about unicode characters (diff)
download: pypy-14c41dca28db62c88f4e6fb13467fcbf87913d34.tar.gz
pypy-14c41dca28db62c88f4e6fb13467fcbf87913d34.tar.bz2
pypy-14c41dca28db62c88f4e6fb13467fcbf87913d34.zip
use a string builder for computing the result
(so far expandtabs is quadratic?!)
Diffstat (limited to 'pypy')
-rw-r--r-- pypy/objspace/std/test/test_unicodeobject.py | 2
-rw-r--r-- pypy/objspace/std/unicodeobject.py | 14
2 files changed, 9 insertions, 7 deletions
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
index f6c9912197..cb5ae00ddf 100644
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -761,8 +761,6 @@ class AppTestUnicodeString:
def test_expandtabs_overflows_gracefully(self):
import sys
- if sys.maxint > (1 << 32):
- skip("Wrong platform")
raises((OverflowError, MemoryError), u't\tt\t'.expandtabs, sys.maxint)
def test_expandtabs_0(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index 7653ed1f61..69662b230f 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -469,16 +469,20 @@ class W_UnicodeObject(W_Root):
ovfcheck(len(splitted) * tabsize)
except OverflowError:
raise oefmt(space.w_OverflowError, "new string is too long")
- expanded = oldtoken = splitted.pop(0)
- newlen = self._len() - len(splitted)
+ newlen = self._len() - len(splitted) + 1
+ builder = StringBuilder(len(value))
+ oldtoken = splitted[0]
+ builder.append(oldtoken)
- for token in splitted:
+ for index in range(1, len(splitted)):
+ token = splitted[index]
dist = self._tabindent(oldtoken, tabsize)
- expanded += ' ' * dist + token
+ builder.append_multiple_char(' ', dist)
+ builder.append(token)
newlen += dist
oldtoken = token
- return W_UnicodeObject(expanded, newlen)
+ return W_UnicodeObject(builder.build(), newlen)
def _tabindent(self, token, tabsize):
if tabsize <= 0: