aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> 2021-03-02 13:23:40 +0100
committer: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> 2021-03-02 13:23:40 +0100
commit: 158a579505440c8945891562eaa1aca973ca94cb (patch)
tree: ad1e260cf0da3440846c08ce681e45da1c0b9f46
parent: make init_flags a init_function (diff)
download: pypy-158a579505440c8945891562eaa1aca973ca94cb.tar.gz
pypy-158a579505440c8945891562eaa1aca973ca94cb.tar.bz2
pypy-158a579505440c8945891562eaa1aca973ca94cb.zip
fast path for unicode.upper/lower for ascii
-rw-r--r-- pypy/objspace/std/test/test_unicodeobject.py | 9
-rw-r--r-- pypy/objspace/std/unicodeobject.py | 26
2 files changed, 29 insertions, 6 deletions
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
index 6b1c7315da..e8763dc496 100644
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -207,6 +207,15 @@ class TestUnicodeObject:
for end in range(start, len(u)):
assert w_u._unicode_sliced_constant_index_jit(space, start, end)._utf8 == u[start: end].encode("utf-8")
+ def test_lower_upper_ascii(self):
+ from pypy.module.unicodedata.interp_ucd import unicodedb
+ # check that ascii chars tolower/toupper still behave sensibly in the
+ # unicodedb - unlikely to ever change, but well
+ for ch in range(128):
+ unilower, = unicodedb.tolower_full(ch)
+ assert chr(unilower) == chr(ch).lower()
+ uniupper, = unicodedb.toupper_full(ch)
+ assert chr(uniupper) == chr(ch).upper()
class AppTestUnicodeStringStdOnly:
def test_compares(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index 4fa1a98437..0be4a9e55c 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -503,11 +503,18 @@ class W_UnicodeObject(W_Root):
return tformat.formatter_field_name_split()
def descr_lower(self, space):
- builder = rutf8.Utf8StringBuilder(len(self._utf8))
- for ch in rutf8.Utf8StringIterator(self._utf8):
+ if self.is_ascii():
+ return space.newutf8(self._utf8.lower(), len(self._utf8))
+ return self._descr_lower(self._utf8)
+
+ @staticmethod
+ @jit.elidable
+ def _descr_lower(utf8):
+ builder = rutf8.Utf8StringBuilder(len(utf8))
+ for ch in rutf8.Utf8StringIterator(utf8):
lower = unicodedb.tolower(ch)
builder.append_code(lower)
- return self.from_utf8builder(builder)
+ return W_UnicodeObject.from_utf8builder(builder)
def descr_isdecimal(self, space):
return self._is_generic(space, '_isdecimal')
@@ -650,11 +657,18 @@ class W_UnicodeObject(W_Root):
return space.newlist(strs_w)
def descr_upper(self, space):
- builder = rutf8.Utf8StringBuilder(len(self._utf8))
- for ch in rutf8.Utf8StringIterator(self._utf8):
+ if self.is_ascii():
+ return space.newutf8(self._utf8.upper(), len(self._utf8))
+ return self._descr_upper(self._utf8)
+
+ @staticmethod
+ @jit.elidable
+ def _descr_upper(utf8):
+ builder = rutf8.Utf8StringBuilder(len(utf8))
+ for ch in rutf8.Utf8StringIterator(utf8):
ch = unicodedb.toupper(ch)
builder.append_code(ch)
- return self.from_utf8builder(builder)
+ return W_UnicodeObject.from_utf8builder(builder)
@unwrap_spec(width=int)
def descr_zfill(self, space, width):