about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Armin Rigo <arigo@tunes.org> 2011-08-01 16:43:47 +0200
committer: Armin Rigo <arigo@tunes.org> 2011-08-01 16:43:47 +0200
commit: 72db948fb682f7e9b72b65d16f160e52dbc272f5 (patch)
tree: 700e4aa5aa36ec1b28f23285c457a66d4834f86e
parent: merge heads (diff)
download: pypy-72db948fb682f7e9b72b65d16f160e52dbc272f5.tar.gz
pypy-72db948fb682f7e9b72b65d16f160e52dbc272f5.tar.bz2
pypy-72db948fb682f7e9b72b65d16f160e52dbc272f5.zip
Test and fix.
-rw-r--r-- pypy/module/_multibytecodec/c_codecs.py 12
-rw-r--r-- pypy/module/_multibytecodec/test/test_app_incremental.py 25
-rw-r--r-- pypy/translator/c/src/cjkcodecs/multibytecodec.c 5
-rw-r--r-- pypy/translator/c/src/cjkcodecs/multibytecodec.h 2
4 files changed, 36 insertions, 8 deletions
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
index d0ab9a532b..2e093cc786 100644
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -190,8 +190,8 @@ pypy_cjk_enc_init = llexternal('pypy_cjk_enc_init',
rffi.SSIZE_T)
pypy_cjk_enc_free = llexternal('pypy_cjk_enc_free', [ENCODEBUF_P],
lltype.Void)
-pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk', [ENCODEBUF_P],
- rffi.SSIZE_T)
+pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk',
+ [ENCODEBUF_P, rffi.SSIZE_T], rffi.SSIZE_T)
pypy_cjk_enc_reset = llexternal('pypy_cjk_enc_reset', [ENCODEBUF_P],
rffi.SSIZE_T)
pypy_cjk_enc_outbuf = llexternal('pypy_cjk_enc_outbuf', [ENCODEBUF_P],
@@ -208,6 +208,8 @@ pypy_cjk_enc_replace_on_error = llexternal('pypy_cjk_enc_replace_on_error',
rffi.SSIZE_T)
pypy_cjk_enc_getcodec = llexternal('pypy_cjk_enc_getcodec',
[ENCODEBUF_P], MULTIBYTECODEC_P)
+MBENC_FLUSH = 1
+MBENC_RESET = 2
def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
encodebuf = pypy_cjk_enc_new(codec)
@@ -225,8 +227,12 @@ def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None,
try:
if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0:
raise MemoryError
+ if ignore_error == 0:
+ flags = MBENC_FLUSH | MBENC_RESET
+ else:
+ flags = MBENC_RESET
while True:
- r = pypy_cjk_enc_chunk(encodebuf)
+ r = pypy_cjk_enc_chunk(encodebuf, flags)
if r == 0 or r == ignore_error:
break
multibytecodec_encerror(encodebuf, r, errors,
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
index 7fd96eccdd..754a9e1263 100644
--- a/pypy/module/_multibytecodec/test/test_app_incremental.py
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -22,6 +22,15 @@ class AppTestClasses:
return IncrementalHzEncoder
""")
+ cls.w_IncrementalBig5hkscsEncoder = cls.space.appexec([], """():
+ import _codecs_cn
+ from _multibytecodec import MultibyteIncrementalEncoder
+
+ class IncrementalBig5hkscsEncoder(MultibyteIncrementalEncoder):
+ codec = _codecs_cn.getcodec('big5hkscs')
+
+ return IncrementalBig5hkscsEncoder
+ """)
def test_decode_hz(self):
d = self.IncrementalHzDecoder()
@@ -136,3 +145,19 @@ class AppTestClasses:
for i in range(13):
r = e.encode(u"a" * (2**i))
assert r == "a" * (2**i)
+
+ def test_encode_big5hkscs(self):
+ #e = self.IncrementalBig5hkscsEncoder()
+ #r = e.encode(u'\xca', True)
+ #assert r == '\x88f'
+ #r = e.encode(u'\xca', True)
+ #assert r == '\x88f'
+ #raises(UnicodeEncodeError, e.encode, u'\u0304', True)
+ #
+ e = self.IncrementalBig5hkscsEncoder()
+ r = e.encode(u'\xca')
+ assert r == ''
+ r = e.encode(u'\xca')
+ assert r == '\x88f'
+ r = e.encode(u'\u0304')
+ assert r == '\x88b'
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
index f4d1e416e3..9eb0b71688 100644
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -180,11 +180,8 @@ static int expand_encodebuffer(struct pypy_cjk_enc_s *d, Py_ssize_t esize)
return 0;
}
-#define MBENC_RESET MBENC_MAX<<1
-
-Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d)
+Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d, Py_ssize_t flags)
{
- int flags = MBENC_FLUSH | MBENC_RESET; /* XXX always, for now */
while (1)
{
Py_ssize_t r;
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
index 6d79acda43..f83f2aa034 100644
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -117,7 +117,7 @@ struct pypy_cjk_enc_s *pypy_cjk_enc_new(const MultibyteCodec *codec);
Py_ssize_t pypy_cjk_enc_init(struct pypy_cjk_enc_s *d,
Py_UNICODE *inbuf, Py_ssize_t inlen);
void pypy_cjk_enc_free(struct pypy_cjk_enc_s *);
-Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *);
+Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *, Py_ssize_t);
Py_ssize_t pypy_cjk_enc_reset(struct pypy_cjk_enc_s *);
char *pypy_cjk_enc_outbuf(struct pypy_cjk_enc_s *);
Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);