aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2020-07-10 23:26:06 +0300
committer: GitHub <noreply@github.com> 2020-07-10 23:26:06 +0300
commit: 4c8f09d7cef8c7aa07d5b5232b5b64f63819a743 (patch)
tree: 2d90e13b6dc939f019a6cafc45ea604cbb90f584 /Objects
parent: bpo-20179: Convert the _overlapped module to the Argument Clinic (GH-14275) (diff)
downloadcpython-4c8f09d7cef8c7aa07d5b5232b5b64f63819a743.tar.gz
cpython-4c8f09d7cef8c7aa07d5b5232b5b64f63819a743.tar.bz2
cpython-4c8f09d7cef8c7aa07d5b5232b5b64f63819a743.zip
bpo-36346: Make using the legacy Unicode C API optional (GH-21437)
Add the compile-time option USE_UNICODE_WCHAR_CACHE. Setting it to 0 makes the interpreter stop using the wchar_t cache and the legacy Unicode C API.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 81
1 files changed, 58 insertions, 23 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 648dd15ca09..2e1045ad3a7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3150,9 +3150,11 @@ unicode_get_widechar_size(PyObject *unicode)
assert(unicode != NULL);
assert(_PyUnicode_CHECK(unicode));
+#if USE_UNICODE_WCHAR_CACHE
if (_PyUnicode_WSTR(unicode) != NULL) {
return PyUnicode_WSTR_LENGTH(unicode);
}
+#endif /* USE_UNICODE_WCHAR_CACHE */
assert(PyUnicode_IS_READY(unicode));
res = _PyUnicode_LENGTH(unicode);
@@ -3173,16 +3175,21 @@ unicode_get_widechar_size(PyObject *unicode)
static void
unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size)
{
- const wchar_t *wstr;
-
assert(unicode != NULL);
assert(_PyUnicode_CHECK(unicode));
- wstr = _PyUnicode_WSTR(unicode);
+#if USE_UNICODE_WCHAR_CACHE
+ const wchar_t *wstr = _PyUnicode_WSTR(unicode);
if (wstr != NULL) {
memcpy(w, wstr, size * sizeof(wchar_t));
return;
}
+#else /* USE_UNICODE_WCHAR_CACHE */
+ if (PyUnicode_KIND(unicode) == sizeof(wchar_t)) {
+ memcpy(w, PyUnicode_DATA(unicode), size * sizeof(wchar_t));
+ return;
+ }
+#endif /* USE_UNICODE_WCHAR_CACHE */
assert(PyUnicode_IS_READY(unicode));
if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
@@ -4378,7 +4385,6 @@ unicode_decode_call_errorhandler_wchar(
Py_ssize_t requiredsize;
Py_ssize_t newpos;
PyObject *inputobj = NULL;
- wchar_t *repwstr;
Py_ssize_t repwlen;
if (*errorHandler == NULL) {
@@ -4424,9 +4430,19 @@ unicode_decode_call_errorhandler_wchar(
goto onError;
}
- repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
- if (repwstr == NULL)
+#if USE_UNICODE_WCHAR_CACHE
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+ repwlen = PyUnicode_GetSize(repunicode);
+ if (repwlen < 0)
+ goto onError;
+_Py_COMP_DIAG_POP
+#else /* USE_UNICODE_WCHAR_CACHE */
+ repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0);
+ if (repwlen < 0)
goto onError;
+ repwlen--;
+#endif /* USE_UNICODE_WCHAR_CACHE */
/* need more space? (at least enough for what we
have+the replacement+the rest of the string (starting
at the new input position), so we won't have to check space
@@ -4446,7 +4462,7 @@ unicode_decode_call_errorhandler_wchar(
goto onError;
}
}
- wcsncpy(*buf + *outpos, repwstr, repwlen);
+ PyUnicode_AsWideChar(repunicode, *buf + *outpos, repwlen);
*outpos += repwlen;
*endinpos = newpos;
*inptr = *input + newpos;
@@ -7748,6 +7764,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
/* Create a substring so that we can get the UTF-16 representation
of just the slice under consideration. */
PyObject *substring;
+ int ret = -1;
assert(len > 0);
@@ -7759,11 +7776,22 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
substring = PyUnicode_Substring(unicode, offset, offset+len);
if (substring == NULL)
return -1;
+#if USE_UNICODE_WCHAR_CACHE
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
p = PyUnicode_AsUnicodeAndSize(substring, &size);
if (p == NULL) {
Py_DECREF(substring);
return -1;
}
+_Py_COMP_DIAG_POP
+#else /* USE_UNICODE_WCHAR_CACHE */
+ p = PyUnicode_AsWideCharString(substring, &size);
+ Py_CLEAR(substring);
+ if (p == NULL) {
+ return -1;
+ }
+#endif /* USE_UNICODE_WCHAR_CACHE */
assert(size <= INT_MAX);
/* First get the size of the result */
@@ -7775,16 +7803,15 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
goto error;
/* If we used a default char, then we failed! */
if (pusedDefaultChar && *pusedDefaultChar) {
- Py_DECREF(substring);
- return -2;
+ ret = -2;
+ goto done;
}
if (*outbytes == NULL) {
/* Create string object */
*outbytes = PyBytes_FromStringAndSize(NULL, outsize);
if (*outbytes == NULL) {
- Py_DECREF(substring);
- return -1;
+ goto done;
}
out = PyBytes_AS_STRING(*outbytes);
}
@@ -7793,12 +7820,10 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
const Py_ssize_t n = PyBytes_Size(*outbytes);
if (outsize > PY_SSIZE_T_MAX - n) {
PyErr_NoMemory();
- Py_DECREF(substring);
- return -1;
+ goto done;
}
if (_PyBytes_Resize(outbytes, n + outsize) < 0) {
- Py_DECREF(substring);
- return -1;
+ goto done;
}
out = PyBytes_AS_STRING(*outbytes) + n;
}
@@ -7808,19 +7833,29 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
p, (int)size,
out, outsize,
NULL, pusedDefaultChar);
- Py_CLEAR(substring);
if (outsize <= 0)
goto error;
- if (pusedDefaultChar && *pusedDefaultChar)
- return -2;
- return 0;
+ if (pusedDefaultChar && *pusedDefaultChar) {
+ ret = -2;
+ goto done;
+ }
+ ret = 0;
+
+done:
+#if USE_UNICODE_WCHAR_CACHE
+ Py_DECREF(substring);
+#else /* USE_UNICODE_WCHAR_CACHE */
+ PyMem_Free(p);
+#endif /* USE_UNICODE_WCHAR_CACHE */
+ return ret;
error:
- Py_XDECREF(substring);
- if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
- return -2;
+ if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) {
+ ret = -2;
+ goto done;
+ }
PyErr_SetFromWindowsErr(0);
- return -1;
+ goto done;
}
/*