Alien-SVN
view release on metacpan or search on metacpan
src/subversion/subversion/libsvn_subr/utf.c view on Meta::CPAN
/*
* utf.c: UTF-8 conversion routines
*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*/
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <apr_strings.h>
#include <apr_lib.h>
#include <apr_xlate.h>
#include <apr_atomic.h>
#include "svn_hash.h"
#include "svn_string.h"
#include "svn_error.h"
#include "svn_pools.h"
#include "svn_ctype.h"
#include "svn_utf.h"
#include "svn_private_config.h"
#include "win32_xlate.h"
#include "private/svn_utf_private.h"
#include "private/svn_dep_compat.h"
#include "private/svn_string_private.h"
#include "private/svn_mutex.h"
/* Use these static strings to maximize performance on standard conversions.
* Any strings on other locations are still valid, however.
*/
static const char *SVN_UTF_NTOU_XLATE_HANDLE = "svn-utf-ntou-xlate-handle";
static const char *SVN_UTF_UTON_XLATE_HANDLE = "svn-utf-uton-xlate-handle";
static const char *SVN_APR_UTF8_CHARSET = "UTF-8";
static svn_mutex__t *xlate_handle_mutex = NULL;
static svn_boolean_t assume_native_charset_is_utf8 = FALSE;
/* The xlate handle cache is a global hash table with linked lists of xlate
* handles. In multi-threaded environments, a thread "borrows" an xlate
* handle from the cache during a translation and puts it back afterwards.
* This avoids holding a global lock for all translations.
* If there is no handle for a particular key when needed, a new
* handle is created and put in the cache after use.
* This means that there will be at most N handles open for a key, where N
* is the number of simultaneous handles in use for that key. */
/* One entry of the xlate handle cache. Entries are chained into singly
linked lists through NEXT. */
typedef struct xlate_handle_node_t {
/* The APR translation handle carried by this node. */
apr_xlate_t *handle;
/* FALSE if the handle is not valid, since its pool is being
destroyed. Cleared by xlate_handle_node_cleanup(). */
svn_boolean_t valid;
/* The name of a char encoding or APR_LOCALE_CHARSET. */
const char *frompage, *topage;
/* The following node in the list this node is linked into. */
struct xlate_handle_node_t *next;
} xlate_handle_node_t;
/* This maps const char * userdata_key strings to xlate_handle_node_t **
handles to the first entry in the linked list of xlate handles. We don't
store the pointer to the list head directly in the hash table, since we
remove/insert entries at the head in the list in the code below, and
we can't use apr_hash_set() in each character translation because that
function allocates memory in each call where the value is non-NULL.
Since these allocations take place in a global pool, this would be a
memory leak. */
static apr_hash_t *xlate_handle_hash = NULL;
/* "1st level cache" to standard conversion maps. We may access these
* using atomic xchange ops, i.e. without further thread synchronization.
* If the respective item is NULL, fallback to hash lookup.
*/
static void * volatile xlat_ntou_static_handle = NULL;
static void * volatile xlat_uton_static_handle = NULL;
/* Pool cleanup callback for the xlate handle cache.  ARG is unused.

   Resets the global hash pointer and both static "1st level" handles to
   NULL, so that no stale object can be reached afterwards and so that
   translation still works (uncached) in cleanup functions that run later.
   Always returns APR_SUCCESS. */
static apr_status_t
xlate_cleanup(void *arg)
{
  (void) arg;

  /* Drop the hash first: a NULL hash means "no global cache". */
  xlate_handle_hash = NULL;

  /* Forget the statically cached handles as well. */
  xlat_ntou_static_handle = NULL;
  xlat_uton_static_handle = NULL;

  return APR_SUCCESS;
}
/* Set the handle of ARG to NULL. */
static apr_status_t
xlate_handle_node_cleanup(void *arg)
{
xlate_handle_node_t *node = arg;
node->valid = FALSE;
return APR_SUCCESS;
}
/* Set up the global xlate handle cache (once) and record whether the
   native charset may be assumed to be UTF-8.

   ASSUME_NATIVE_UTF8 is only ever used to switch the assumption on; once
   it is set, later calls do not clear it.  POOL is the parent of the
   private subpool that owns the mutex and the hash.

   If the mutex cannot be created, the error is discarded and the function
   returns without touching any global state. */
void
svn_utf_initialize2(svn_boolean_t assume_native_utf8,
                    apr_pool_t *pool)
{
  if (!xlate_handle_hash)
    {
      /* The cache lives in its own subpool, guarded by the mutex.  The
         caller's pool cannot be used directly: handle allocations may
         happen from several threads, and pool allocation is not
         thread-safe. */
      apr_pool_t *cache_pool = svn_pool_create(pool);
      svn_mutex__t *cache_mutex;
      svn_error_t *init_err = svn_mutex__init(&cache_mutex, TRUE, cache_pool);

      if (init_err)
        {
          svn_error_clear(init_err);
          return;
        }

      xlate_handle_mutex = cache_mutex;
      xlate_handle_hash = apr_hash_make(cache_pool);

      /* Reset the globals when the subpool goes away. */
      apr_pool_cleanup_register(cache_pool, NULL, xlate_cleanup,
                                apr_pool_cleanup_null);
    }

  if (!assume_native_charset_is_utf8)
    {
      assume_native_charset_is_utf8 = assume_native_utf8;
    }
}
/* Build the cache key for a conversion from FROMPAGE to TOPAGE.

   Both arguments may be anything apr_xlate_open() accepts under the same
   name.  The special values SVN_APR_LOCALE_CHARSET and
   SVN_APR_DEFAULT_CHARSET are really integers rather than strings, so
   they are recognised by pointer identity and replaced by fixed names.

   Returns "svn-utf-<from>to<to>-xlate-handle", allocated in POOL. */
static const char*
get_xlate_key(const char *topage,
              const char *frompage,
              apr_pool_t *pool)
{
  const char *from_name =
    (frompage == SVN_APR_LOCALE_CHARSET) ? "APR_LOCALE_CHARSET"
    : (frompage == SVN_APR_DEFAULT_CHARSET) ? "APR_DEFAULT_CHARSET"
    : frompage;
  const char *to_name =
    (topage == SVN_APR_LOCALE_CHARSET) ? "APR_LOCALE_CHARSET"
    : (topage == SVN_APR_DEFAULT_CHARSET) ? "APR_DEFAULT_CHARSET"
    : topage;

  return apr_pstrcat(pool, "svn-utf-", from_name, "to", to_name,
                     "-xlate-handle", (char *)NULL);
}
/* Exchange the pointer stored in *MEM with NEW_VALUE and return what was
 * stored there before.
 *
 * With threads and APR >= 1.3.0 the exchange is atomic.  With threads but
 * an older APR, atomicity cannot be guaranteed: *MEM is left unmodified
 * and NEW_VALUE itself is handed back to the caller.  Without thread
 * support a plain read-then-write is used.
 */
static APR_INLINE void*
atomic_swap(void * volatile * mem, void *new_value)
{
#if !APR_HAS_THREADS
  /* Single-threaded build: no synchronization required. */
  void *previous = (void*)*mem;
  *mem = new_value;
  return previous;
#elif APR_VERSION_AT_LEAST(1,3,0)
  /* The cast works around an APR bug:
     https://issues.apache.org/bugzilla/show_bug.cgi?id=50731 */
  return apr_atomic_xchgptr((volatile void **)mem, new_value);
#else
  /* Old APR versions have no atomic pointer swap.  Return the input
   * so that the caller processes it further. */
  return new_value;
#endif
}
/* Set *RET to a newly created handle node for converting from FROMPAGE
to TOPAGE, If apr_xlate_open() returns APR_EINVAL or APR_ENOTIMPL, set
(*RET)->handle to NULL. If fail for any other reason, return the error.
Allocate *RET and its xlate handle in POOL. */
static svn_error_t *
xlate_alloc_handle(xlate_handle_node_t **ret,
const char *topage, const char *frompage,
src/subversion/subversion/libsvn_subr/utf.c view on Meta::CPAN
put_xlate_handle_node_internal(xlate_handle_node_t *node,
const char *userdata_key)
{
/* Push NODE onto the front of the list stored in the global hash under
USERDATA_KEY. The hash value is a pointer to the list head, not the
head itself. Always returns SVN_NO_ERROR. */
xlate_handle_node_t **node_p = svn_hash_gets(xlate_handle_hash, userdata_key);
if (node_p == NULL)
{
/* No entry for this key yet: duplicate the key and allocate an empty
list-head slot in the hash's own pool, then register the slot. */
userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash),
userdata_key);
node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash),
sizeof(*node_p));
*node_p = NULL;
svn_hash_sets(xlate_handle_hash, userdata_key, node_p);
}
/* Link NODE in as the new list head. */
node->next = *node_p;
*node_p = node;
return SVN_NO_ERROR;
}
/* Hand NODE back to the xlate handle cache so other calls can reuse it.

   NODE must not be linked into a list (NODE->next must be NULL).
   USERDATA_KEY selects the cache slot; if it is NULL, nothing is cached.
   When there is no global cache, NODE is stored as userdata on POOL
   instead.

   With a global cache, the two standard conversions (identified by the
   address of their static key strings) first go through the static
   "1st level" slots: NODE is swapped in and whatever was there before is
   pushed to the hash under the mutex.  If the slot was empty there is
   nothing left to push.

   NOTE(review): the previous comment said locking errors are ignored;
   whether SVN_MUTEX__WITH_LOCK returns them depends on its definition in
   svn_mutex.h -- confirm. */
static svn_error_t *
put_xlate_handle_node(xlate_handle_node_t *node,
                      const char *userdata_key,
                      apr_pool_t *pool)
{
  assert(node->next == NULL);

  if (!userdata_key)
    return SVN_NO_ERROR;

  if (!xlate_handle_hash)
    {
      /* No global cache: keep the node in the per-pool cache. */
      apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool);
      return SVN_NO_ERROR;
    }

  /* 1st level: global, static items.  NODE becomes whatever previously
     occupied the static slot. */
  if (userdata_key == SVN_UTF_NTOU_XLATE_HANDLE)
    node = atomic_swap(&xlat_ntou_static_handle, node);
  else if (userdata_key == SVN_UTF_UTON_XLATE_HANDLE)
    node = atomic_swap(&xlat_uton_static_handle, node);

  if (node == NULL)
    return SVN_NO_ERROR;

  /* 2nd level: push the displaced (or non-standard) node to the hash. */
  SVN_MUTEX__WITH_LOCK(xlate_handle_mutex,
                       put_xlate_handle_node_internal(node,
                                                      userdata_key));

  return SVN_NO_ERROR;
}
/* Fetch, in *RET, the handle node for converting native characters to
   UTF-8.  The source charset is UTF-8 when the native charset is assumed
   to be UTF-8, otherwise the locale charset.  POOL is passed through to
   get_xlate_handle_node(), whose result is returned unchanged. */
static svn_error_t *
get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
{
  const char *native_page = SVN_APR_LOCALE_CHARSET;

  if (assume_native_charset_is_utf8)
    native_page = SVN_APR_UTF8_CHARSET;

  return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET, native_page,
                               SVN_UTF_NTOU_XLATE_HANDLE, pool);
}
/* Fetch, in *RET, the handle node for converting UTF-8 to native
   characters, creating one if it doesn't exist.  The target charset is
   UTF-8 when the native charset is assumed to be UTF-8, otherwise the
   locale charset.

   If no handle can be found, or none can be created because
   apr_xlate_open returned APR_EINVAL, *RET is set to null and
   SVN_NO_ERROR is returned; any other failure is returned as an error. */
static svn_error_t *
get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
{
  const char *native_page = SVN_APR_LOCALE_CHARSET;

  if (assume_native_charset_is_utf8)
    native_page = SVN_APR_UTF8_CHARSET;

  return get_xlate_handle_node(ret, native_page, SVN_APR_UTF8_CHARSET,
                               SVN_UTF_UTON_XLATE_HANDLE, pool);
}
/* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn
   sequences (NNN being the byte value as three decimal digits),
   allocating the result in POOL.

   Returns the NUL-terminated escaped string.  Every other byte is
   copied through unchanged. */
static const char *
fuzzy_escape(const char *src, apr_size_t len, apr_pool_t *pool)
{
  const char *src_orig = src, *src_end = src + len;
  apr_size_t new_len = 0;
  char *new;
  const char *new_orig;

  /* First count how big a dest string we'll need. */
  while (src < src_end)
    {
      if (! svn_ctype_isascii(*src) || *src == '\0')
        new_len += 5; /* 5 slots, for "?\XXX" */
      else
        new_len += 1; /* one slot for the 7-bit char */

      src++;
    }

  /* Allocate that amount, plus one slot for '\0' character. */
  new = apr_palloc(pool, new_len + 1);

  new_orig = new;

  /* And fill it up. */
  while (src_orig < src_end)
    {
      /* Test the byte itself, not the pointer: the condition must match
         the one used in the sizing pass above, otherwise an embedded NUL
         would be copied verbatim and terminate the result early. */
      if (! svn_ctype_isascii(*src_orig) || *src_orig == '\0')
        {
          /* This is the same format as svn_xml_fuzzy_escape uses, but that
             function escapes different characters.  Please keep in sync!
             ### If we add another fuzzy escape somewhere, we should abstract
             ### this out to a common function. */
          /* Size 6 = five visible characters plus the terminator, which
             the next iteration (or the final store) overwrites. */
          apr_snprintf(new, 6, "?\\%03u", (unsigned char) *src_orig);
          new += 5;
        }
      else
        {
          *new = *src_orig;
          new += 1;
        }

      src_orig++;
    }

  *new = '\0';

  return new_orig;
}
/* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
( run in 0.852 second using v1.01-cache-2.11-cpan-d7f47b0818f )