USB_Config_Vendor/CC_SDK/Environment/cppp-reiconv/lib/iconv.cpp
2026-02-03 14:36:30 +08:00

660 lines
18 KiB
C++

/*
* Copyright (C) 1999-2008, 2011, 2016, 2018, 2020, 2022-2023 Free Software Foundation, Inc.
* This file is part of the cppp-reiconv library.
*
* The cppp-reiconv library is free software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version.
*
* The cppp-reiconv library is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with the cppp-reiconv library; see the file COPYING.
* If not, see <https://www.gnu.org/licenses/>.
*/
#include <cppp/reiconv.hpp>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits.h>
namespace cppp
{
namespace base
{
namespace reiconv
{
#if ENABLE_EXTRA
/*
 * ENABLE_EXTRA build: switch on every system dependent encoding family
 * (AIX, OSF/1, DOS, z/OS) regardless of the system being compiled for,
 * and the extra encodings as well.
 */
#define USE_AIX
#define USE_OSF1
#define USE_DOS
#define USE_ZOS
#define USE_EXTRA
#else
/*
 * Default build: switch on only those system dependent encoding families
 * that the current system needs, detected through the compiler's
 * predefined platform macros below.  USE_EXTRA is never defined here.
 */
#ifdef _AIX
#define USE_AIX
#endif
#if defined(__osf__) || defined(VMS)
#define USE_OSF1
#endif
#if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
#define USE_DOS
#endif
/* Enable the EBCDIC encodings not only on z/OS but also on Linux/s390, for
   easier interoperability between z/OS and Linux/s390. */
#if defined(__MVS__) || (defined(__linux__) && (defined(__s390__) || defined(__s390x__)))
#define USE_ZOS
#endif
#endif
/*
* Data type for general conversion loop.
*/
struct loop_funcs
{
    /* Converts input to output; called by iconv() when an input buffer is supplied. */
    size_t (*loop_convert)(iconv_t icd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);

    /* Called by iconv() when no input buffer is supplied (inbuf or *inbuf is NULL). */
    size_t (*loop_reset)(iconv_t icd, char **outbuf, size_t *outbytesleft);
};
/*
* Converters.
*/
#include "converters.h"
/*
* Table of all supported encodings.
*/
struct encoding
{
    /* Conversion multibyte -> unicode. */
    struct mbtowc_funcs ifuncs;

    /* Conversion unicode -> multibyte. */
    struct wctomb_funcs ofuncs;

    /* Flags for the unicode -> multibyte conversion. */
    int oflags;

    /* Codepage number; this is the key lookup_by_codepage() searches for. */
    int codepage;
};
/* DEFALIAS entries in the .def files are irrelevant here: expand them to nothing. */
#define DEFALIAS(xxx_alias, xxx) /* nothing */
/*
 * Encoding indices.  Every DEFENCODING entry of the included .def files
 * contributes one enumerator named ei_<xxx>, in the order of inclusion.
 * The system dependent files are only included when the matching USE_*
 * macro is defined.
 */
enum
{
#define DEFENCODING(xxx_names, codepage, xxx, xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2) ei_##xxx,
#include "encodings.def"
#ifdef USE_AIX
#include "encodings_aix.def"
#endif
#ifdef USE_OSF1
#include "encodings_osf1.def"
#endif
#ifdef USE_DOS
#include "encodings_dos.def"
#endif
#ifdef USE_ZOS
#include "encodings_zos.def"
#endif
#ifdef USE_EXTRA
#include "encodings_extra.def"
#endif
#undef DEFENCODING
/* Terminating enumerator, so that the generated list does not end with a comma. */
ei_for_broken_compilers_that_dont_like_trailing_commas
};
#include "flags.h"
/*
 * One row per DEFENCODING entry.  The .def files are included in the same
 * order and under the same USE_* conditions as for the ei_* enumeration
 * above, so that row number ei_<xxx> describes encoding <xxx>.
 * Each row lists the entry's ifuncs and ofuncs initializers, then the
 * ei_<xxx>_oflags constant (presumably provided by "flags.h" - confirm)
 * and the codepage number.
 */
static struct encoding const all_encodings[] = {
#define DEFENCODING(xxx_names, codepage, xxx, xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2) \
{xxx_ifuncs1, xxx_ifuncs2, xxx_ofuncs1, xxx_ofuncs2, ei_##xxx##_oflags, codepage},
#include "encodings.def"
#ifdef USE_AIX
#include "encodings_aix.def"
#endif
#ifdef USE_OSF1
#include "encodings_osf1.def"
#endif
#ifdef USE_DOS
#include "encodings_dos.def"
#endif
#ifdef USE_ZOS
#include "encodings_zos.def"
#endif
#ifdef USE_EXTRA
#include "encodings_extra.def"
#endif
#undef DEFENCODING
};
/* End of the region in which DEFALIAS entries are ignored. */
#undef DEFALIAS
/*
 * Finds the first row of all_encodings whose codepage number equals
 * `codepage`.
 * Returns the row index, or -1 when no row matches.
 */
int lookup_by_codepage(int codepage)
{
    const size_t total = sizeof(all_encodings) / sizeof(all_encodings[0]);
    size_t slot = 0;

    /* Linear scan; stops at the first match. */
    while (slot < total && all_encodings[slot].codepage != codepage)
    {
        ++slot;
    }
    return (slot < total) ? (int)slot : -1;
}
/*
* Conversion loops.
*/
#include "loops.h"
/*
* Alias lookup function.
* Defines
* struct alias { int name; unsigned int encoding_index; };
* const struct alias * HashPool::aliases_lookup (const char *str, unsigned int len);
* #define MAX_WORD_LENGTH ...
*/
#if defined _AIX
#include "aliases_sysaix.h"
#elif defined hpux || defined __hpux
#include "aliases_syshpux.h"
#elif defined __osf__
#include "aliases_sysosf1.h"
#elif defined __sun
#include "aliases_syssolaris.h"
#else
#include "aliases.h"
#endif
/*
* System dependent alias lookup function.
* Defines
* const struct alias * aliases2_lookup (const char *str);
*/
#if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_ZOS) || defined(USE_EXTRA) /* || ... */
/*
 * Layout of the string pool for the system dependent aliases: one char
 * array member per S() entry of "aliases2.h", named stringpool_<tag> and
 * sized with sizeof(name) (presumably `name` is a string literal, so the
 * member holds the name including its terminating NUL - confirm).
 */
struct stringpool2_t
{
#define S(tag, name, encoding_index) char stringpool_##tag[sizeof(name)];
#include "aliases2.h"
#undef S
};
/*
 * The string pool itself: every member of stringpool2_t is initialized with
 * the name of the corresponding S() entry, in the same order.
 */
static const struct stringpool2_t stringpool2_contents = {
#define S(tag, name, encoding_index) name,
#include "aliases2.h"
#undef S
};
/* View of the pool as a flat character array; alias names are addressed as
   stringpool2 + <offset stored in struct alias>. */
#define stringpool2 ((const char *)&stringpool2_contents)
/*
 * Table of the system dependent aliases: one entry per S() entry of
 * "aliases2.h".  The `name` field of each entry is the byte offset of the
 * alias name inside the string pool (see stringpool2), the second field is
 * the encoding index taken from the S() entry.
 *
 * The offset is computed with offsetof() rather than by taking the address
 * of a member through a null pointer: the latter is undefined behaviour and
 * not a constant expression in C++, while offsetof() yields an integral
 * constant for this standard-layout struct.
 */
static const struct alias sysdep_aliases[] = {
#define S(tag, name, encoding_index) {(int)offsetof(struct stringpool2_t, stringpool_##tag), encoding_index},
#include "aliases2.h"
#undef S
};
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*
 * Looks `str` up among the system dependent aliases.
 * The comparison is an exact strcmp() against the pooled alias names.
 * Returns the first matching table entry, or NULL when there is none.
 */
static const struct alias *
aliases2_lookup(const char *str)
{
    const size_t total = sizeof(sysdep_aliases) / sizeof(sysdep_aliases[0]);

    for (size_t slot = 0; slot < total; ++slot)
    {
        const struct alias *entry = &sysdep_aliases[slot];
        /* entry->name is an offset into the string pool, not a pointer. */
        if (strcmp(str, stringpool2 + entry->name) == 0)
        {
            return entry;
        }
    }
    return NULL;
}
#else
/* No system dependent encoding family is enabled: the lookup always yields
   NULL and there is no second string pool.  aliases2_lookup being a macro
   in this configuration is what `#ifndef aliases2_lookup` tests for in
   iconvlist(). */
#define aliases2_lookup(str) NULL
#define stringpool2 NULL
#endif
#if 0
/* Like !strcasecmp, except that both strings can be assumed to be ASCII
and the first string can be assumed to be in uppercase. */
/*
 * Returns 1 when str2 equals str1 once the ASCII lowercase letters of str2
 * are folded to uppercase, 0 otherwise.  str1 itself is never folded.
 */
static int strequal (const char* str1, const char* str2)
{
    const unsigned char *upper = (const unsigned char *) str1;
    const unsigned char *other = (const unsigned char *) str2;
    unsigned char lhs;
    unsigned char rhs;

    do {
        lhs = *upper++;
        rhs = *other++;
        /* At the end of str1 the byte of str2 is compared unfolded:
           only a terminating NUL can match there. */
        if (lhs != 0 && rhs >= 'a' && rhs <= 'z')
            rhs = (unsigned char) (rhs - ('a' - 'A'));
    } while (lhs != 0 && lhs == rhs);

    return (lhs == rhs);
}
#endif
/*
 * Allocates a conversion descriptor for converting from the encoding named
 * `fromcode` to the encoding named `tocode`.
 *
 * Returns the new descriptor, to be released with iconv_close(), or
 * (iconv_t)(-1) with errno set to ENOMEM (allocation failure) or EINVAL
 * (the `invalid` exit below).
 */
iconv_t iconv_open(const char *tocode, const char *fromcode)
{
/* The locals below are not used directly in this function; they are
   consumed by name inside the two included fragments. */
struct conv_struct *cd;
unsigned int from_index;
unsigned int from_surface;
unsigned int to_index;
unsigned int to_surface;
int discard_ilseq;
/* NOTE(review): iconv_open1.h is not visible here.  It presumably parses
   tocode/fromcode into the locals above and jumps to `invalid` for unknown
   names - nothing in this function itself reaches that label.  Confirm. */
#include "iconv_open1.h"
cd = (struct conv_struct *)malloc(sizeof(struct conv_struct));
if (cd == NULL)
{
errno = ENOMEM;
return (iconv_t)(-1);
}
/* NOTE(review): iconv_open2.h presumably fills in *cd from the locals;
   the codepage based overload of iconv_open includes the same fragment. */
#include "iconv_open2.h"
return (iconv_t)cd;
invalid:
errno = EINVAL;
return (iconv_t)(-1);
}
/*
 * Allocates a conversion descriptor for converting from codepage
 * `fromcode_cp` to codepage `tocode_cp`.  With `strict` set to false the
 * descriptor starts with discard_ilseq enabled.
 *
 * Returns the new descriptor, to be released with iconv_close(), or
 * (iconv_t)(-1) with errno set to EINVAL (a codepage is -1 or matches no
 * encoding) or ENOMEM (allocation failure).
 */
iconv_t iconv_open(int tocode_cp, int fromcode_cp, bool strict)
{
    /* These locals are consumed by name inside iconv_open2.h: their names
       and types must stay as they are. */
    struct conv_struct *cd;
    unsigned int from_index;
    unsigned int from_surface = ICONV_SURFACE_NONE;
    unsigned int to_index;
    unsigned int to_surface = ICONV_SURFACE_NONE;
    int discard_ilseq = strict ? 0 : 1;

    {
        /* Temporaries are confined to this scope so that they cannot clash
           with anything the included fragment declares. */
        const int to_slot = lookup_by_codepage(tocode_cp);
        const int from_slot = lookup_by_codepage(fromcode_cp);
        const bool usable = (to_slot != -1) && (from_slot != -1) && (tocode_cp != -1) && (fromcode_cp != -1);

        if (!usable)
        {
            errno = EINVAL;
            return (iconv_t)(-1);
        }
        to_index = (unsigned int)to_slot;
        from_index = (unsigned int)from_slot;
    }

    cd = (struct conv_struct *)malloc(sizeof(struct conv_struct));
    if (cd == NULL)
    {
        errno = ENOMEM;
        return (iconv_t)(-1);
    }
#include "iconv_open2.h"
    return (iconv_t)cd;
}
/*
 * Entry point of a conversion step: dispatches to the loop functions stored
 * in the descriptor.
 * When an input buffer is supplied (inbuf and *inbuf both non-NULL) the
 * descriptor's loop_convert function is called; otherwise its loop_reset
 * function is called.  The result of that call is returned unchanged.
 */
size_t iconv(iconv_t icd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
{
    conv_t cd = (conv_t)icd;
    const bool have_input = (inbuf != NULL) && (*inbuf != NULL);

    if (have_input)
    {
        return cd->lfuncs.loop_convert(icd, (const char **)inbuf, inbytesleft, outbuf, outbytesleft);
    }
    return cd->lfuncs.loop_reset(icd, outbuf, outbytesleft);
}
/*
 * Releases a conversion descriptor by passing it to free().
 * Always returns 0.
 */
int iconv_close(iconv_t icd)
{
    conv_t descriptor = (conv_t)icd;

    free(descriptor);
    return 0;
}
/* Bit mask of all valid surfaces. */
#define ALL_SURFACES (ICONV_SURFACE_EBCDIC_ZOS_UNIX)
/*
 * Queries or modifies a conversion descriptor.
 *
 * `request` selects the operation and `argument` points to its operand:
 *   - ICONV_TRIVIALP, ICONV_GET_DISCARD_ILSEQ: an int that receives the result;
 *   - ICONV_SET_DISCARD_ILSEQ: an int that is read (any nonzero value enables);
 *   - ICONV_SET_HOOKS, ICONV_SET_FALLBACKS: a struct to copy, or NULL to clear;
 *   - ICONV_GET_FROM_SURFACE, ICONV_GET_TO_SURFACE: an unsigned int that
 *     receives the result;
 *   - ICONV_SET_FROM_SURFACE, ICONV_SET_TO_SURFACE: an unsigned int that is read.
 * Except for the hooks and fallbacks requests, `argument` is dereferenced
 * without a NULL check.
 *
 * Returns 0 on success.  Returns -1 with errno set to EINVAL for an unknown
 * request or for a surface value with bits outside ALL_SURFACES.
 */
int iconvctl(iconv_t icd, int request, void *argument)
{
    conv_t cd = (conv_t)icd;

    switch (request)
    {
    case ICONV_TRIVIALP:
    {
        /* Trivial means: the unicode loop is in use and both sides agree
           on encoding index and surface. */
        int trivial = 0;
        if (cd->lfuncs.loop_convert == unicode_loop_convert && cd->iindex == cd->oindex &&
            cd->isurface == cd->osurface)
        {
            trivial = 1;
        }
        *(int *)argument = trivial;
        return 0;
    }

    case ICONV_GET_DISCARD_ILSEQ:
        *(int *)argument = cd->discard_ilseq;
        return 0;

    case ICONV_SET_DISCARD_ILSEQ:
        /* Normalize the flag to 0 or 1. */
        cd->discard_ilseq = (*(const int *)argument != 0) ? 1 : 0;
        return 0;

    case ICONV_SET_HOOKS:
        if (argument == NULL)
        {
            cd->hooks.uc_hook = NULL;
            cd->hooks.data = NULL;
        }
        else
        {
            cd->hooks = *(const struct iconv_hooks *)argument;
        }
        return 0;

    case ICONV_SET_FALLBACKS:
        if (argument == NULL)
        {
            cd->fallbacks.mb_to_uc_fallback = NULL;
            cd->fallbacks.uc_to_mb_fallback = NULL;
            cd->fallbacks.data = NULL;
        }
        else
        {
            cd->fallbacks = *(const struct iconv_fallbacks *)argument;
        }
        return 0;

    case ICONV_GET_FROM_SURFACE:
        *(unsigned int *)argument = cd->isurface;
        return 0;

    case ICONV_GET_TO_SURFACE:
        *(unsigned int *)argument = cd->osurface;
        return 0;

    case ICONV_SET_FROM_SURFACE:
    case ICONV_SET_TO_SURFACE:
    {
        const unsigned int surface = *(const unsigned int *)argument;

        /* Reject any bit that is not a known surface. */
        if ((surface & ~ALL_SURFACES) != 0)
        {
            errno = EINVAL;
            return -1;
        }
        if (request == ICONV_SET_FROM_SURFACE)
        {
            cd->isurface = surface;
        }
        else
        {
            cd->osurface = surface;
        }
        return 0;
    }

    default:
        break;
    }

    errno = EINVAL;
    return -1;
}
/* An alias after its name has been converted from 'int' to 'const char*'. */
struct nalias
{
    /* Alias name as a pointer into a string pool. */
    const char *name;

    /* Index of the encoding the alias stands for. */
    unsigned int encoding_index;
};
/*
 * qsort() comparator for struct nalias elements: orders them by ascending
 * encoding_index.  The result is the difference of the two indices after
 * conversion to int.
 */
static int compare_by_index(const void *arg1, const void *arg2)
{
    const int lhs = (int)((const struct nalias *)arg1)->encoding_index;
    const int rhs = (int)((const struct nalias *)arg2)->encoding_index;

    return lhs - rhs;
}
/*
 * qsort() comparator for an array of `const char *` names.
 * Names are ordered as strcmp() orders them, except that every name
 * starting with "CS" sorts after every name that does not.
 * Returns 0 for equal names, otherwise a nonzero value whose sign gives
 * the order.
 */
static int compare_by_name(const void *arg1, const void *arg2)
{
    const char *lhs = *(const char *const *)arg1;
    const char *rhs = *(const char *const *)arg2;
    const int order = strcmp(lhs, rhs);

    if (order == 0)
    {
        return 0;
    }

    const int lhs_is_cs = (lhs[0] == 'C' && lhs[1] == 'S') ? 1 : 0;
    const int rhs_is_cs = (rhs[0] == 'C' && rhs[1] == 'S') ? 1 : 0;

    /* The "CS" difference carries weight 4, so it always dominates the
       +1 / -1 contributed by the alphabetical order. */
    return (lhs_is_cs - rhs_is_cs) * 4 + (order > 0 ? 1 : -1);
}
/*
 * Enumerates the known encodings together with their alias names.
 *
 * All aliases are collected, grouped by encoding index, and `do_one` is
 * called once per group with the number of names, the names (sorted
 * alphabetically, names starting with "CS" last) and the caller's `data`.
 * The enumeration stops as soon as `do_one` returns a nonzero value.
 */
void iconvlist(int (*do_one)(unsigned int namescount, const char *const *names, void *data), void *data)
{
#define aliascount1 sizeof(aliases) / sizeof(aliases[0])
#ifndef aliases2_lookup
#define aliascount2 sizeof(sysdep_aliases) / sizeof(sysdep_aliases[0])
#else
#define aliascount2 0
#endif
#define aliascount (aliascount1 + aliascount2)
    struct nalias aliasbuf[aliascount];
    const char *namesbuf[aliascount];
    size_t num_aliases = 0;

    /* Collect the main alias table, skipping slots with a negative name. */
    for (size_t slot = 0; slot < aliascount1; ++slot)
    {
        const struct alias *entry = &aliases[slot];
        if (entry->name < 0)
        {
            continue;
        }
        aliasbuf[num_aliases].name = stringpool + entry->name;
        aliasbuf[num_aliases].encoding_index = entry->encoding_index;
        ++num_aliases;
    }
#ifndef aliases2_lookup
    /* Collect the system dependent aliases; every entry is used. */
    for (size_t slot = 0; slot < aliascount2; ++slot)
    {
        aliasbuf[num_aliases].name = stringpool2 + sysdep_aliases[slot].name;
        aliasbuf[num_aliases].encoding_index = sysdep_aliases[slot].encoding_index;
        ++num_aliases;
    }
#endif

    /* Bring the aliases of one encoding next to each other. */
    if (num_aliases > 1)
    {
        qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
    }

    /* Walk the sorted buffer one encoding at a time. */
    size_t cursor = 0;
    while (cursor < num_aliases)
    {
        const unsigned int ei = aliasbuf[cursor].encoding_index;
        size_t group = 0;

        for (; cursor < num_aliases && aliasbuf[cursor].encoding_index == ei; ++cursor)
        {
            namesbuf[group++] = aliasbuf[cursor].name;
        }
        if (group > 1)
        {
            qsort(namesbuf, group, sizeof(const char *), compare_by_name);
        }
        /* A nonzero result from the callback ends the enumeration. */
        if (do_one(group, namesbuf, data))
        {
            break;
        }
    }
#undef aliascount
#undef aliascount2
#undef aliascount1
}
/* version number: (major<<8) + minor */
int reiconv_version = (2 << 8) + 1;
/* Size in bytes of the scratch output buffer that iconv_string() converts
   into while it only measures the length of the result. */
constexpr size_t tmpbufsize = 4096;
/*
 * Converts the bytes in [start, end) with the conversion descriptor `cd`
 * into a heap buffer.
 *
 * The conversion runs twice: a first pass converts into a scratch buffer
 * only to measure the size of the result, the second pass converts into a
 * buffer of exactly that size.
 *
 * Parameters:
 *   cd       conversion descriptor; it is not closed by this function.
 *   start    first byte of the input.
 *   end      one past the last byte of the input.
 *   resultp  NULL to only measure; otherwise *resultp is either NULL (a new
 *            buffer is obtained with malloc) or an existing heap buffer
 *            (it is resized with realloc).  On success *resultp holds the
 *            result, which is not NUL-terminated and which the caller frees.
 *            When the allocation fails *resultp is left untouched, so that
 *            a buffer passed in stays valid and owned by the caller.
 *   lengthp  if not NULL, receives the size of the result in bytes.
 *
 * Returns 0 on success, otherwise an errno code: EILSEQ for invalid or
 * incomplete input, ENOMEM when the buffer cannot be allocated, EINVAL for
 * a reversed range, or whatever code the conversion reported.
 */
int iconv_string(const iconv_t& cd, const char *start, const char *end, char **resultp,
                 size_t *lengthp)
{
    /* A reversed range would turn into a huge unsigned size below. */
    if (end < start)
    {
        return (errno = EINVAL);
    }

    const size_t input_size = (size_t)(end - start);
    size_t length = 0;

    /* Determine the length we need. */
    {
        char tmpbuf[tmpbufsize];
        char *inptr = (char *)start;
        size_t insize = input_size;

        while (insize > 0)
        {
            char *outptr = tmpbuf;
            size_t outsize = tmpbufsize;
            size_t res = iconv(cd, &inptr, &insize, &outptr, &outsize);
            if (res == (size_t)(-1))
            {
                const int code = errno;
                if (code != E2BIG)
                {
                    /* Incomplete input at the end counts as invalid input. */
                    return (code == EINVAL ? EILSEQ : code);
                }
                if (outptr == tmpbuf)
                {
                    /* Nothing fits even into an empty scratch buffer:
                       retrying could never make progress. */
                    return code;
                }
            }
            length += outptr - tmpbuf;
        }
        {
            /* Account for whatever the reset call emits. */
            char *outptr = tmpbuf;
            size_t outsize = tmpbufsize;
            size_t res = iconv(cd, NULL, NULL, &outptr, &outsize);
            if (res == (size_t)(-1))
            {
                return errno;
            }
            length += outptr - tmpbuf;
        }
    }

    if (lengthp != NULL)
    {
        *lengthp = length;
    }
    if (resultp == NULL)
    {
        /* If resultp is nullptr, we can't save results. */
        return 0;
    }

    char *result = (*resultp == NULL ? (char *)malloc(length) : (char *)realloc(*resultp, length));
    if (length == 0)
    {
        /* Empty result: hand over whatever the allocator returned. */
        *resultp = result;
        return 0;
    }
    if (result == NULL)
    {
        /* A failed realloc leaves the old block allocated; *resultp must
           keep pointing to it or the block would be leaked. */
        return (errno = ENOMEM);
    }
    *resultp = result;

    iconv(cd, NULL, NULL, NULL, NULL); /* return to the initial state */

    /* Do the conversion for real. */
    {
        char *inptr = (char *)start;
        size_t insize = input_size;
        char *outptr = result;
        size_t outsize = length;

        while (insize > 0)
        {
            size_t res = iconv(cd, &inptr, &insize, &outptr, &outsize);
            if (res == (size_t)(-1))
            {
                if (errno == EINVAL)
                {
                    break;
                }
                return errno;
            }
        }
        {
            size_t res = iconv(cd, NULL, NULL, &outptr, &outsize);
            if (res == (size_t)(-1))
            {
                return errno;
            }
        }
        /* Both passes must have produced the same number of bytes. */
        if (outsize != 0)
        {
            abort();
        }
    }
    return 0;
}
/*
 * Tells whether the result of an iconv_string() call reports invalid input.
 * A failed conversion is reported as a positive errno code (EILSEQ); the
 * classic "-1 with errno == EILSEQ" form is recognized as well.
 */
static bool iconv_string_saw_ilseq(int ret)
{
    return ret == EILSEQ || (ret < 0 && errno == EILSEQ);
}
/*
 * Converts the bytes in [start, end) from encoding `fromcode` to encoding
 * `tocode`; see the descriptor based overload for `resultp` and `lengthp`.
 *
 * When `fromcode` is not a supported encoding, three pseudo encodings are
 * recognized that try several candidates in turn and move on to the next
 * one only when the input is invalid in the current one:
 *   "autodetect_utf8"  UTF-8, then ISO-8859-1
 *   "autodetect_jp"    ISO-2022-JP-2, then EUC-JP, then SHIFT_JIS
 *   "autodetect_kr"    ISO-2022-KR, then EUC-KR
 *
 * Returns 0 on success, -1 with errno set when no conversion descriptor
 * can be opened, or the errno code reported by the conversion itself.
 */
int iconv_string(const char* tocode, const char* fromcode, const char* start,
                 const char* end, char** resultp, size_t* lengthp)
{
    iconv_t cd = iconv_open(tocode, fromcode);
    if (cd == (iconv_t)(-1))
    {
        if (errno != EINVAL)
        {
            return -1;
        }
        if (fromcode == NULL)
        {
            /* Nothing to compare against the autodetect names. */
            errno = EINVAL;
            return -1;
        }
#pragma region autodetect
        /* Unsupported fromcode or tocode. Check whether the caller requested
           autodetection. */
        if (!strcmp(fromcode, "autodetect_utf8"))
        {
            int ret;
            /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
               be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
            ret = iconv_string(tocode, "UTF-8", start, end, resultp, lengthp);
            if (!iconv_string_saw_ilseq(ret))
                return ret;
            ret = iconv_string(tocode, "ISO-8859-1", start, end, resultp, lengthp);
            return ret;
        }
        if (!strcmp(fromcode, "autodetect_jp"))
        {
            int ret;
            /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
               it will fail. */
            ret = iconv_string(tocode, "ISO-2022-JP-2", start, end, resultp, lengthp);
            if (!iconv_string_saw_ilseq(ret))
                return ret;
            /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
               is unavoidable. People will condemn SHIFT_JIS.
               If we tried SHIFT_JIS first, then some short EUC-JP inputs would
               come out wrong, and people would condemn EUC-JP and Unix, which
               would not be good. */
            ret = iconv_string(tocode, "EUC-JP", start, end, resultp, lengthp);
            if (!iconv_string_saw_ilseq(ret))
                return ret;
            /* Finally try SHIFT_JIS. */
            ret = iconv_string(tocode, "SHIFT_JIS", start, end, resultp, lengthp);
            return ret;
        }
        if (!strcmp(fromcode, "autodetect_kr"))
        {
            int ret;
            /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
               it will fail. */
            ret = iconv_string(tocode, "ISO-2022-KR", start, end, resultp, lengthp);
            if (!iconv_string_saw_ilseq(ret))
                return ret;
            /* Finally try EUC-KR. */
            ret = iconv_string(tocode, "EUC-KR", start, end, resultp, lengthp);
            return ret;
        }
#pragma endregion
        errno = EINVAL;
        return -1;
    }
    int ret = iconv_string(cd, start, end, resultp, lengthp);
    iconv_close(cd);
    return ret;
}
int iconv_string(int tocode_cp, int fromcode_cp, const char* start,
const char* end, char** resultp, size_t* lengthp, bool strict)
{
iconv_t cd = iconv_open(tocode_cp, fromcode_cp, strict);
if (cd == (iconv_t)(-1))
{
return errno;
}
int ret = iconv_string(cd, start, end, resultp, lengthp);
iconv_close(cd);
return ret;
}
} // namespace reiconv
} // namespace base
} // namespace cppp