/*
 * File: CCByte_CCENCODE_H
 * Author:
 * Date:
 *
 * It provides support for the following encodings:
 *
    European languages
        ASCII, ISO-8859-{1,2,3,4,5,7,9,10,13,14,15,16},
        KOI8-R, KOI8-U, KOI8-RU,
        CP{1250,1251,1252,1253,1254,1257}, CP{850,866,1131},
        Mac{Roman,CentralEurope,Iceland,Croatian,Romania},
        Mac{Cyrillic,Ukraine,Greek,Turkish},
        Macintosh
    Semitic languages
        ISO-8859-{6,8}, CP{1255,1256}, CP862, Mac{Hebrew,Arabic}
    Japanese
        EUC-JP, SHIFT_JIS, CP932, ISO-2022-JP, ISO-2022-JP-2, ISO-2022-JP-1,
        ISO-2022-JP-MS
    Chinese
        EUC-CN, HZ, GBK, CP936, GB18030, GB18030:2022, EUC-TW, BIG5, CP950,
        BIG5-HKSCS, BIG5-HKSCS:2004, BIG5-HKSCS:2001, BIG5-HKSCS:1999,
        ISO-2022-CN, ISO-2022-CN-EXT
    Korean
        EUC-KR, CP949, ISO-2022-KR, JOHAB
    Armenian
        ARMSCII-8
    Georgian
        Georgian-Academy, Georgian-PS
    Tajik
        KOI8-T
    Kazakh
        PT154, RK1048
    Thai
        ISO-8859-11, TIS-620, CP874, MacThai
    Laotian
        MuleLao-1, CP1133
    Vietnamese
        VISCII, TCVN, CP1258
    Platform specifics
        HP-ROMAN8, NEXTSTEP
    Full Unicode
        UTF-8
        UCS-2, UCS-2BE, UCS-2LE
        UCS-4, UCS-4BE, UCS-4LE
        UTF-16, UTF-16BE, UTF-16LE
        UTF-32, UTF-32BE, UTF-32LE
        UTF-7
        C99, JAVA
    Full Unicode, in terms of 'uint16_t' or 'uint32_t'
    (with machine dependent endianness and alignment)
        UCS-2-INTERNAL, UCS-4-INTERNAL

    When configured with the option -DENABLE_EXTRA, it also provides support for a few extra encodings:

    European languages
        CP{437,737,775,852,853,855,857,858,860,861,863,865,869,1125}
    Semitic languages
        CP864
    Japanese
        EUC-JISX0213, Shift_JISX0213, ISO-2022-JP-3
    Chinese
        BIG5-2003 (experimental)
    Turkmen
        TDS565
    Platform specifics
        ATARIST, RISCOS-LATIN1
    EBCDIC compatible (not ASCII compatible, very rarely used)
        European languages
            IBM-{037,273,277,278,280,282,284,285,297,423,500,870,871,875,880},
            IBM-{905,924,1025,1026,1047,1112,1122,1123,1140,1141,1142,1143},
            IBM-{1144,1145,1146,1147,1148,1149,1153,1154,1155,1156,1157,1158},
            IBM-{1165,1166,4971}
        Semitic languages
            IBM-{424,425,12712,16804}
        Persian
            IBM-1097
        Thai
            IBM-{838,1160}
        Laotian
            IBM-1132
        Vietnamese
            IBM-{1130,1164}
        Indic languages
            IBM-1137
 *
 */

// Include guard: keeps the contents of this header from being processed more
// than once per translation unit. The matching #endif is at the end of the file.
#ifndef CCBYTE_CCENCODE_H
#define CCBYTE_CCENCODE_H

// Windows-specific preprocessor section. It is currently empty and is kept as
// a placeholder for platform-specific directives.
#ifdef _WIN32

#endif

// Standard library headers for the types used directly in the declarations
// of this header (std::string, std::vector).
#include <string>
#include <vector>

// Required library and project headers.
#include "cppp/reiconv.hpp"
#include "CC.h"

// Bring the base namespace of the cppp library into scope.
// NOTE(review): a using-directive at header scope is visible to every file that
// includes this header; it is kept because existing code may rely on it.
using namespace cppp::base;

// Standard character set identifiers.
//
// Values of this enum are accepted by CTL::Charset::format() and by the
// CTL::Encode overloads that take a StandardCharsets source/target pair.
//
// The enumerators have no explicit initializers, so they are numbered
// consecutively from 0 (UTF8) to 12 (KOI8R). Add new entries at the end to
// keep the existing numeric values stable.
enum StandardCharsets {
    UTF8,
    GBK,
    GB2312,
    UNICODE_t,  // NOTE(review): the "_t" suffix presumably avoids a clash with a UNICODE macro — confirm.
    UTF16,
    UTF16BE,
    UTF16LE,
    UTF32,
    UTF32BE,
    UTF32LE,
    ASCII,
    ISO8859,
    KOI8R
};

// 定义CTL命名空间
|
||
namespace CTL{
|
||
// 定义Charset命名空间
|
||
namespace Charset {
|
||
// 根据标准字符集枚举返回对应的字符集格式字符串
|
||
const char* format(StandardCharsets standardCharsets);
|
||
}
|
||
|
||
// 定义Encode类
|
||
class Encode{
|
||
public:
|
||
// 格式化字符串,从UTF-8编码转换为目标编码
|
||
static std::string Format(const std::string& utf8Str,const std::string& str);
|
||
|
||
// 格式化字符串,从源编码转换为目标编码
|
||
static std::string Format(const std::string& str,const std::string &from,const std::string &to);
|
||
|
||
// 格式化字符串,从源编码转换为目标编码
|
||
static std::string formatString(const std::string& str,StandardCharsets from,StandardCharsets to);
|
||
|
||
// 格式化字符串,从源编码转换为目标编码,并以字符向量形式返回结果
|
||
static std::vector<char> Format(const std::string& str,StandardCharsets from,StandardCharsets to);
|
||
|
||
// 格式化字符数据,从源编码转换为目标编码
|
||
static std::vector<char> Format(const std::vector<char>& data,StandardCharsets from,StandardCharsets to);
|
||
};
|
||
}
|
||
|
||
#endif