c++使用icu国际化(i18n)

2022-12-29 15:06:38 浏览数 (2)

icu

International Components for Unicode(ICU)。源码:https://github.com/unicode-org/icu.git ;官网:https://icu.unicode.org/ ;帮助文档:https://unicode-org.github.io/icu/userguide/icu/howtouseicu.html

  • i18n,Internationalization (in/i18n) library
  • io,Ustdio/iostream library (icuio):C/C++ 标准库读取文件时按 ANSI 编码处理,需要自行做编码转换;使用 ICU 的 u_fopen 可以直接读取 Unicode 文本

编码检测

代码语言:javascript复制
/**
 * Detect the most likely character encoding of a byte buffer with ICU's
 * charset detector (ucsdet).
 *
 * @param data      Input: the bytes whose encoding should be detected.
 * @param len       Input: length of data, passed through to ucsdet_setText().
 * @param detected  Output: on success receives a newly allocated (strdup)
 *                  copy of the best-matching charset name; the caller must
 *                  free() it. Set to NULL whenever the function returns false.
 * @return true when a charset name was stored in *detected; false on any
 *         ICU failure, when no match was found, or on allocation failure.
 */
bool detectTextEncoding(const char *data, int32_t len, char **detected) {
    /* All locals are declared up front so that `goto cleanup` never jumps
     * over an initialization (keeps the function valid as C++ too). */
    UErrorCode status = U_ZERO_ERROR;
    UCharsetDetector *csd = NULL;
    const UCharsetMatch **csm = NULL;
    const char *name = NULL;
    int32_t matchCount = 0;
    bool ok = false;

    if (detected == NULL)
        return false;
    *detected = NULL;

    csd = ucsdet_open(&status);
    /* U_FAILURE() rather than `!= U_ZERO_ERROR`: warning codes are not errors. */
    if (U_FAILURE(status))
        return false;

    ucsdet_setText(csd, data, len, &status);
    if (U_FAILURE(status))
        goto cleanup;

    csm = ucsdet_detectAll(csd, &matchCount, &status);
    if (U_FAILURE(status) || csm == NULL || matchCount <= 0)
        goto cleanup;

#if 0 /* Debug aid: print every candidate encoding with its confidence. */
    for (int32_t match = 0; match < matchCount; match += 1)
    {
        const char *candidate = ucsdet_getName(csm[match], &status);
        const char *lang = ucsdet_getLanguage(csm[match], &status);
        int32_t confidence = ucsdet_getConfidence(csm[match], &status);

        if (lang == NULL || strlen(lang) == 0)
            lang = "**";

        printf("%s (%s) %d\n", candidate, lang, confidence);
    }
#endif

    /* The first entry is used as the best match. Check the name before
     * duplicating it so strdup() never sees NULL. */
    name = ucsdet_getName(csm[0], &status);
    if (U_FAILURE(status) || name == NULL)
        goto cleanup;

    *detected = strdup(name); /* heap copy; ownership passes to the caller */
    if (*detected == NULL)
        goto cleanup;

    printf("charset = %s\n", *detected);
    ok = true;

cleanup:
    /* Single exit path so the detector is closed on failure as well. */
    ucsdet_close(csd);
    return ok;
}

编码转换

代码语言:javascript复制
/**
 * Convert a byte string from one charset to another with a single call to
 * ucnv_convert().
 *
 * @param toConverterName    Name of the charset to convert to.
 * @param fromConverterName  Name of the charset the source is encoded in.
 * @param target             Output buffer that receives the converted bytes.
 * @param targetCapacity     Capacity of target.
 * @param source             Bytes to convert.
 * @param sourceLength       Size of source.
 * @return The UErrorCode value left by ucnv_convert(), returned as an int.
 */
int convert(const char *toConverterName, const char *fromConverterName,
            char *target, int32_t targetCapacity, const char *source, int32_t sourceLength) {
    UErrorCode status = U_ZERO_ERROR;

    /* The value returned by ucnv_convert() is not used; callers only get
     * the status code. */
    (void)ucnv_convert(toConverterName, fromConverterName,
                       target, targetCapacity,
                       source, sourceLength,
                       &status);

    return (int)status;
}
代码语言:javascript复制
/* Fragment: conv, status, target, targetSize, source, sourceLen and len are
 * declared outside this snippet. */
/* Open a converter for the "iso-8859-3" charset; errors are reported via status. */
conv = ucnv_open("iso-8859-3", &status);
/* Convert from ISO-8859-3 to Unicode */
len = ucnv_toUChars(conv, target, targetSize, source, sourceLen, &status);
/* Release the converter after the conversion call. */
ucnv_close(conv);

i18n国际化

resource tree structure:

C 打开 resource

代码语言:javascript复制
/* Open the "root" resource bundle through the C API and close it again. */
UErrorCode status = U_ZERO_ERROR;
/* NULL package name: presumably selects ICU's own data; confirm against the ures_open documentation. */
UResourceBundle* icuRoot = ures_open(NULL, "root", &status);
if (U_SUCCESS(status)) {
	// Placeholder for lookups on icuRoot, e.g. with ures_getStringByKey.
	ures_close(icuRoot);
}

C++ 打开 resource

代码语言:javascript复制
// Open the "de_AT" bundle of package "myPackage" through the C++ ResourceBundle class.
UErrorCode status = U_ZERO_ERROR;
// we rely on automatic construction of Locale object from a char*
ResourceBundle myResource("myPackage", "de_AT", status); 
// The body is left empty in this example; myResource is only meant to be used when status reports success.
if (U_SUCCESS(status)) {
}

查询key

代码语言:javascript复制
// Look up the "zoneStrings" entry under icuRoot (icuRoot is not declared in this snippet).
// NOTE(review): NULL is passed as the third argument, presumably asking ICU to allocate the result; confirm against the ures_getByKey documentation.
UResourceBundle *zones = ures_getByKey(icuRoot, "zoneStrings", NULL, &status);
	if (U_SUCCESS(status)) {
		// The returned bundle is closed here once it is no longer needed.
		ures_close(zones);
	}

bundle file

代码语言:javascript复制
// Resource bundle source whose top-level name is "root".
root {
    // Table "menu": an id plus a list of items.
    menu {
        id { "mainmenu" }
        // Unnamed nested braces: each inner { ... } is one entry of "items".
        items {
            {
                id { "file" }
                // NOTE(review): "&" presumably marks the keyboard accelerator; confirm with the consuming UI code.
                name { "&File" }
                // Nested entries of the "file" item, each with an id and a name.
                items {
                    {
                        id { "open" }
                        name { "&Open" }
                    }
                    {
                        id { "save" }
                        name { "&Save" }
                    }
                    {
                        id { "exit" }
                        name { "&Exit" }
                    }
                }
            }
        }
   }
}

生成 binary resource bundle 文件

genrb -d dest_dirname root.txt en.txt

0 人点赞