Character Encoding Conversion Techniques in C++ (Win32 API)


// Convert a NUL-terminated UTF-8 string to the system ANSI code page.
//
// NOTE: the target is CP_ACP, i.e. whatever ANSI code page the machine is
// configured with. The output is GB2312/GBK only when that code page is the
// Simplified Chinese one (936) -- confirm this matches the deployment locale.
//
// Parameters:
//   utfInput - NUL-terminated UTF-8 text; NULL is treated as an empty string.
// Returns:
//   A NUL-terminated array allocated with new[]; the caller must release it
//   with delete[]. An empty string is returned when the conversion fails.
char* ConvertUTF8ToGB(const char* utfInput) {
    // Stage 1: UTF-8 -> UTF-16. std::wstring owns the intermediate buffer so
    // it is released even if the output allocation below throws.
    std::wstring wide;
    if (utfInput != NULL) {
        const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utfInput, -1, NULL, 0);
        if (wideLen > 0) {
            wide.resize(static_cast<std::wstring::size_type>(wideLen));
            const int written =
                MultiByteToWideChar(CP_UTF8, 0, utfInput, -1, &wide[0], wideLen);
            // The reported count includes the terminating NUL; drop it so the
            // wstring holds exactly the text (or nothing on failure).
            wide.resize(written > 0 ? static_cast<std::wstring::size_type>(written - 1) : 0);
        }
    }

    // Stage 2: UTF-16 -> ANSI code page.
    int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (ansiLen < 0) {
        ansiLen = 0;
    }
    // Zero-initialised with one spare byte so the result is always terminated,
    // even if the conversion call below fails.
    char* output = new char[ansiLen + 1]();
    if (ansiLen > 0) {
        WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, output, ansiLen, NULL, NULL);
    }
    return output;
}

// Convert a NUL-terminated string in the system ANSI code page to UTF-8.
//
// NOTE: the source encoding is CP_ACP, i.e. whatever ANSI code page the
// machine is configured with. The input is interpreted as GB2312/GBK only
// when that code page is the Simplified Chinese one (936) -- confirm this
// matches the deployment locale.
//
// Parameters:
//   gbInput - NUL-terminated ANSI text; NULL is treated as an empty string.
// Returns:
//   A NUL-terminated array allocated with new[]; the caller must release it
//   with delete[]. An empty string is returned when the conversion fails.
char* ConvertGBToUTF8(const char* gbInput) {
    // Stage 1: ANSI -> UTF-16. std::wstring owns the intermediate buffer so
    // it is released even if the output allocation below throws.
    std::wstring wide;
    if (gbInput != NULL) {
        const int wideLen = MultiByteToWideChar(CP_ACP, 0, gbInput, -1, NULL, 0);
        if (wideLen > 0) {
            wide.resize(static_cast<std::wstring::size_type>(wideLen));
            const int written =
                MultiByteToWideChar(CP_ACP, 0, gbInput, -1, &wide[0], wideLen);
            // The reported count includes the terminating NUL; drop it so the
            // wstring holds exactly the text (or nothing on failure).
            wide.resize(written > 0 ? static_cast<std::wstring::size_type>(written - 1) : 0);
        }
    }

    // Stage 2: UTF-16 -> UTF-8.
    int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len < 0) {
        utf8Len = 0;
    }
    // Zero-initialised with one spare byte so the result is always terminated,
    // even if the conversion call below fails.
    char* output = new char[utf8Len + 1]();
    if (utf8Len > 0) {
        WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, output, utf8Len, NULL, NULL);
    }
    return output;
}

UTF-8 and ANSI Conversion


// Convert a UTF-8 std::string to the system ANSI code page (CP_ACP), in place.
//
// The whole string is converted using its explicit length, so embedded NUL
// characters no longer truncate the result. If any conversion step fails, or
// the string is empty or too long for the int-based Win32 API, the argument
// is left unchanged.
//
// Parameters:
//   utf8String - in: UTF-8 text; out: the same text in the ANSI code page.
void UTF8ToANSI(std::string& utf8String) {
    // The Win32 API takes int lengths; reject anything that does not
    // round-trip. A zero length is also rejected by the API, and an empty
    // string needs no conversion anyway.
    const int srcLen = static_cast<int>(utf8String.size());
    if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != utf8String.size()) {
        return;
    }

    // Stage 1: UTF-8 -> UTF-16 (size query, then conversion).
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8String.data(), srcLen, NULL, 0);
    if (wideLen <= 0) {
        return;
    }
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, utf8String.data(), srcLen, &wide[0], wideLen) <= 0) {
        return;
    }

    // Stage 2: UTF-16 -> ANSI code page.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.data(), wideLen, NULL, 0, NULL, NULL);
    if (ansiLen <= 0) {
        return;
    }
    std::string ansi(static_cast<std::string::size_type>(ansiLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wide.data(), wideLen, &ansi[0], ansiLen, NULL, NULL) <= 0) {
        return;
    }

    // Commit only after every step succeeded; swap cannot fail part-way.
    utf8String.swap(ansi);
}

// Convert a std::string in the system ANSI code page (CP_ACP) to UTF-8, in place.
//
// The whole string is converted using its explicit length, so embedded NUL
// characters no longer truncate the result. If any conversion step fails, or
// the string is empty or too long for the int-based Win32 API, the argument
// is left unchanged.
//
// Parameters:
//   ansiString - in: text in the ANSI code page; out: the same text as UTF-8.
void ANSIToUTF8(std::string& ansiString) {
    // The Win32 API takes int lengths; reject anything that does not
    // round-trip. A zero length is also rejected by the API, and an empty
    // string needs no conversion anyway.
    const int srcLen = static_cast<int>(ansiString.size());
    if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != ansiString.size()) {
        return;
    }

    // Stage 1: ANSI -> UTF-16 (size query, then conversion).
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, ansiString.data(), srcLen, NULL, 0);
    if (wideLen <= 0) {
        return;
    }
    std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, ansiString.data(), srcLen, &wide[0], wideLen) <= 0) {
        return;
    }

    // Stage 2: UTF-16 -> UTF-8.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideLen, NULL, 0, NULL, NULL);
    if (utf8Len <= 0) {
        return;
    }
    std::string utf8(static_cast<std::string::size_type>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideLen, &utf8[0], utf8Len, NULL, NULL) <= 0) {
        return;
    }

    // Commit only after every step succeeded; swap cannot fail part-way.
    ansiString.swap(utf8);
}

TCHAR and char Conversion


// Convert a NUL-terminated TCHAR string to a newly allocated char string.
//
// In _UNICODE builds the text is converted from UTF-16 to the system ANSI
// code page (CP_ACP); otherwise TCHAR is already char and the string is
// simply duplicated.
//
// Parameters:
//   tcharStr - NUL-terminated source string; may be NULL.
// Returns:
//   A NUL-terminated buffer allocated with malloc(); the caller must release
//   it with free(). NULL is returned when tcharStr is NULL, when allocation
//   fails, or when the conversion fails.
char* TCHARToChar(const TCHAR* tcharStr) {
    if (tcharStr == NULL) {
        return NULL;
    }
#ifdef _UNICODE
    // Size query; the count includes the terminating NUL.
    const int charSize = WideCharToMultiByte(CP_ACP, 0, tcharStr, -1, NULL, 0, NULL, NULL);
    if (charSize <= 0) {
        return NULL;
    }
    char* charBuffer = static_cast<char*>(malloc(static_cast<size_t>(charSize)));
    if (charBuffer == NULL) {
        return NULL;
    }
    if (WideCharToMultiByte(CP_ACP, 0, tcharStr, -1, charBuffer, charSize, NULL, NULL) <= 0) {
        // Never hand back a buffer whose contents are unspecified.
        free(charBuffer);
        return NULL;
    }
#else
    const size_t byteCount = strlen(tcharStr) + 1;  // include the terminator
    char* charBuffer = static_cast<char*>(malloc(byteCount));
    if (charBuffer == NULL) {
        return NULL;
    }
    memcpy(charBuffer, tcharStr, byteCount);
#endif
    return charBuffer;
}

// Convert a NUL-terminated char string to a newly allocated TCHAR string.
//
// In _UNICODE builds the text is converted from the system ANSI code page
// (CP_ACP) to UTF-16; otherwise TCHAR is already char and the string is
// simply duplicated.
//
// Parameters:
//   charStr - NUL-terminated source string; may be NULL.
// Returns:
//   A NUL-terminated buffer allocated with malloc(); the caller must release
//   it with free(). NULL is returned when charStr is NULL, when allocation
//   fails, or when the conversion fails.
TCHAR* CharToTCHAR(const char* charStr) {
    if (charStr == NULL) {
        return NULL;
    }
#ifdef _UNICODE
    // Size query; the count is in TCHARs and includes the terminating NUL.
    const int tcharSize = MultiByteToWideChar(CP_ACP, 0, charStr, -1, NULL, 0);
    if (tcharSize <= 0) {
        return NULL;
    }
    TCHAR* tcharBuffer =
        static_cast<TCHAR*>(malloc(static_cast<size_t>(tcharSize) * sizeof(TCHAR)));
    if (tcharBuffer == NULL) {
        return NULL;
    }
    if (MultiByteToWideChar(CP_ACP, 0, charStr, -1, tcharBuffer, tcharSize) <= 0) {
        // Never hand back a buffer whose contents are unspecified.
        free(tcharBuffer);
        return NULL;
    }
#else
    const size_t charCount = strlen(charStr) + 1;  // include the terminator
    TCHAR* tcharBuffer = static_cast<TCHAR*>(malloc(charCount * sizeof(TCHAR)));
    if (tcharBuffer == NULL) {
        return NULL;
    }
    memcpy(tcharBuffer, charStr, charCount * sizeof(TCHAR));
#endif
    return tcharBuffer;
}

Tags: character-encoding unicode utf8 gb2312 ansi

Posted on Mon, 22 Jun 2026 17:59:29 +0000 by Reformed