Jun 30, 2009 at 1:46pm UTC
I'm working on a project in VS2008 that compiles in MBCS, but I need to work with some UTF-8 strings in order to communicate with some web services. I've worked out how to do it if I were to compile the project in Unicode, but converting the entire project would be a massive undertaking. Is there any way I can convert either MBCS to UTF-8 or MBCS to Unicode? Either one would work for me.
Thanks!
Last edited on Jun 30, 2009 at 1:51pm UTC
Jun 30, 2009 at 2:11pm UTC
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
#include <cstddef>
#include <cwchar>
#include <string>
//The three bytes of the UTF-8 byte order mark (EF BB BF), in order.
//UniFromUTF8 compares them against the first three bytes of its input.
#define BOM8A 0xEF
#define BOM8B 0xBB
#define BOM8C 0xBF
//Support function. Do NOT call directly.
//Decodes the UTF-8 bytes src[0..srcl) into wide characters written to dst.
//
//Exactly one wchar_t is written for every byte of src that is NOT a
//continuation byte (10xxxxxx), so dst must have room for that many elements.
//Stray continuation bytes are skipped without producing output.
//
//L'?' is written in place of a decoded character when:
//  - the lead byte is not a valid UTF-8 lead byte (0xF8..0xFF),
//  - the sequence is cut short by the end of the input,
//  - a byte that should be a continuation byte is not one, or
//  - the sequence is 4 bytes long and wchar_t is only 16 bits wide.
//Overlong encodings and surrogate code points are not rejected.
//
//Note: Assumes WCHAR_MAX>=0xFFFF
void UTF8_WC(wchar_t *dst,const unsigned char *src,unsigned long srcl){
	for (unsigned long a=0;a<srcl;a++){
		const unsigned char byte=src[a];
		//Continuation bytes are consumed together with their lead byte.
		if ((byte&192)==128)
			continue;
		unsigned long extra; //number of continuation bytes expected
		unsigned long cp;    //code point accumulated so far
		if (!(byte&128)){
			extra=0;
			cp=byte;
		}else if ((byte&224)==192){
			extra=1;
			cp=byte&31;
		}else if ((byte&240)==224){
			extra=2;
			cp=byte&15;
		}else if ((byte&248)==240){
			extra=3;
			cp=byte&7;
		}else{
			//0xF8..0xFF never start a UTF-8 sequence.
			*dst++=L'?';
			continue;
		}
		//Never read beyond src[srcl-1], even for a truncated sequence.
		bool ok=extra<=srcl-a-1;
		for (unsigned long i=1;ok && i<=extra;i++){
			const unsigned char next=src[a+i];
			if ((next&192)!=128)
				ok=false;
			else
				cp=(cp<<6)|(next&63);
		}
#if WCHAR_MAX==0xFFFF
		//A 16-bit wchar_t cannot hold what a 4-byte sequence encodes.
		if (extra==3)
			ok=false;
#endif
		*dst++=ok?static_cast<wchar_t>(cp):L'?';
	}
}
//Call this one.
std::wstring UniFromUTF8(const std::string &str){
ulong start=0;
if (str.size()>=3 && (uchar)str[0]==BOM8A && (uchar)str[1]==BOM8B && (uchar)str[2]==BOM8C)
start+=3;
const uchar *str2=(const uchar *)&str[0]+start;
ulong size=0;
for (ulong a=0,end=str.size();a<end;a++,str2++)
if (*str2<128 || (*str2&192)==192)
size++;
std::wstring res;
res.resize(size);
str2=(const uchar *)&str[0]+start;
UTF8_WC(&res[0],str2,str.size()-start);
return res;
}
Enjoy.
Last edited on Jun 30, 2009 at 2:12pm UTC