#include <assert.h>
#include <ctype.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
// Collapse every run of whitespace in a wide string into a single L' ',
// rewriting the string in place. Safe because the write index never
// overtakes the read index.
static void collapse_wide_whitespace(wchar_t *text)
{
    size_t written = 0;
    int space_added = 0;

    for (size_t i = 0; text[i] != L'\0'; ++i) {
        if (iswspace((wint_t)text[i])) {
            // Only the first whitespace character of a run is emitted.
            if (!space_added) {
                text[written++] = L' ';
                space_added = 1;
            }
        } else {
            text[written++] = text[i];
            space_added = 0;
        }
    }
    text[written] = L'\0';
}

// Separate words with only one space.
//
// Returns a newly allocated copy of `content` in which every run of
// whitespace characters (as classified by iswspace) is replaced by a single
// ' '. Leading and trailing runs are collapsed to one space, not trimmed.
//
// The multibyte <-> wide conversions use the current LC_CTYPE locale, so the
// caller must have selected a suitable locale (via setlocale) before passing
// non-ASCII text.
//
// Returns NULL if `content` is NULL, if it is not a valid multibyte string in
// the current locale, or if memory allocation fails. The caller owns the
// returned buffer and must release it with free().
char *handle_whitespace(char *content)
{
    if (content == NULL) {
        return NULL;
    }

    // mbstowcs signals an invalid multibyte sequence with (size_t)-1.
    size_t wide_len = mbstowcs(NULL, content, 0);
    if (wide_len == (size_t)-1) {
        return NULL;
    }

    // Heap allocation: the length is caller-controlled, so a stack VLA could
    // overflow. calloc also guards the count * size multiplication.
    wchar_t *wide = calloc(wide_len + 1, sizeof *wide);
    if (wide == NULL) {
        return NULL;
    }

    char *newstr = NULL;

    // convert char to wchar_t
    if (mbstowcs(wide, content, wide_len + 1) != (size_t)-1) {
        collapse_wide_whitespace(wide);

        // convert wchar_t back to char
        size_t newlen = wcstombs(NULL, wide, 0);
        if (newlen != (size_t)-1) {
            newstr = malloc(newlen + 1);
            if (newstr != NULL) {
                wcstombs(newstr, wide, newlen + 1);
            }
        }
    }

    free(wide);
    return newstr;
}
// Demo driver: normalizes the whitespace of a sample string containing
// non-ASCII text, prints the result and releases it.
int main(void) {
    // handle_whitespace converts through mbstowcs/wcstombs, which follow the
    // current LC_CTYPE locale; adopt the environment's locale so the
    // non-ASCII part of the sample can be decoded.
    setlocale(LC_ALL, "");

    char *h = handle_whitespace("hello こんにちは world blah");
    if (h == NULL) {
        fprintf(stderr, "handle_whitespace failed\n");
        return EXIT_FAILURE;
    }

    printf("%s\n", h);
    free(h);  // the returned buffer is heap-allocated and owned by the caller
    return 0;
}
I2luY2x1ZGUgPHN0ZGlvLmg+CiNpbmNsdWRlIDxzdGRsaWIuaD4KI2luY2x1ZGUgPHN0cmluZy5oPgojaW5jbHVkZSA8Y3R5cGUuaD4KI2luY2x1ZGUgPGxvY2FsZS5oPgojaW5jbHVkZSA8c3RkaW50Lmg+CiNpbmNsdWRlIDxhc3NlcnQuaD4KLy8gc2VwYXJhdGUgd29yZHMgd2l0aCBvbmx5IG9uZSBzcGFjZQpjaGFyICpoYW5kbGVfd2hpdGVzcGFjZShjaGFyICpjb250ZW50KQp7CiAgIC8vIHRvIGJlIHJlbW92ZWQgbGF0ZXIKICAgc2V0bG9jYWxlKExDX0FMTCwgImVuX1VTLnV0ZjgiKTsKICAgaW50IGksIGxlbiA9IG1ic3Rvd2NzKE5VTEwsY29udGVudCwwKSsxOwogICAKICAgd2NoYXJfdCB1bmljb2RlX2NvbnRlbnRbbGVuXTsKICAgd2NoYXJfdCAqbm9ybWFsaXplZF9jb250ZW50ID0gbWFsbG9jKGxlbiAqIHNpemVvZih3Y2hhcl90KSk7CiAgIAogICAvLyBjb252ZXJ0IGNoYXIgdG8gd2NoYXJfdAogICBtYnN0b3djcyh1bmljb2RlX2NvbnRlbnQsIGNvbnRlbnQsIGxlbik7CiAgIAogICBzaG9ydCBzcGFjZV9hZGRlZCA9IDA7CiAgIAogICBpbnQgd3JpdHRlbj0wOwogICBmb3IoaT0wOyB1bmljb2RlX2NvbnRlbnRbaV0gOyArK2kpewogICAgICAgaWYoaXN3c3BhY2UodW5pY29kZV9jb250ZW50W2ldKSl7CgkgaWYoIXNwYWNlX2FkZGVkKSB7CgkgICBub3JtYWxpemVkX2NvbnRlbnRbd3JpdHRlbl0gPSBMJyAnOwoJICAgc3BhY2VfYWRkZWQgPSAxOwoJICAgd3JpdHRlbisrOwoJIH0KICAgICAgIH1lbHNlIHsJIAogICAgICAgICBub3JtYWxpemVkX2NvbnRlbnRbd3JpdHRlbl0gPSB1bmljb2RlX2NvbnRlbnRbaV07Cgkgc3BhY2VfYWRkZWQgPSAwOwoJIHdyaXR0ZW4rKzsKICAgICAgIH0KICAgICAgIAogICB9CiAgIG5vcm1hbGl6ZWRfY29udGVudFt3cml0dGVuXSA9IEwnXDAnOwogICAKICAgLy8gY29udmVydCB3Y2hhcl90IGJhY2sgdG8gY2hhcgogICBpbnQgbmV3bGVuID0gd2NzdG9tYnMoTlVMTCwgbm9ybWFsaXplZF9jb250ZW50LCAwKSsxOwogICBjaGFyICpuZXdzdHIgPSBtYWxsb2MobmV3bGVuKTsKICAgd2NzdG9tYnMobmV3c3RyLG5vcm1hbGl6ZWRfY29udGVudCxuZXdsZW4pOwogIAogICBmcmVlKG5vcm1hbGl6ZWRfY29udGVudCk7CiAgIHJldHVybiBuZXdzdHI7Cn0KCgppbnQgbWFpbih2b2lkKSB7CgljaGFyICpoID0gaGFuZGxlX3doaXRlc3BhY2UoImhlbGxvICAgICAgICAgIOOBk+OCk+OBq+OBoeOBryAgICAgICAgICAgICB3b3JsZCAgICAgIGJsYWgiKTsKCXByaW50ZigiJXNcbiIsaCk7CglyZXR1cm4gMDsKfQo=