// utf8.h
//
// Provenance (commit that introduced this header):
//   commit 95a39ef1eb13fef04091f6161578751f61c0769b
//   Author: Xnoe
//   Date:   Mon Jun 8 09:59:51 2020 +0100
//
//   Initial commit of simplistic and bad UTF-8... thing? Written in C++.
//   Stores characters as ints representing the one to four byte UTF8 data.
//
//   The same commit also added a binary `a.out` (mode 100755); utf8.h was a
//   new file (index 0000000..d31c14b, 50 lines).

#ifndef UTF8_H
#define UTF8_H

// NOTE(review): the original header had four #include lines whose targets
// were lost. These two are the ones the visible code requires; confirm the
// remaining two against the repository.
#include <iostream>
#include <vector>

// Free-function declaration kept from the original header. No definition is
// visible here — TODO confirm whether anything still links against it.
unsigned int fetch32(char**);

/// One UTF-8 encoded character, stored as its raw encoded bytes packed
/// big-endian into an unsigned int (first byte of the sequence ends up in the
/// most significant occupied byte). This is NOT the Unicode code point:
/// "é" (C3 A9) is stored as 0xC3A9, not 0xE9.
class char32 {
  /// Reads one encoded character starting at `cursor`, advances `cursor` past
  /// the bytes consumed and returns them packed as described on the class.
  ///
  /// The lead byte selects the expected sequence length (1 to 4 bytes).
  /// Following bytes are consumed only while they have the continuation form
  /// 10xxxxxx, so a truncated or malformed sequence can never swallow the NUL
  /// terminator or the start of the next character. When `cursor` already
  /// points at the NUL terminator, 0 is returned and `cursor` is left there.
  template <typename CharPtr>
  static unsigned int decode(CharPtr& cursor) {
    const unsigned char lead = static_cast<unsigned char>(*cursor);
    if (lead == 0) return 0;  // never step past the end of the string

    int length = 1;
    if ((lead >> 3) == 0b11110) length = 4;
    else if ((lead >> 4) == 0b1110) length = 3;
    else if ((lead >> 5) == 0b110) length = 2;

    unsigned int packed = lead;
    ++cursor;
    for (int k = 1; k < length; ++k) {
      const unsigned char next = static_cast<unsigned char>(*cursor);
      if ((next & 0xC0) != 0x80) break;  // not a continuation byte: stop here
      packed = (packed << 8) | next;
      ++cursor;
    }
    return packed;
  }

public:
  /// Packed encoded bytes of the character.
  unsigned int c;

  /// Wraps an already packed value.
  char32(unsigned int i) : c(i) {}

  /// Decodes the first character of the NUL-terminated string `s`
  /// (`s` must not be null).
  char32(const char* s) : c(decode(s)) {}

  /// Decodes the character at `*s` and advances `*s` past it.
  char32(char** s) : c(decode(*s)) {}

  /// Same as above for a cursor into read-only text.
  char32(const char** s) : c(decode(*s)) {}

  /// True when the first character of `cs` has the same encoding as this one.
  bool operator==(const char* cs) const { return c == decode(cs); }

  /// Shifts the packed value right by `a` bits.
  unsigned int operator>>(int a) const { return c >> a; }
};

/// Writes the encoded bytes of `c32` to `stream`, most significant first.
/// Leading zero bytes are padding of the packed representation, not part of
/// the character, and are skipped; the lowest byte is always written.
inline std::ostream& operator<<(std::ostream& stream, const char32& c32) {
  bool started = false;
  for (int shift = 24; shift >= 0; shift -= 8) {
    const unsigned char byte = static_cast<unsigned char>(c32 >> shift);
    if (byte != 0 || started || shift == 0) {
      stream << static_cast<char>(byte);
      started = true;
    }
  }
  return stream;
}

/// A sequence of char32 values decoded from a NUL-terminated UTF-8 string.
struct string32 {
  /// Decoded characters, in input order.
  std::vector<char32> cs;

  /// Splits `sd` (must not be null) into characters until the terminator.
  string32(const char* sd) {
    while (*sd)
      cs.emplace_back(&sd);  // the char32 constructor advances `sd`
  }

  /// Returns a copy of the character at index `i` (no bounds checking).
  char32 operator[](int i) const { return cs[i]; }
};

// NOTE(review): the reviewed text ends part-way through the stream operator
// for string32. Only the fragment below is visible, so it is recorded here
// rather than reconstructed:
//
//   std::ostream& operator<<(std::ostream& stream, const string32& s32) {
//     for (int i=0;i

#endif  // UTF8_H