From 95a39ef1eb13fef04091f6161578751f61c0769b Mon Sep 17 00:00:00 2001 From: Xnoe Date: Mon, 8 Jun 2020 09:59:51 +0100 Subject: [PATCH] Initial commit of simplistic and bad UTF-8... thing? Written in C++. Stores characters as ints representing the one to four byte UTF8 data. --- a.out | Bin 0 -> 32064 bytes utf8.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100755 a.out create mode 100644 utf8.h diff --git a/a.out b/a.out new file mode 100755 index 0000000000000000000000000000000000000000..7e8ddfc46c022905c3ceac65379d4428748a9779 GIT binary patch literal 32064 zcmeHwe|%NdmG8+f%qS!Q!6KqwsXtIO%`YH9s0qn|6G?7j`_KFF$d4Z*u2HFAy2Mm6VW@{s04lX| z?Hv4jk#?ar1?6mklk`dnK$S;FW-`oWd=?#plH^g!Z z7AQG}%Acf${`NE84LzXI;SvAZK5Y0P+D3BS+%7kKn)NBD%)J3}Wak$>xOv)It}mTm z)^U9blK<)WT4$hjQQ6lz+pg&h1bcd~=`CM$&7!ixNT{$xw43x3@5)VH zFNK~?;HPWnYdc#cu>fUpAnqd3Hs68;6l;q#)7;n<3Yw9qHykxh&8%&zH{1MS|HeQh z>JK;7S9gYj{w8m0r(e=iQ*LgIc1CI&qs7I(4sY0uhP{Dkq}HeFYk@0m^+p0dGZcx2 z{obxxUz4dfE;N0b38|7I(~Ne6LwA~;{@})Fhv^T8L*a&1L|xq&Ef0huCdB<^wZTBN zrdZ!3q>2|Ssa9{B>Fw+c`9Lszy^2BMqI zO+`Y@mo-_M6}+=6m2uU=IAKHBFPbFkDD#DSqHLQ#*v6Kj6S}1TakkOtGb2-l)GfpwzbqsCjpxqf8RyKJD@bg4)XJ zYO}PkNLyJ~yS&;gDJ-+jN(vWh)$7((Ew43;3ri(>-8wLrQM6S1&;=rpm5FzfsbCrUB%z<}~^Kl3MT)`>-8Elb|d;~{Y zhZ^s+Hkf5$HEni_ESv>EGV1WO$TPyTD+OY{1D_>X#J@!jyqY_SQ0~BsbqOJHRJI79 zk$&g=YB=yjQ%6IKJR_Xy1H!Cpp?@`@2Z3T8EiLkt_<5-$5FHNuIg&;E+wH(lb>Mp) z_5qq{`;MJm%&IJ#4mpQAEO zfun<>{39yU)Hk|Sl%Jw9O;@AcqWpa-(^NOwBFc|anWnhW22p;H$~3i&R*Less7zDZ zXpt!2LuH!EMm?hZ&#N`9eGp}F6#si|?6q5s*!xD`v9X4xx`CV}SQZ!q)84=;#u!-k zF&C9{LbDP3 zAUf6PU-l?e#HXRjaRdKscqwNW7&F}VxXYurfSGhOB2s1>u`O{U)^o(@*N;3u8`_Q7 zaQr{e+4;|p02hzFNr-i)5(y)=PcYG1j&KJB(@^XsBlgSqVh02DBr*Jhli@tU@FHpG zhi)PEZ*h&Vs09%^`<4y+lvVR#)gf3lGOW{8wAQbmFnou=I6`_~L8uuO`|=+g+CCJ$ zM$pJq(1fk86FEjTL50BNqrrXpiHzev7RbK+^Br}esy(P`SDVqFec60rx*jhcG6tY< zVBrh6TSax=gtnmgb1Dujt3WrzXMRpTc~LNu6aFo}4E2C%AIp4vQaTNqo`<;5^EWt& z=`Ta*WkY%{OfDE$_y`L<522q@o2B02_+uo2w%5nojMAyX!ssQ(=TL8uiLh-m*#;Jz zjzK#97}eXi-{3}~mkVNBPR4r1`?j3S=qcH@Wjv8UJY9)~Y@>z<{()I_byVrXAW#U; zzgtC0hvRurHuy8qGtZOQ3zQe)Pf)xGB-}R+_l?1`V-`k4)UZBI?HJZik_hOCF)jaD z?SB0a`OoTqz-Z_jzo_Rd(9vNa5GuQGJWrq~yGT?{-}n@PA{U|s+BdG=>mNskQ~L6s z9m>$hxoiwHWQfyPL0{L&jN`BN>*E{t4N_o{A8c!z$zoV80z{{d>u(;9WpY!AFV^!3 zvGf=8>z~B-zVYkW2)9q^5%RYZ z2waG`UvBhoIAjboBS`gQ+>G;xLlniBY=mLQUy;LBM8_#u!+|pV87fEfD5zxEM|0w> zKq7ExbR@L~o`4SwAB{5QviVTnugSo$Jj<(PAS@!vw3yFP2o zB?ym@0b)6o@GBsIrkKW+Jt)mD+`Q@7o$s|9|Q*e<; zF49A`IFB{t9xqa|{X&*zH^>afX{lty_R8YhtficM>6oU-Bg$EQC)|xgW$=nNkq+kX8?V_?=pSeiXgV$>1K$)=btX*4AD1A>h@GKRU2M8I~InDF&o$cFgSp(*rdnNh^&_|$UH{cG^av#mZVj9+W6E*%J z3dqEFNcwO*M=S_vT80V3@vT%*KZ2~8>N;S=-XiQQS^aLKPv1?UC|d9@G-HbiT}ZAF zk`Qi&bv+|2`Hu9)?&BdP_E54|e##2jJINK^oGkgTqVwGqzQPs8PoZIbV;N}cJXQ7i zLIFh{bpskF+%ui9)TaJs{H-l87Bn9)_I{QLHpTMPA1xNRzks=S8yJu{A2J4JI$Te^ z0S7e{4C2I|s`9sg8~z{NPF)0-uKE}%`?l=XdM*VUdM||v5fKe&&~d8?H) zFh(?CwQb)JNDBQirp9yu)o7GZ+en4CXFjFzy6W6mQrL+ zA!`1QV|1X(W(r!Ne@qxD7T#DdzJMOq;`d^J^`9GqQ=D_PJvWRQ{p8nixL4(H(r=(n z^g_=N^zWWH`al|vH;JVrRVRwR|3HJU+-Jnz24)EPR<5~m?SXLv_V<1VI`b|jUU7*a zMCXel^#(@X05wK*!7Qo(LpcKo_?%_`h!v?(p&tp)P?&!~4j{~?JcaoQFvn@cK+3rm z&WZnktVmk#lJ_U(Jj_*gzB@o8IYqv^70P73n>hEN#Xq2!>z^&mK=E-=#7YD{IwV#i z_JxeKYN6G!m@{WVY#d`l<=j$gQjSO_R*Eem)2%3o)sHxPN~CTYaTFlpnhmVTDdepF zEtG|qsx18Qm}-XzHB@ZRsPo-*sOHE8qBRlHd=CAn4pmYYzDDCE_KWyMRH1ztmDJMY znf)?wx+gvnn#1vLkP=uw9gv%E$1vPPF+4`GQt@JB3Z0AI=Vl~VDW|{ZU@kO@Vf-~) zseA5>VNlb+?x(d2x`xf6m7diLUm$=YWPXYOy70G_<5SkmD<)B^Ftx_6nA>l!D<~Qc zxg&rs1n9UJLkLZfiGmu8@)*V&{%Cn1##@Q75{4$v>b<|Qj6BmqW1RZvJW^AkkL5o? z*W5Yr1OLvWXYUa#zK7$pEqzr@G;W0doKdXId=Fhz8{3cK5t4gd^q3WJ`$Q1|M;Eqx z>u}2k!M2BxefvGPRIOJl(xf<{;Y5j!7gtFG=p{XY%Rw3~)Zye6;qrH&((3C1H9*Lq zYix>L31WnT5iyx27>U*D6}BYFujG?*>Pz?pO~b4hs~>_&K<>h8^%1d{1;=naF8pA* zOR@JwPs((R7(KuCH2MI0e-9_~tFC~V3$0;Z%zNuY8`#HbI^tj*z@R&BM1r!Z5 z=+rh=^?=oh6hGte6}ndF100UOE2dqVu+bA%UmpI1LlGRiFe>C44m~Xl*ns)doqxpL z3Pg<Yv0UmAV; zGK`9Mdwz`?b8p#DwZ3XY)vZ82T=ZB?Dd1e_sRUdP_$J_Xz*B$+ z0O$NFkto2M2+IMB0B;3s0DKxS3OEM1190Alun%xK;32?vz;VF)0B7UPh3^BF1C9W$ z13V1a3pgVVc|Z@~ZoumS4*{+N90&Xea4z0-_#AL4-~ueU*8|=FcsHO4_&DGWz&(J+ z01pG6_iMBda30`XJfz9P^>z(lF<={DCE#|zdjOvR+zGfJ@Bv(5!~qWi=Hd;CnYbA- zA8;k00r)r|y;<=T;QfH70G|fD6nBeW23!Gn0`LyNsd(e!F~F+ZgMz?T6J0GXF@%J3YhR}M@ zM86w$zn)M5Pn9(K_aTZ^vu zWo15K>!6oqbD`q`C(n&G-2-~|Y3P-pYp0=;+Kjsz>O^mx_1J6h{ROg5q>stZhv9?o z0A@!JoXQVmXTNR7lc?u<)H9zHDjuKR)&rn_7j5ly(n&|Ft>XlE3e(t1cI?96Fm!wX znEf$&N!3Go-m&$RA?Ia6&lS$P9QDwB_OsK}klIdS{(r>ltbCHAFPb0sS>FHu; zJ?K<_p;Ldr)_*tX-Jl~S*!Him>5qfH9`uDydbdsA19}*Ax`Sf1zt*N72E7IJtDJPd zO+N|xZE55^kfFI}9q5lb2SKlM(lb{I29Y-~4{DgNOPusBTiyfu4A4`x?^4JdgAY$Z z|8c}p_B&v9jG3(WGOc>(jlmZ&zkk(Pk0aLZ2mPNw&v(+Vx9xZm^nV1s%1K9G+zA$H z#{tma1f6cKSvIVb@*be#pdSSNgp*Ev`MzDpRLtp*V{KtLc`~a71J!Xg==(vhbkd+9{~MotXWd^wFfd!gHCInCZ{~L!QrP7@LUg` z%bh%qcp3w}0rYuJ`Ucy^0<25cf{r|G`?1}o7lFP3^mJpY0rZWar;Dd<&^Lpg&c}nG ze-HF@Ic*o{KLR~noQ$x%-)Vnli{;}n(0>K_R6eFYENoT{1(}M4{$}Ge+s(K zNniOuR$q3en0ZM@6L=P34LQfj(`@@N3VIRf>Du@Z=vAkY|3}cT1wCCK9Rz&^=wfZ7 z#^V~h{!c((0(vSRcwqBPOvCd*4?FFrIO~u)Xx!5}b`yAhi1X}oF!)tH#P`>BT`kb{ z$24`3ueO3d3i?gXx-!=a1w`KoI<3FwJLx>{6Mcwm1>NJMJLb3}Oc%dJQa0Av_R)`O z&WHRmr##va0SoE(fW8Ryr2Gnci0PG}H-Mf_e+%gA-b^HFo%%CtgaIDlwt~J5bi5g3 z`%tC?!icry6X3~39UBO-Jg?xtW!C1+{&tq0=NZV>^X5O0Q2 zl~?5zRprfJp66MfHyixR^K!*+%4qFQzQoePI#9}8mt$&ly@d0;YXB>6!F!+wGbDzv z;7&YtS;m~jH*4C-%o899HEp?#0E zSVXJV$&*PBOnP9_1Ct(@^uVMCCOt6efk_WcdSKE6lOCA#!2cf}Q2Vo}{aDogE9zR< z4ew)w!qZ+ebf_}zEknmt39OA{__3GPCT{rQUIx?Eelg*85Aj-In{(r@x_LXxRvxqx*1;ZN|wlmzs@IHnQGyGeI&oX?8;hPM9 z#qd*xs^0rs)R~0XfoX|n{z`u^8VY$T3b7fJr@Q1zd6vvMv6O`gQzqS(=KEWPGls;w~BgXPS!7WIRTP%Wp)_!fuS7 zlz5D8aU;>Gh4`mg`#U-*Sz4Yo?vwHPxWVki$nz{1qxK7QQnOI0M?AD}VyWI4TEo|q zaa79E&ec?YaZ<5+m6cbL@w2p4ahs)`r>T9XlKC;}Tbvkqo`q4pH6`BO2hJ&#Wo0`X zOKBoo`yGDko_tUq%h2+*T=pBM6YVmAKVSQMujE(wG88k>FH<>TiTk_2wJ~1heP{kl zRquA8c4EK$55z|%Wmi|6(7OT|*!jxs&z(ZgxfA^V_l$S<*M6aAqMzReejeJj>=GfU z84_q$py7o7o_N59pYJn%J>$Q{`*B{u_y-vu{HjE#yuP3DE0}*i^UHnyrfIv_Vc%i= zhsjMz!8q4LsR-7aQnfJ=+=oDvvLE$Ape2;P)As6X%Pk znZKIbwLr3KFEaja9=Mcu=y;3qbv!N>O0+fuhLJs$9Jgv+Igj!8Fu#h24UGR0+o{G; zH{+LHCKY>F|6eeEy=z?jOzCI)mHy+5caPJlSRj&}|NBy@_$Jn`Gro)KUB>tz@TuaB z-mfD)Nz6zsLBi82>ZI-^+1c z%=o!j`cS<;;&IB|sZ{}=%0C;J|Kvqdfx&w20iN=JJD-1>`R8~fV-xegokstKxUeF7 z+v{wl^t0I44@<3>Ns_^%)U$R78+ zwiNhO``|V*|2W5+^7A8%cjt!}82{soWrfXL?>OV1^+?15#^)j4$eyRz4=QgjVSLav zu2%t1^X%3ZR>0tCzye&g?)E0**RdXV90nMl$>aBjEcO!P-TCA#;8VrV$G}s)?l_?r zOQ>DPT;uC%9s1RQm5`{s%dpJVtKJ624{~6JSUN^Qeeml3fl=&ZF`~l8Ck1~D)cn_aD!3pLcWPX)@W?>#Ad)({E>u7#- zD&X^*nBTq5yp8cqoaalJ|52f5n%2Voo5A>}nBTpwdtLBPtoV1pdnTgA4U$y&Ifw&l zR}1%hG0Uz1K2^Ik+Bdn-*WE34ajq+_=nnXO{yPH^zv&AFv6Xd?5Bn}-2kW|W(+q?H z*q$22vSO3AU5;YAYr+I*KW3D$!L}E>I=A_y9F>a#0nJ=lw`O@&9YlIs&05pExihl1 zF|>BoBKj>tX$kgm#_sQ?ZkQ|9R@LjKQL|PvYyQjXs`}b$(5M2_8xDIno7h-786mdw zw)C%Av7%9LGMlQF*Xh{e8%CFwm}ZyvcE8yb+T=F_?Pf6K@Adh+qhspI-y~32u4Q;M%U{$)lsmW|C zvxROHKdTV?dt23cYlV8KWNM(RyR*7PZ2Y}?6|`;)_F$9k-r~}r|4zB}x6@p`yvy5b zMz9OElqiQ;f#Al{l0dHxhu^%~LSVn{1|MP0th0uzZ}ciNb!AZs_o&u_rLmYByGZ_m zM2SO@Xk{rnj_mZed81y-m#1^fVy9b75|jImQ#Yik#AdLaN?WSCjlrhs4wp;aqT#h` z5YuhgRlO(TN1SnwpT`q@G8#in4*v>5ngumBdQIC z!d>1@JDgPz+HOMUP(KtSn5jG0u3|klhZ+JZCdM8ORCzxojrtY+G%rkc*q&{U%F{!)Yk|y zu@AiLyhY+COf+oNPW_I^)+?<1MnQVL2vmI&wGacorh5%Wz6^pi@rF)!|4gb@X>ks= zR4pNgmY_qZ8mi?q9|&;8k=YgrnH|Wo^r3>&j0XOO0A;PyyP>$KP5#u2dRnv|#)+N( zSEY>*SMoVy9X3CrPv#0o6sO5>Ji5;0N)^0TTA&cM;=S04cb;>syhz?Vvk{BMT!!Qh zt;p*%71Z2R81}aprv2IivX3=%Os_S9*({z@!~{VDs!65J(RoYv`XXy z9?I;4#qEB44x+R~Z;%-u{bEfoDKlXyBG}RUG|pwuv%+vhP9|qmQ~^aB{ZX^2+n%cA z#N-%p*7P!Qs^$9h?rxSkljT;sXxg;DmjTyfwcroSxmM*utO?GP^XycPm{5t&47J{d zsT-ZBf(*l!lO%;g15yNcAUsrLR;vY*;+N@wSXDj|=}~^oVnX^mb2OH7Li6ig4XfxA zA?ats;tC}3U`T#*L^Vinr%DeM=^ zEes`mg(c|oixE~9-Q4ZhsbOS^dqJk6q)je*zWldX@L?0TiTKKi8?~D2p_P*uNR81l zvbA=tGGI;W7T|*~vH@!_vSsg_A^FCLHzdwtrfW+pJ54g zW3ftKtV!ogj10oVDnH1Z3@7i?&_ zn@#oAroOs{KFdRE_A+-PCM@2+U{?L|0Wu*x(ho2zBM1Ffj@Mf;zfEv8eFtcfh=((e z@TA$efjc(65yhD6aDl^D0$*ZsbuNC2vG@_ILuaBAtKW1ngOLFV(OcrXKkg9(ACymw z!!tyoGX!PioPPGQS2$@ROgP6%eaVYh8ENpGdoFE^`tSI{{b?;Gq%GoFLVk{j8lcjM zV?L(OK&5a@U2Bo97ndrU7`3PMRvEH|9I=Z{jxFazJK+;eu2pzzBx0MHB4VwHQ!T1x zLQd!B63qU-4nL9rK5j)*lepe=f7ObvZrp*AKC*Egi<~Au@Z^j1h`RvfRLkX-1-?4>ILmF8>_;QH!Dldjd=7frZlfc#s$ur1Kevymw#A`B=@qtz7Ym?!8HTVi5L z;|FOnGFUM(n2I~oYZlv9Nn8HJHK08l#G*%Bvx@bE^}Vg+Zgx0JCaVHN$TGNG}yt)<>A4*jFnNzhK^GP2vLwNHC$jDLFvQg&?`}EOX zjER?SJ6K^O=0)*!GYWoOF^iizXY#$Q$Ng0_dqx)5*(pARm*!)4>FurP?um2=_v>rm zKiORqKVp|W+_l2U=B}u>6)+l>utS~E4TW&GR)`g=zYrg3yC&-0s1@QKSYd}Z(xDZ$ zZ4N?M!f04hHsMQmxW$-MFhRr6^O69cb$3R!LRu4Pg*YwT7{UoYCx?H9TMNS>aaXF) z-(j|gyP(2S*a(E1zbXa zi#XivUj;0^^QY`r?|&(HHOndc6`z9ZK&N*IRaw0!rr=hVC;Q1xWj~{}jW{9QN?yHB zrl5xnR`n}+)&4tJemN^t@0}^A-lHUWdKBQ6|F6JN6?ol19(cYa4u<^rfRWNxtALYD z_;bp8SWdwsPG+3D)N9*Z^49xqcz#5OvR9Vuvci801ZVx~dAEW$@N;V=uj*IjL6^LG zZ&pD(zE3(-KPV(UkEQxm|Ec$P6bvawhRSY*{wtg~<<qU^t{w5uio2J zu!shi<8asiEND)7^?sj%oeo(#b=Q9YIC_Rhu}z11uS|U(*zIpszk&xLgGa2Eym}9L zcRgvd`&*XmvXVan0>!SXU%dyXzQ3%#w@l-Y4kf4Tx)(TVzmiw)AC9acZtHa{S-`(a zUcph|obp-=vo=V$m5T}}IR!rg!70DBMRIOsc{R@6@~S+J6Vj#Z-`OH5JCr;rcN`wa zIh}rvKVm7Og5yx~{5pa~QR +#include +#include +#include + +unsigned int fetch32(char**); + +class char32 { + unsigned int fetch32(char** cstr) { + unsigned int r(0); + int i(1); + unsigned char compare = (unsigned char)**cstr; + if (compare >> 3 == 0b11110) i = 4; + if (compare >> 4 == 0b1110) i = 3; + if (compare >> 5 == 0b110) i = 2; + for (;i>0;i--) { + r <<= 8; + r += (unsigned char)**cstr; + (*cstr)++; + } + return r; + } +public: + unsigned int c; + char32(unsigned int i) {c = i;} + char32(const char* s) {c = fetch32((char**)&s);} + char32(char** s) {c = fetch32(s);} + bool operator==(char* cs) {return c==fetch32(&cs);} + unsigned int operator>>(int a) const {return c>>a;} +}; + +std::ostream& operator<<(std::ostream& stream, const char32& c32) { + stream << (char)(c32>>24) << (char)(c32>>16) << (char)(c32>>8) << (char)c32.c; + return stream; +} + +struct string32 { + std::vector cs; + string32(char* sd) { + while (sd[0]) + cs.push_back(char32(&sd)); + } + char32 operator[](int i) {return cs[i];} +}; + +std::ostream& operator<<(std::ostream& stream, const string32& s32) { + for (int i=0;i