diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..02edc69 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright © 2024 Yumehaki Technology + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README b/README new file mode 100644 index 0000000..cdfe8c0 --- /dev/null +++ b/README @@ -0,0 +1,31 @@ +Yumehaki Sūchi Format + +[JA]以下の規格が含まれます。 +・YSF8(UTF-8と互換あり) +・YSF16-ABE(U+FFFF以下はUTF-16BEと互換あり) +・YSF16-ALE(U+FFFF以下はUTF-16LEと互換あり) +・YSF16-BBE(U+D7FEとU+D7FF以外はUTF-16BEと互換あり) +・YSF16-BLE(U+D7FEとU+D7FF以外はUTF-16LEと互換あり) +・YSF32BE(UTF-32BEと互換あり) +・YSF32LE(UTF-32LEと互換あり) +・YSF7(修正UTF-7と互換あり) + +[EN]Includes the following standards: +・YSF8 (compatible with UTF-8) +・YSF16-ABE (compatible with UTF-16BE for U+FFFF and below) +・YSF16-ALE (compatible with UTF-16LE for U+FFFF and below) +・YSF16-BBE (compatible with UTF-16BE except for U+D7FE and U+D7FF) +・YSF16-BLE (compatible with UTF-16LE except for U+D7FE and U+D7FF) +・YSF32BE (compatible with UTF-32BE) +・YSF32LE (compatible with UTF-32LE) +・YSF7 (compatible with modified UTF-7) + +[EO]Inkluzivas la sekvajn normojn: +・YSF8 (kongrua kun UTF-8) +・YSF16-ABE (U+FFFF kaj sube estas kongruaj kun UTF-16BE) +・YSF16-ALE (U+FFFF kaj sube estas kongruaj kun UTF-16LE) +・YSF16-BBE (krom U+D7FE kaj U+D7FF estas kongruaj kun UTF-16BE) +・YSF16-BLE (krom U+D7FE kaj U+D7FF estas kongruaj kun UTF-16LE) +・YSF32BE (kongrua kun UTF-32BE) +・YSF32LE (kongrua kun UTF-32LE) +・YSF7 (kongrua kun modifita UTF-7) diff --git a/ysf.cpp b/ysf.cpp new file mode 100644 index 0000000..63fcd9d --- /dev/null +++ b/ysf.cpp @@ -0,0 +1,1136 @@ +/*<8> +0〜7=1Bytes +8〜11=2Bytes +12〜16=3Bytes +17〜21=4Bytes +22〜25=5Bytes +26〜31=6Bytes +32〜∞=7Bytes~ + 32〜32=7Bytes + 33〜38=8Bytes + 39〜44=9Bytes + 45〜50=10Bytes + 51〜56=11Bytes + 57〜62=12Bytes + 63〜68=13Bytes + ... to ∞ + +<16A> +0x0000〜0xD7FF=2Bytes +0xD800〜0xDFFF=4Bytes +0xE000〜0xFFFF=2Bytes +0x10000〜0x7FFFFFFF=8Bytes +0x80000000〜0x87FFFFFF=6Bytes +0x1080000000〜0x20007FFFFFF=8Bytes +0x200080000000〜0x4020007FFFFFFF=10Bytes +... 
to ∞ + +<16B> +0000〜D7FD=2Bytes +D7FE〜D7FF=6Bytes +D800〜DFFF=6Bytes +E000〜FFFF=2Bytes +10000〜10FFFF=4Bytes +110000〜20FFFF=8Bytes +210000〜4020FFFF=10Bytes +40210000〜1004020FFFF=12Bytes +10040210000〜401004020FFFF=14Bytes +... to ∞ + +0000~D7FD :単独 +D7FE :D7FE,D7FF,110000~ +D7FF :不正 +D800~DBFF :10000~10FFFF +DC00~DFFF :不正 +E000~FFFF :単独 + +<32> +0〜31=4Bytes +32〜58=8Bytes +59〜88=12Bytes +89〜118=16Bytes +... to ∞ + +<7> +0x00〜0x25=1Bytes +0x26=2Bytes +0x27〜0x7F=1Bytes +0x80〜∞=3Bytes〜*/ +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace boost::multiprecision; +using namespace boost::random; +typedef vector bytes; +typedef vector codepoint; +typedef number> bigfloat; + +int rnd = time(nullptr); + +/* 乱数関連ここから */ +cpp_int random(int k){ + cpp_int r; + rnd ++; + boost::random::mt19937 gen(rnd); + boost::random::uniform_int_distribution dist(0, cpp_int(1) << k); + r = dist(gen); + return r; +} +/* 乱数関連ここまで */ + +cpp_int keta(cpp_int a){ + cpp_int i = 0; + while (a > 1){ + a /= 2; + i++; + } + return i; +} + +codepoint eight2vec(bytes s){ + int i = 0; + codepoint v; + while (i < s.size()){ + if (s[i] < 0x80){ + //1 + v.push_back(s[i]); + i++; + }else if (s[i] < 0xC2){ + v.push_back(-1); + i++; + }else if (s[i] < 0xE0){ + //2 + v.push_back((s[i]-0xC0)*0x40+s[i+1]-0x80); + i+=2; + }else if (s[i] < 0xF0){ + //3 + v.push_back((s[i]-0xE0)*0x1000+(s[i+1]-0x80)*0x40+s[i+2]-0x80); + i+=3; + }else if (s[i] < 0xF8){ + //4 + v.push_back((s[i]-0xF0)*0x40000+(s[i+1]-0x80)*0x1000+(s[i+2]-0x80)*0x40+s[i+3]-0x80); + i+=4; + }else if (s[i] <= 0xF9){ + //5 + v.push_back((s[i]-0xF8)*0x1000000+(s[i+1]-0x80)*0x40000+(s[i+2]-0x80)*0x1000+(s[i+3]-0x80)*0x40+s[i+4]-0x80); + i+=5; + }else if (s[i] <= 0xFB){ + //6 + v.push_back((s[i]-0xFA)*0x40000000+(s[i+1]-0x80)*0x1000000+(s[i+2]-0x80)*0x40000+(s[i+3]-0x80)*0x1000+(s[i+4]-0x80)*0x40+s[i+5]-0x80); + i+=6; + }else if (s[i] <= 0xFD){ + //7+ + cpp_int w = s[i]-0xFC; + for (int j=1;j=0x80 && 
s[i+j]<0xC0){ + w *= 0x40; + w += s[i+j]-0x80; + }else if (s[i+j]==0xC0 || s[i+j]==0xC1){ + w *= 0x2; + w += s[i+j]-0xC0; + i += j+1; + break; + }else{ + //Err + v.push_back(-1); + i += j+1; + break; + } + } + // w += 0x80000000; + v.push_back(w); + }else{ + v.push_back(-1); + i++; + break; + } + } + return v; +} + +bytes vec2eight(codepoint v){ + bytes b; + cpp_int k = 0; + for (int i=0;i= j && k < j+6){ + if (k == j+5){ + b.push_back((unsigned char)(0xFC + (p/(cpp_int(1) << int(k))))); + p = p % (cpp_int(1) << int(k)); + }else{ + b.push_back((unsigned char)(0xFC)); + } + bytes b2; + while (p >= 1){ + b2.insert(b2.begin(),(unsigned char)(p%0x40+0x80)); + p /= 0x40; + } + b2.push_back((unsigned char)(v[i]%2+0xC0)); + b.insert(b.end(), b2.begin(), b2.end());//移し + break; + } + j += 6; + } + } + } + return b; +} + +codepoint sixteenABE2vec(bytes s){ + int i = 0; + codepoint v; + cpp_int k; + cpp_int l; + while (i < s.size()){ + k = s[i]*0x100+s[i+1]; + if (k <= 0xD7FF || k>= 0xE000){ + v.push_back(k); + i += 2; + }else if (k >= 0xD800 && k <= 0xD87F){ + l = (k-0xD800)*0x10; + k = s[i+2]*0x100 + s[i+3]; + if (k >= 0xDFF0 && k <= 0xDFFF){ + l += (k - 0xDFF0) + 0xD800; + v.push_back(l); + i += 4; + }else{ + //Err + v.push_back(-1); + i += 4; + } + }else if (k >= 0xD880 && k <= 0xD8FF){ + l = (k-0xD880)*0x1000000; + k = s[i+2]*0x100+s[i+3]; + if (k < 0xD900 || k > 0xD9FF){ + //Err + v.push_back(-1); + i += 8; + continue; + } + l += (k-0xD900)*0x10000; + k = s[i+4]*0x100+s[i+5]; + if (k < 0xDA00 || k > 0xDAFF){ + //Err + v.push_back(-1); + i += 8; + continue; + } + l += (k-0xDA00)*0x100; + k = s[i+6]*0x100+s[i+7]; + if (k < 0xDB00 || k > 0xDBFF){ + //Err + v.push_back(-1); + i += 8; + continue; + } + l += (k-0xDB00); + l += 0x10000; + v.push_back(l); + i += 8; + }else if (k >= 0xDC00 && k <= 0xDDFF){ + l = k-0xDC00; + for (int j=2;i+j= 0xD900 && k <= 0xDAFF){ + l *= 0x200; + l += k-0xD900; + }else if (k >= 0xDE00 && k <= 0xDFFF){ + l *= 0x200; + l += k-0xDE00; + l += 
0x80000000; + v.push_back(l); + i += j+2; + break; + }else{ + v.push_back(-1); + i += j+2; + break; + } + } + }else{ + v.push_back(-1); + i+=2; + } + } + return v; +} + +bytes vec2sixteenABE(codepoint v){ + bytes b; + cpp_int k = 0; + for (int i=0;i= 0xE000 && v[i] <= 0xFFFF)){ + b.push_back((unsigned char)(v[i]/0x100)); + b.push_back((unsigned char)(v[i]%0x100)); + }else if (v[i] >= 0xD800 && v[i] <= 0xDFFF){ + b.push_back((unsigned char)(0xD8)); + b.push_back((unsigned char)((v[i]-0xD800)/0x10 % 0x80)); + b.push_back((unsigned char)(0xDF)); + b.push_back((unsigned char)((v[i]-0xD800)%0x10 + 0xF0)); + }else if (v[i] >= 0x10000 && v[i] <= 0x7FFFFFFF){ + b.push_back((unsigned char)(0xD8)); + b.push_back((unsigned char)((v[i]-0x10000)/0x1000000%0x100 + 0x80)); + b.push_back((unsigned char)(0xD9)); + b.push_back((unsigned char)((v[i]-0x10000)/0x10000%0x100)); + b.push_back((unsigned char)(0xDA)); + b.push_back((unsigned char)((v[i]-0x10000)/0x100%0x100)); + b.push_back((unsigned char)(0xDB)); + b.push_back((unsigned char)((v[i]-0x10000)%0x100)); + }else{ + k = keta(v[i] - 0x80000000); + cpp_int j = 0; + cpp_int p = 0; + while (true){ + if (k >= j && k < j+9){ + p = v[i] - 0x80000000; + bytes b2; + b2.insert(b2.begin(),(unsigned char)(p%0x200/0x100+0xDE)); + b2.insert(b2.begin()+1,(unsigned char)(p%0x100)); + p /= 0x200; + do{ + b2.insert(b2.begin(),(unsigned char)(p%0x200/0x100+0xD9)); + b2.insert(b2.begin()+1,(unsigned char)(p%0x100)); + p /= 0x200; + }while (p >= 1); + b2[0] += 0x03; + b.insert(b.end(), b2.begin(), b2.end());//移し + break; + } + j += 9; + } + } + } + return b; +} + +codepoint sixteenALE2vec(bytes s){ + int i = 0; + codepoint v; + cpp_int k; + cpp_int l; + while (i < s.size()){ + k = s[i+1]*0x100+s[i]; + if (k <= 0xD7FF || k>= 0xE000){ + v.push_back(k); + i += 2; + }else if (k >= 0xD800 && k <= 0xD87F){ + l = (k-0xD800)*0x10; + k = s[i+3]*0x100 + s[i+2]; + if (k >= 0xDFF0 && k <= 0xDFFF){ + l += (k - 0xDFF0) + 0xD800; + v.push_back(l); + i += 4; + 
}else{ + //Err + v.push_back(-1); + i += 4; + } + }else if (k >= 0xD880 && k <= 0xD8FF){ + l = (k-0xD880)*0x1000000; + k = s[i+3]*0x100+s[i+2]; + if (k < 0xD900 || k > 0xD9FF){ + //Err + v.push_back(-1); + i += 8; + continue; + } + l += (k-0xD900)*0x10000; + k = s[i+5]*0x100+s[i+4]; + if (k < 0xDA00 || k > 0xDAFF){ + //Err + v.push_back(-1); + i += 8; + continue; + } + l += (k-0xDA00)*0x100; + k = s[i+7]*0x100+s[i+6]; + if (k < 0xDB00 || k > 0xDBFF){ + //Err + v.push_back(-1); + i += 8; + continue; + } + l += (k-0xDB00); + l += 0x10000; + v.push_back(l); + i += 8; + }else if (k >= 0xDC00 && k <= 0xDDFF){ + l = k-0xDC00; + for (int j=2;i+j= 0xD900 && k <= 0xDAFF){ + l *= 0x200; + l += k-0xD900; + }else if (k >= 0xDE00 && k <= 0xDFFF){ + l *= 0x200; + l += k-0xDE00; + l += 0x80000000; + v.push_back(l); + i += j+2; + break; + }else{ + v.push_back(-1); + i += j+2; + break; + } + } + }else{ + v.push_back(-1); + i+=2; + } + } + return v; +} + +bytes vec2sixteenALE(codepoint v){ + bytes b; + cpp_int k = 0; + for (int i=0;i= 0xE000 && v[i] <= 0xFFFF)){ + b.push_back((unsigned char)(v[i]%0x100)); + b.push_back((unsigned char)(v[i]/0x100)); + }else if (v[i] >= 0xD800 && v[i] <= 0xDFFF){ + b.push_back((unsigned char)((v[i]-0xD800)/0x10 % 0x80)); + b.push_back((unsigned char)(0xD8)); + b.push_back((unsigned char)((v[i]-0xD800)%0x10 + 0xF0)); + b.push_back((unsigned char)(0xDF)); + }else if (v[i] >= 0x10000 && v[i] <= 0x7FFFFFFF){ + b.push_back((unsigned char)((v[i]-0x10000)/0x1000000%0x100 + 0x80)); + b.push_back((unsigned char)(0xD8)); + b.push_back((unsigned char)((v[i]-0x10000)/0x10000%0x100)); + b.push_back((unsigned char)(0xD9)); + b.push_back((unsigned char)((v[i]-0x10000)/0x100%0x100)); + b.push_back((unsigned char)(0xDA)); + b.push_back((unsigned char)((v[i]-0x10000)%0x100)); + b.push_back((unsigned char)(0xDB)); + }else{ + k = keta(v[i] - 0x80000000); + cpp_int j = 0; + cpp_int p = 0; + while (true){ + if (k >= j && k < j+9){ + p = v[i] - 0x80000000; + bytes b2; + 
b2.insert(b2.begin(),(unsigned char)(p%0x100)); + b2.insert(b2.begin()+1,(unsigned char)(p%0x200/0x100+0xDE)); + p /= 0x200; + do{ + b2.insert(b2.begin(),(unsigned char)(p%0x100)); + b2.insert(b2.begin()+1,(unsigned char)(p%0x200/0x100+0xD9)); + p /= 0x200; + }while (p >= 1); + b2[0] += 0x03; + b.insert(b.end(), b2.begin(), b2.end());//移し + break; + } + j += 9; + } + } + } + return b; +} + +codepoint sixteenA2vec(bytes s){ + return sixteenABE2vec(s); +} +bytes vec2sixteenA(codepoint v){ + return vec2sixteenABE(v); +} + +codepoint sixteenBBE2vec(bytes s){ + int i = 0; + codepoint v; + cpp_int k; + cpp_int l; + while (i < s.size()){ + k = s[i]*0x100+s[i+1]; + if (k <= 0xD7FD || k>= 0xE000){ + v.push_back(k); + i += 2; + }else if (k >= 0xD800 && k <= 0xDBFF){ + l = (k-0xD800)*0x400; + k = s[i+2]*0x100+s[i+3]; + if (k < 0xDC00 || k > 0xDFFF){ + //Err + v.push_back(-1); + i += 4; + continue; + } + l += k-0xDC00; + l += 0x10000; + v.push_back(l); + i += 4; + }else if (k == 0xD7FE){ + l = 0; + for (int j=2;j= 0xDC00 && k <= 0xDFFF){ + if (i+j < s.size()-2 && j==2){ + if (s[i+j+2]*0x100+s[i+j+3] == 0xD7FF){ + v.push_back(k); + i += 6; + break; + } + } + l *= 0x400; + l += k-0xDC00; + }else if (k >= 0xD800 && k <= 0xDBFF){ + if (i+j < s.size()-2 && j==2){ + if (s[i+j+2]*0x100+s[i+j+3] == 0xD7FF){ + v.push_back(k); + i += 6; + break; + } + } + l *= 0x400; + l += k-0xD800; + k = s[i+j+2]*0x100+s[i+j+3]; + if (k == 0xD7FF){ + l += 0x110000; + v.push_back(l); + i += j+4; + break; + } + }else if (k == 0xD7FF){ + v.push_back(l); + i += j; + break; + }else if (l==0){ + v.push_back(k); + i += 2; + break; + }else{ + v.push_back(-1); + i += j; + break; + } + } + }else{ + v.push_back(-1); + i+=2; + } + } + return v; +} + +bytes vec2sixteenBBE(codepoint v){ + bytes b; + cpp_int k = 0; + for (int i=0;i= 0xE000 && v[i] <= 0xFFFF)){ + b.push_back((unsigned char)(v[i]/0x100)); + b.push_back((unsigned char)(v[i]%0x100)); + }else if (v[i] == 0xD7FE){ + b.push_back((unsigned char)(0xD7)); + 
b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFF)); + }else if (v[i] == 0xD7FF){ + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFF)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFF)); + }else if (v[i] >= 0xD800 && v[i] <= 0xDFFF){ + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(v[i]/0x100)); + b.push_back((unsigned char)(v[i]%0x100)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFF)); + }else if (v[i] >= 0x10000 && v[i] <= 0x10FFFF){ + b.push_back((unsigned char)((v[i]-0x10000)/0x400/0x100 + 0xD8)); + b.push_back((unsigned char)((v[i]-0x10000)/0x400%0x100)); + b.push_back((unsigned char)((v[i]-0x10000)%0x400/0x100 + 0xDC)); + b.push_back((unsigned char)((v[i]-0x10000)%0x400%0x100)); + }else if (v[i] == 0x110000){ + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(0xDC)); + b.push_back((unsigned char)(0x00)); + b.push_back((unsigned char)(0xD8)); + b.push_back((unsigned char)(0x00)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFF)); + }else{ + k = keta(v[i] - 0x110000); + cpp_int j = 0; + cpp_int p = 0; + while (true){ + if (k >= j && k < j+10){ + p = v[i] - 0x110000; + b.push_back(0xD7); + b.push_back(0xFE); + bytes b2; + b2.insert(b2.begin(),(unsigned char)(p%0x400/0x100+0xD8)); + b2.insert(b2.begin()+1,(unsigned char)(p%0x100)); + p /= 0x400; + do{ + b2.insert(b2.begin(),(unsigned char)(p%0x400/0x100+0xDC)); + b2.insert(b2.begin()+1,(unsigned char)(p%0x100)); + p /= 0x400; + }while (p >= 1); + b.insert(b.end(), b2.begin(), b2.end());//移し + b.push_back(0xD7); + b.push_back(0xFF); + break; + } + j += 10; + } + } + } + return b; +} + +codepoint 
sixteenBLE2vec(bytes s){ + int i = 0; + codepoint v; + cpp_int k; + cpp_int l; + while (i < s.size()){ + k = s[i+1]*0x100+s[i]; + if (k <= 0xD7FD || k>= 0xE000){ + v.push_back(k); + i += 2; + }else if (k >= 0xD800 && k <= 0xDBFF){ + l = (k-0xD800)*0x400; + k = s[i+3]*0x100+s[i+2]; + if (k < 0xDC00 || k > 0xDFFF){ + //Err + v.push_back(-1); + i += 4; + continue; + } + l += k-0xDC00; + l += 0x10000; + v.push_back(l); + i += 4; + }else if (k == 0xD7FE){ + l = 0; + for (int j=2;j= 0xDC00 && k <= 0xDFFF){ + if (i+j < s.size()-2 && j==2){ + if (s[i+j+3]*0x100+s[i+j+2] == 0xD7FF){ + v.push_back(k); + i += 6; + break; + } + } + l *= 0x400; + l += k-0xDC00; + }else if (k >= 0xD800 && k <= 0xDBFF){ + if (i+j < s.size()-2 && j==2){ + if (s[i+j+3]*0x100+s[i+j+2] == 0xD7FF){ + v.push_back(k); + i += 6; + break; + } + } + l *= 0x400; + l += k-0xD800; + k = s[i+j+3]*0x100+s[i+j+2]; + if (k == 0xD7FF){ + l += 0x110000; + v.push_back(l); + i += j+4; + break; + } + }else if (k == 0xD7FF){ + v.push_back(l); + i += j; + break; + }else if (l==0){ + v.push_back(k); + i += 2; + break; + }else{ + v.push_back(-1); + i += j; + break; + } + } + }else{ + v.push_back(-1); + i+=2; + } + } + return v; +} + +bytes vec2sixteenBLE(codepoint v){ + bytes b; + cpp_int k = 0; + for (int i=0;i= 0xE000 && v[i] <= 0xFFFF)){ + b.push_back((unsigned char)(v[i]%0x100)); + b.push_back((unsigned char)(v[i]/0x100)); + }else if (v[i] == 0xD7FE){ + b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFF)); + b.push_back((unsigned char)(0xD7)); + }else if (v[i] == 0xD7FF){ + b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFF)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0xFF)); + b.push_back((unsigned char)(0xD7)); + }else if (v[i] >= 0xD800 && v[i] <= 0xDFFF){ + b.push_back((unsigned char)(0xFE)); + 
b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(v[i]%0x100)); + b.push_back((unsigned char)(v[i]/0x100)); + b.push_back((unsigned char)(0xFF)); + b.push_back((unsigned char)(0xD7)); + }else if (v[i] >= 0x10000 && v[i] <= 0x10FFFF){ + b.push_back((unsigned char)((v[i]-0x10000)/0x400%0x100)); + b.push_back((unsigned char)((v[i]-0x10000)/0x400/0x100 + 0xD8)); + b.push_back((unsigned char)((v[i]-0x10000)%0x400%0x100)); + b.push_back((unsigned char)((v[i]-0x10000)%0x400/0x100 + 0xDC)); + }else if (v[i] == 0x110000){ + b.push_back((unsigned char)(0xFE)); + b.push_back((unsigned char)(0xD7)); + b.push_back((unsigned char)(0x00)); + b.push_back((unsigned char)(0xDC)); + b.push_back((unsigned char)(0x00)); + b.push_back((unsigned char)(0xD8)); + b.push_back((unsigned char)(0xFF)); + b.push_back((unsigned char)(0xD7)); + }else{ + k = keta(v[i] - 0x110000); + cpp_int j = 0; + cpp_int p = 0; + while (true){ + if (k >= j && k < j+10){ + p = v[i] - 0x110000; + b.push_back(0xFE); + b.push_back(0xD7); + bytes b2; + b2.insert(b2.begin(),(unsigned char)(p%0x100)); + b2.insert(b2.begin()+1,(unsigned char)(p%0x400/0x100+0xD8)); + p /= 0x400; + do{ + b2.insert(b2.begin(),(unsigned char)(p%0x100)); + b2.insert(b2.begin()+1,(unsigned char)(p%0x400/0x100+0xDC)); + p /= 0x400; + }while (p >= 1); + b.insert(b.end(), b2.begin(), b2.end());//移し + b.push_back(0xFF); + b.push_back(0xD7); + break; + } + j += 10; + } + } + } + return b; +} + +codepoint sixteenB2vec(bytes s){ + return sixteenBBE2vec(s); +} +bytes vec2sixteenB(codepoint v){ + return vec2sixteenBBE(v); +} + +codepoint thirtytwoBE2vec(bytes s){ + int i = 0; + codepoint v; + cpp_int k; + cpp_int l; + while (i < s.size()){ + k = (unsigned int)(s[i])*0x1000000+(unsigned int)(s[i+1]*0x10000+s[i+2]*0x100+s[i+3]); + if (k < 0x80000000){ + v.push_back(k); + i+=4; + }else if (k <= 0x9FFFFFFF){ + l = k - 0x80000000; + for (int j=4;i+j < s.size();j+=4){ + k = (unsigned int)(s[i+j]*0x1000000)+(unsigned 
int)(s[i+j+1]*0x10000+s[i+j+2]*0x100+s[i+j+3]); + if (k >= 0xC0000000){ + l *= 1<<30; + l += k - 0xC0000000; + }else if (k >= 0xA0000000 && k <= 0xBFFFFFFF){ + l *= 1<<29; + l += k - 0xA0000000; + l += 0x80000000; + v.push_back(l); + i += j+4; + break; + }else{ + v.push_back(-1); + i += 4; + break; + } + } + }else{ + v.push_back(-1); + i += 4; + break; + } + } + return v; +} + +bytes vec2thirtytwoBE(codepoint v){ + bytes b; + cpp_int k = 0; + for (int i=0;i= j && k < j+30){ + p = (v[i] - 0x80000000)*2; + bytes b2; + b2.insert(b2.begin(),(unsigned char)((p%0x40000000/2)/0x1000000 + 0xA0)); + b2.insert(b2.begin()+1,(unsigned char)((p%0x40000000/2)/0x10000%0x100)); + b2.insert(b2.begin()+2,(unsigned char)((p%0x40000000/2)/0x100%0x100)); + b2.insert(b2.begin()+3,(unsigned char)((p%0x40000000/2)%0x100)); + p /= 0x40000000; + while (p >= 0x20000000){ + b2.insert(b2.begin(),(unsigned char)((p%0x40000000)/0x1000000 + 0xC0)); + b2.insert(b2.begin()+1,(unsigned char)((p%0x40000000)/0x10000%0x100)); + b2.insert(b2.begin()+2,(unsigned char)((p%0x40000000)/0x100%0x100)); + b2.insert(b2.begin()+3,(unsigned char)((p%0x40000000)%0x100)); + p /= 0x40000000; + } + b2.insert(b2.begin(),(unsigned char)(p/0x1000000 + 0x80)); + b2.insert(b2.begin()+1,(unsigned char)(p/0x10000%0x100)); + b2.insert(b2.begin()+2,(unsigned char)(p/0x100%0x100)); + b2.insert(b2.begin()+3,(unsigned char)(p%0x100)); + b.insert(b.end(), b2.begin(), b2.end());//移し + break; + } + j += 30; + } + } + } + } + return b; +} + +codepoint thirtytwoLE2vec(bytes s){ + int i = 0; + codepoint v; + cpp_int k; + cpp_int l; + while (i < s.size()){ + k = (unsigned int)(s[i+3])*0x1000000+(unsigned int)(s[i+2]*0x10000+s[i+1]*0x100+s[i]); + if (k < 0x80000000){ + v.push_back(k); + i+=4; + }else if (k <= 0x9FFFFFFF){ + l = k - 0x80000000; + for (int j=4;i+j < s.size();j+=4){ + k = (unsigned int)(s[i+j+3]*0x1000000)+(unsigned int)(s[i+j+2]*0x10000+s[i+j+1]*0x100+s[i+j]); + if (k >= 0xC0000000){ + l *= 1<<30; + l += k - 0xC0000000; + 
}else if (k >= 0xA0000000 && k <= 0xBFFFFFFF){ + l *= 1<<29; + l += k - 0xA0000000; + l += 0x80000000; + v.push_back(l); + i += j+4; + break; + }else{ + v.push_back(-1); + i += 4; + break; + } + } + }else{ + v.push_back(-1); + i += 4; + break; + } + } + return v; +} + +bytes vec2thirtytwoLE(codepoint v){ + bytes b; + cpp_int k = 0; + for (int i=0;i= j && k < j+30){ + p = (v[i] - 0x80000000)*2; + bytes b2; + b2.insert(b2.begin(),(unsigned char)((p%0x40000000/2)%0x100)); + b2.insert(b2.begin()+1,(unsigned char)((p%0x40000000/2)/0x100%0x100)); + b2.insert(b2.begin()+2,(unsigned char)((p%0x40000000/2)/0x10000%0x100)); + b2.insert(b2.begin()+3,(unsigned char)((p%0x40000000/2)/0x1000000 + 0xA0)); + p /= 0x40000000; + while (p >= 0x20000000){ + b2.insert(b2.begin(),(unsigned char)((p%0x40000000)%0x100)); + b2.insert(b2.begin()+1,(unsigned char)((p%0x40000000)/0x100%0x100)); + b2.insert(b2.begin()+2,(unsigned char)((p%0x40000000)/0x10000%0x100)); + b2.insert(b2.begin()+3,(unsigned char)((p%0x40000000)/0x1000000 + 0xC0)); + p /= 0x40000000; + } + b2.insert(b2.begin(),(unsigned char)(p%0x100)); + b2.insert(b2.begin()+1,(unsigned char)(p/0x100%0x100)); + b2.insert(b2.begin()+2,(unsigned char)(p/0x10000%0x100)); + b2.insert(b2.begin()+3,(unsigned char)(p/0x1000000 + 0x80)); + b.insert(b.end(), b2.begin(), b2.end());//移し + break; + } + j += 30; + } + } + } + } + return b; +} + +codepoint thirtytwo2vec(bytes s){ + return thirtytwoBE2vec(s); +} +bytes vec2thirtytwo(codepoint v){ + return vec2thirtytwoBE(v); +} +codepoint seven2vec(bytes s){ + int i = 0; + cpp_int l = 0; + codepoint v; + codepoint chk = {0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x2B,0x2F}; + while (i < s.size()){ + if (s[i]==0x26){ + l = 0; + for 
(int j=1;i+j=0x80){ + v.push_back(-1); + i++; + }else{ + v.push_back(s[i]); + } + i++; + } + return v; +} + +bytes vec2seven(codepoint v){ + bytes b; + codepoint chk = {0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x2B,0x2F}; + for (int i=0;i= 0x80){ + b.push_back(0x26); + bytes b2; + cpp_int p = v[i]; + while (p >= 1){ + b2.insert(b2.begin(),(unsigned char)(chk[(int)(p%0x40)])); + p /= 0x40; + } + b.insert(b.end(), b2.begin(), b2.end()); + b.push_back(0x2D); + } + } + return b; +} + +void vecprint(codepoint c){ + for (int i=0;i