Split text_encoding off into its own repo.

It's more generally useful, and I was working with it as its own
crate anyway.
This commit is contained in:
Nathan Vegdahl 2018-08-23 13:33:14 -07:00
parent 30a1440399
commit a843aee904
41 changed files with 0 additions and 6816 deletions

189
Cargo.lock generated
View File

@ -9,7 +9,6 @@ dependencies = [
"serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
"termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"text_encoding 0.1.0",
"unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -30,37 +29,6 @@ dependencies = [
"unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "bit-set"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bit-vec 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bit-vec"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bitflags"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "docopt" name = "docopt"
version = "0.8.3" version = "0.8.3"
@ -73,25 +41,6 @@ dependencies = [
"strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "fnv"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fuchsia-zircon"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fuchsia-zircon-sys"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.0.2" version = "1.0.2"
@ -110,11 +59,6 @@ dependencies = [
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "num-traits"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "0.4.9" version = "0.4.9"
@ -123,28 +67,6 @@ dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "proptest"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bit-set 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rusty-fork 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quick-error"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "quote" name = "quote"
version = "0.6.4" version = "0.6.4"
@ -153,23 +75,6 @@ dependencies = [
"proc-macro2 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "rand"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.1.40" version = "0.1.40"
@ -203,22 +108,6 @@ dependencies = [
"ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "regex-syntax"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "remove_dir_all"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "ropey" name = "ropey"
version = "0.8.4" version = "0.8.4"
@ -227,17 +116,6 @@ dependencies = [
"smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "rusty-fork"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"wait-timeout 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.70" version = "1.0.70"
@ -276,18 +154,6 @@ dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "tempfile"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
"remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "termion" name = "termion"
version = "1.5.1" version = "1.5.1"
@ -298,13 +164,6 @@ dependencies = [
"redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "text_encoding"
version = "0.1.0"
dependencies = [
"proptest 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "thread_local" name = "thread_local"
version = "0.3.5" version = "0.3.5"
@ -352,68 +211,24 @@ name = "void"
version = "1.0.2" version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "wait-timeout"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata] [metadata]
"checksum aho-corasick 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c1c6d463cbe7ed28720b5b489e7c083eeb8f90d08be2a0d6bb9e1ffea9ce1afa" "checksum aho-corasick 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c1c6d463cbe7ed28720b5b489e7c083eeb8f90d08be2a0d6bb9e1ffea9ce1afa"
"checksum bit-set 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6f1efcc46c18245a69c38fcc5cc650f16d3a59d034f3106e9ed63748f695730a"
"checksum bit-vec 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4440d5cb623bb7390ae27fec0bb6c61111969860f8e3ae198bfa0663645e67cf"
"checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789"
"checksum byteorder 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8389c509ec62b9fe8eca58c502a0acaf017737355615243496cde4994f8fa4f9"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum docopt 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d8acd393692c503b168471874953a2531df0e9ab77d0b6bbc582395743300a4a" "checksum docopt 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d8acd393692c503b168471874953a2531df0e9ab77d0b6bbc582395743300a4a"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
"checksum lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fb497c35d362b6a331cfd94956a07fc2c78a4604cdbee844a81170386b996dd3" "checksum lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fb497c35d362b6a331cfd94956a07fc2c78a4604cdbee844a81170386b996dd3"
"checksum libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)" = "b685088df2b950fccadf07a7187c8ef846a959c142338a48f9dc0b94517eb5f1" "checksum libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)" = "b685088df2b950fccadf07a7187c8ef846a959c142338a48f9dc0b94517eb5f1"
"checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d" "checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d"
"checksum num-traits 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "630de1ef5cc79d0cdd78b7e33b81f083cbfe90de0f4b2b2f07f905867c70e9fe"
"checksum proc-macro2 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "cccdc7557a98fe98453030f077df7f3a042052fae465bb61d2c2c41435cfd9b6" "checksum proc-macro2 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "cccdc7557a98fe98453030f077df7f3a042052fae465bb61d2c2c41435cfd9b6"
"checksum proptest 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "56f423fe98260316065f96eda6fcb2b892d08114a77ad753e4a257c5303ce0fc"
"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0"
"checksum quote 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b71f9f575d55555aa9c06188be9d4e2bfc83ed02537948ac0d520c24d0419f1a" "checksum quote 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b71f9f575d55555aa9c06188be9d4e2bfc83ed02537948ac0d520c24d0419f1a"
"checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c"
"checksum rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "edecf0f94da5551fc9b492093e30b041a891657db7940ee221f9d2f66e82eef2"
"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1" "checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1"
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" "checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
"checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" "checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384"
"checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" "checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7"
"checksum regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "747ba3b235651f6e2f67dfa8bcdcd073ddb7c243cb21c442fc12395dfcac212d"
"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5"
"checksum ropey 0.8.4 (git+https://github.com/cessen/ropey)" = "<none>" "checksum ropey 0.8.4 (git+https://github.com/cessen/ropey)" = "<none>"
"checksum rusty-fork 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ea98d8d2644fd8b4946a2be90e8c6dc52b652e03079c46e134d9815062b9082d"
"checksum serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "0c3adf19c07af6d186d91dae8927b83b0553d07ca56cbf7f2f32560455c91920" "checksum serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "0c3adf19c07af6d186d91dae8927b83b0553d07ca56cbf7f2f32560455c91920"
"checksum serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "3525a779832b08693031b8ecfb0de81cd71cfd3812088fafe9a7496789572124" "checksum serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "3525a779832b08693031b8ecfb0de81cd71cfd3812088fafe9a7496789572124"
"checksum smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "26df3bb03ca5eac2e64192b723d51f56c1b1e0860e7c766281f4598f181acdc8" "checksum smallvec 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "26df3bb03ca5eac2e64192b723d51f56c1b1e0860e7c766281f4598f181acdc8"
"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" "checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694"
"checksum syn 0.14.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4bad7abdf6633f07c7046b90484f1d9dc055eca39f8c991177b1046ce61dba9a" "checksum syn 0.14.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4bad7abdf6633f07c7046b90484f1d9dc055eca39f8c991177b1046ce61dba9a"
"checksum tempfile 3.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c4b103c6d08d323b92ff42c8ce62abcd83ca8efa7fd5bf7927efefec75f58c76"
"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
"checksum thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279ef31c19ededf577bfd12dfae728040a21f635b06a24cd670ff510edd38963" "checksum thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279ef31c19ededf577bfd12dfae728040a21f635b06a24cd670ff510edd38963"
"checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d" "checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"
@ -423,7 +238,3 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
"checksum wait-timeout 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "b9f3bf741a801531993db6478b95682117471f76916f5e690dd8d45395b09349"
"checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@ -1,7 +1,6 @@
[workspace] [workspace]
members = [ members = [
"sub_crates/backend", "sub_crates/backend",
"sub_crates/text_encoding",
] ]
[package] [package]
@ -28,6 +27,3 @@ termion = "1.5"
# Local crate dependencies # Local crate dependencies
[dependencies.backend] [dependencies.backend]
path = "sub_crates/backend" path = "sub_crates/backend"
[dependencies.text_encoding]
path = "sub_crates/text_encoding"

View File

@ -1,13 +0,0 @@
[package]
name = "text_encoding"
version = "0.1.0"
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
license = "MIT"
build = "build.rs"
[lib]
name = "text_encoding"
path = "src/lib.rs"
[dev-dependencies]
proptest = "0.8"

View File

@ -1,221 +0,0 @@
use std::env;
use std::fs::File;
use std::io::{BufRead, Read, Write};
use std::path::Path;
fn main() {
let out_dir = env::var("OUT_DIR").unwrap();
// Generate all of the single byte encoding tables and wrapper code.
{
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-ibm866.txt").unwrap(),
File::create(&Path::new(&out_dir).join("ibm866.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-2.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-2.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-3.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-3.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-4.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-4.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-5.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-5.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-6.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-6.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-7.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-7.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-8.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-8.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-10.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-10.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-13.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-13.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-14.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-14.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-15.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-15.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-iso-8859-16.txt").unwrap(),
File::create(&Path::new(&out_dir).join("iso-8859-16.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-koi8-r.txt").unwrap(),
File::create(&Path::new(&out_dir).join("koi8-r.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-koi8-u.txt").unwrap(),
File::create(&Path::new(&out_dir).join("koi8-u.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-macintosh.txt").unwrap(),
File::create(&Path::new(&out_dir).join("macintosh.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-874.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-874.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1250.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1250.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1251.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1251.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1252.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1252.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1253.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1253.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1254.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1254.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1255.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1255.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1256.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1256.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1257.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1257.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-windows-1258.txt").unwrap(),
File::create(&Path::new(&out_dir).join("windows-1258.rs")).unwrap(),
).unwrap();
generate_single_byte_encoding_from_index(
File::open("encoding_tables/index-x-mac-cyrillic.txt").unwrap(),
File::create(&Path::new(&out_dir).join("x-mac-cyrillic.rs")).unwrap(),
).unwrap();
}
}
/// Generates the Rust source for one single-byte encoding from an index file.
///
/// `in_file` is read line by line. Blank lines and lines starting with `#`
/// are ignored, as are lines with fewer than two whitespace-separated fields.
/// Every other line must start with a decimal table index in `0..=127`
/// followed by a `0x`-prefixed hexadecimal Unicode code point; anything after
/// those two fields is ignored.
///
/// The following is written to `out_file`:
///
/// * thin `encode_from_str` / `decode_to_str` wrappers,
/// * `ENCODE_TABLE`: `(char, byte)` pairs sorted by `char`, where the byte is
///   `128 + index`,
/// * `DECODE_TABLE`: the 128 chars in index order.
///
/// # Errors
///
/// Returns any I/O error from reading or writing, and an error of kind
/// `InvalidData` if a table line has an unparsable or out-of-range index, a
/// code point without the `0x` prefix, or a value that is not a valid `char`.
fn generate_single_byte_encoding_from_index<R: Read, W: Write>(
    in_file: R,
    out_file: W,
) -> std::io::Result<()> {
    // Builds the error used for every kind of malformed table line.
    fn invalid_data(msg: String) -> std::io::Error {
        std::io::Error::new(std::io::ErrorKind::InvalidData, msg)
    }

    // Code emitted verbatim at the top of every generated file.
    const SHARED_CODE: &str = r#"
use {DecodeResult, EncodeResult};
pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
super::single_byte_encode_from_str(&ENCODE_TABLE, input, output)
}
pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
super::single_byte_decode_to_str(&DECODE_TABLE, input, output)
}
"#;

    let in_file = std::io::BufReader::new(in_file);
    // The tables are emitted as many small writes; buffer them.
    let mut out_file = std::io::BufWriter::new(out_file);

    // Collect the table.
    // NOTE(review): the fill character was unreadable in the reviewed source;
    // U+FFFD REPLACEMENT CHARACTER is assumed here — confirm.
    let mut table = ['\u{FFFD}'; 128];
    for line in in_file.lines() {
        let line = line?;
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let mut fields = line.split_whitespace();
        let (index_text, code_text) = match (fields.next(), fields.next()) {
            (Some(index_text), Some(code_text)) => (index_text, code_text),
            // Lines with fewer than two fields carry no mapping.
            _ => continue,
        };

        let index = index_text
            .parse::<usize>()
            .map_err(|e| invalid_data(format!("invalid table index {:?}: {}", index_text, e)))?;
        if index >= table.len() {
            return Err(invalid_data(format!(
                "table index {} is out of range (expected 0..=127)",
                index
            )));
        }

        // Require the `0x` prefix rather than blindly dropping two bytes, so a
        // malformed field cannot be silently misparsed.
        if !code_text.starts_with("0x") {
            return Err(invalid_data(format!(
                "code point {:?} is missing the 0x prefix",
                code_text
            )));
        }
        let value = u32::from_str_radix(&code_text[2..], 16)
            .map_err(|e| invalid_data(format!("invalid code point {:?}: {}", code_text, e)))?;
        let code = std::char::from_u32(value).ok_or_else(|| {
            invalid_data(format!("{:?} is not a valid Unicode scalar value", code_text))
        })?;

        table[index] = code;
    }

    // Build the reverse table, sorted by character. The sort is stable, so
    // equal characters keep their index order.
    // NOTE(review): slots left at the fill character are included here too,
    // matching the original output — confirm the encoder expects that.
    let mut rev_table: Vec<(char, usize)> = table
        .iter()
        .enumerate()
        .map(|(i, &c)| (c, 128 + i))
        .collect();
    rev_table.sort_by_key(|entry| entry.0);

    // Write shared code.
    out_file.write_all(SHARED_CODE.as_bytes())?;

    // Write encode table.
    write!(
        out_file,
        "\nconst ENCODE_TABLE: [(char, u8); {}] = [\n",
        rev_table.len()
    )?;
    for &(c, byte) in &rev_table {
        write!(out_file, "('\\u{{{:04X}}}', 0x{:02X}), ", c as u32, byte)?;
    }
    out_file.write_all(b"\n];\n")?;

    // Write decode table.
    out_file.write_all(b"\nconst DECODE_TABLE: [char; 128] = [\n")?;
    for &c in table.iter() {
        write!(out_file, "'\\u{{{:04X}}}', ", c as u32)?;
    }
    out_file.write_all(b"\n];\n")?;

    // Flush explicitly so write errors are reported instead of being
    // swallowed when the buffer is dropped.
    out_file.flush()
}

View File

@ -1,134 +0,0 @@
# For details on index index-ibm866.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: db6fe14a559d1601a7667338d83704773d5708dbc641e1ad3c5e21405770f05e
# Date: 2018-01-06
0 0x0410 А (CYRILLIC CAPITAL LETTER A)
1 0x0411 Б (CYRILLIC CAPITAL LETTER BE)
2 0x0412 В (CYRILLIC CAPITAL LETTER VE)
3 0x0413 Г (CYRILLIC CAPITAL LETTER GHE)
4 0x0414 Д (CYRILLIC CAPITAL LETTER DE)
5 0x0415 Е (CYRILLIC CAPITAL LETTER IE)
6 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE)
7 0x0417 З (CYRILLIC CAPITAL LETTER ZE)
8 0x0418 И (CYRILLIC CAPITAL LETTER I)
9 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I)
10 0x041A К (CYRILLIC CAPITAL LETTER KA)
11 0x041B Л (CYRILLIC CAPITAL LETTER EL)
12 0x041C М (CYRILLIC CAPITAL LETTER EM)
13 0x041D Н (CYRILLIC CAPITAL LETTER EN)
14 0x041E О (CYRILLIC CAPITAL LETTER O)
15 0x041F П (CYRILLIC CAPITAL LETTER PE)
16 0x0420 Р (CYRILLIC CAPITAL LETTER ER)
17 0x0421 С (CYRILLIC CAPITAL LETTER ES)
18 0x0422 Т (CYRILLIC CAPITAL LETTER TE)
19 0x0423 У (CYRILLIC CAPITAL LETTER U)
20 0x0424 Ф (CYRILLIC CAPITAL LETTER EF)
21 0x0425 Х (CYRILLIC CAPITAL LETTER HA)
22 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE)
23 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE)
24 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA)
25 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA)
26 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN)
27 0x042B Ы (CYRILLIC CAPITAL LETTER YERU)
28 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN)
29 0x042D Э (CYRILLIC CAPITAL LETTER E)
30 0x042E Ю (CYRILLIC CAPITAL LETTER YU)
31 0x042F Я (CYRILLIC CAPITAL LETTER YA)
32 0x0430 а (CYRILLIC SMALL LETTER A)
33 0x0431 б (CYRILLIC SMALL LETTER BE)
34 0x0432 в (CYRILLIC SMALL LETTER VE)
35 0x0433 г (CYRILLIC SMALL LETTER GHE)
36 0x0434 д (CYRILLIC SMALL LETTER DE)
37 0x0435 е (CYRILLIC SMALL LETTER IE)
38 0x0436 ж (CYRILLIC SMALL LETTER ZHE)
39 0x0437 з (CYRILLIC SMALL LETTER ZE)
40 0x0438 и (CYRILLIC SMALL LETTER I)
41 0x0439 й (CYRILLIC SMALL LETTER SHORT I)
42 0x043A к (CYRILLIC SMALL LETTER KA)
43 0x043B л (CYRILLIC SMALL LETTER EL)
44 0x043C м (CYRILLIC SMALL LETTER EM)
45 0x043D н (CYRILLIC SMALL LETTER EN)
46 0x043E о (CYRILLIC SMALL LETTER O)
47 0x043F п (CYRILLIC SMALL LETTER PE)
48 0x2591 ░ (LIGHT SHADE)
49 0x2592 ▒ (MEDIUM SHADE)
50 0x2593 ▓ (DARK SHADE)
51 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL)
52 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL AND LEFT)
53 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE)
54 0x2562 ╢ (BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE)
55 0x2556 ╖ (BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE)
56 0x2555 ╕ (BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE)
57 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT)
58 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL)
59 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT)
60 0x255D ╝ (BOX DRAWINGS DOUBLE UP AND LEFT)
61 0x255C ╜ (BOX DRAWINGS UP DOUBLE AND LEFT SINGLE)
62 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE)
63 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT)
64 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT)
65 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL)
66 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL)
67 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT)
68 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL)
69 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL)
70 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE)
71 0x255F ╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE)
72 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT)
73 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT)
74 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL)
75 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL)
76 0x2560 ╠ (BOX DRAWINGS DOUBLE VERTICAL AND RIGHT)
77 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL)
78 0x256C ╬ (BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL)
79 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE)
80 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE)
81 0x2564 ╤ (BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE)
82 0x2565 ╥ (BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE)
83 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE)
84 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE)
85 0x2552 ╒ (BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE)
86 0x2553 ╓ (BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE)
87 0x256B ╫ (BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE)
88 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE)
89 0x2518 ┘ (BOX DRAWINGS LIGHT UP AND LEFT)
90 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT)
91 0x2588 █ (FULL BLOCK)
92 0x2584 ▄ (LOWER HALF BLOCK)
93 0x258C ▌ (LEFT HALF BLOCK)
94 0x2590 ▐ (RIGHT HALF BLOCK)
95 0x2580 ▀ (UPPER HALF BLOCK)
96 0x0440 р (CYRILLIC SMALL LETTER ER)
97 0x0441 с (CYRILLIC SMALL LETTER ES)
98 0x0442 т (CYRILLIC SMALL LETTER TE)
99 0x0443 у (CYRILLIC SMALL LETTER U)
100 0x0444 ф (CYRILLIC SMALL LETTER EF)
101 0x0445 х (CYRILLIC SMALL LETTER HA)
102 0x0446 ц (CYRILLIC SMALL LETTER TSE)
103 0x0447 ч (CYRILLIC SMALL LETTER CHE)
104 0x0448 ш (CYRILLIC SMALL LETTER SHA)
105 0x0449 щ (CYRILLIC SMALL LETTER SHCHA)
106 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN)
107 0x044B ы (CYRILLIC SMALL LETTER YERU)
108 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN)
109 0x044D э (CYRILLIC SMALL LETTER E)
110 0x044E ю (CYRILLIC SMALL LETTER YU)
111 0x044F я (CYRILLIC SMALL LETTER YA)
112 0x0401 Ё (CYRILLIC CAPITAL LETTER IO)
113 0x0451 ё (CYRILLIC SMALL LETTER IO)
114 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE)
115 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE)
116 0x0407 Ї (CYRILLIC CAPITAL LETTER YI)
117 0x0457 ї (CYRILLIC SMALL LETTER YI)
118 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U)
119 0x045E ў (CYRILLIC SMALL LETTER SHORT U)
120 0x00B0 ° (DEGREE SIGN)
121 0x2219 ∙ (BULLET OPERATOR)
122 0x00B7 · (MIDDLE DOT)
123 0x221A √ (SQUARE ROOT)
124 0x2116 № (NUMERO SIGN)
125 0x00A4 ¤ (CURRENCY SIGN)
126 0x25A0 ■ (BLACK SQUARE)
127 0x00A0   (NO-BREAK SPACE)

View File

@ -1,134 +0,0 @@
# For details on index index-iso-8859-10.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 02c2b5590d8ccda9931008c471f6ee2c590b2c8fe5e6ccb3b08638115d778507
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK)
34 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON)
35 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA)
36 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON)
37 0x0128 Ĩ (LATIN CAPITAL LETTER I WITH TILDE)
38 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA)
39 0x00A7 § (SECTION SIGN)
40 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA)
41 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE)
42 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
43 0x0166 Ŧ (LATIN CAPITAL LETTER T WITH STROKE)
44 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
45 0x00AD ­ (SOFT HYPHEN)
46 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON)
47 0x014A Ŋ (LATIN CAPITAL LETTER ENG)
48 0x00B0 ° (DEGREE SIGN)
49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK)
50 0x0113 ē (LATIN SMALL LETTER E WITH MACRON)
51 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA)
52 0x012B ī (LATIN SMALL LETTER I WITH MACRON)
53 0x0129 ĩ (LATIN SMALL LETTER I WITH TILDE)
54 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA)
55 0x00B7 · (MIDDLE DOT)
56 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA)
57 0x0111 đ (LATIN SMALL LETTER D WITH STROKE)
58 0x0161 š (LATIN SMALL LETTER S WITH CARON)
59 0x0167 ŧ (LATIN SMALL LETTER T WITH STROKE)
60 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
61 0x2015 ― (HORIZONTAL BAR)
62 0x016B ū (LATIN SMALL LETTER U WITH MACRON)
63 0x014B ŋ (LATIN SMALL LETTER ENG)
64 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x00C3 Ã (LATIN CAPITAL LETTER A WITH TILDE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x00C6 Æ (LATIN CAPITAL LETTER AE)
71 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK)
72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
80 0x00D0 Ð (LATIN CAPITAL LETTER ETH)
81 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA)
82 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x0168 Ũ (LATIN CAPITAL LETTER U WITH TILDE)
88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
89 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE)
94 0x00DE Þ (LATIN CAPITAL LETTER THORN)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x0101 ā (LATIN SMALL LETTER A WITH MACRON)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x00E6 æ (LATIN SMALL LETTER AE)
103 0x012F į (LATIN SMALL LETTER I WITH OGONEK)
104 0x010D č (LATIN SMALL LETTER C WITH CARON)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
112 0x00F0 ð (LATIN SMALL LETTER ETH)
113 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA)
114 0x014D ō (LATIN SMALL LETTER O WITH MACRON)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x0169 ũ (LATIN SMALL LETTER U WITH TILDE)
120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
121 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE)
126 0x00FE þ (LATIN SMALL LETTER THORN)
127 0x0138 ĸ (LATIN SMALL LETTER KRA)

View File

@ -1,134 +0,0 @@
# For details on index index-iso-8859-13.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 40736338e964ab520407cebcb01329f8d450abf6ce12bf88b74b655b60e43300
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00C6 Æ (LATIN CAPITAL LETTER AE)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x201C “ (LEFT DOUBLE QUOTATION MARK)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS)
63 0x00E6 æ (LATIN SMALL LETTER AE)
64 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK)
65 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK)
66 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON)
67 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK)
71 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON)
72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE)
75 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE)
76 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA)
77 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA)
78 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON)
79 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA)
80 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE)
82 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON)
85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK)
89 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE)
90 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE)
91 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
94 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK)
97 0x012F į (LATIN SMALL LETTER I WITH OGONEK)
98 0x0101 ā (LATIN SMALL LETTER A WITH MACRON)
99 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK)
103 0x0113 ē (LATIN SMALL LETTER E WITH MACRON)
104 0x010D č (LATIN SMALL LETTER C WITH CARON)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE)
107 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE)
108 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA)
109 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA)
110 0x012B ī (LATIN SMALL LETTER I WITH MACRON)
111 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA)
112 0x0161 š (LATIN SMALL LETTER S WITH CARON)
113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE)
114 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x014D ō (LATIN SMALL LETTER O WITH MACRON)
117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK)
121 0x0142 ł (LATIN SMALL LETTER L WITH STROKE)
122 0x015B ś (LATIN SMALL LETTER S WITH ACUTE)
123 0x016B ū (LATIN SMALL LETTER U WITH MACRON)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE)
126 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
127 0x2019 (RIGHT SINGLE QUOTATION MARK)

View File

@ -1,134 +0,0 @@
# For details on index index-iso-8859-14.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 2c8651cfc08b1f35b17919ee5379f2fa006af3ec809f11b3b7f470785580542b
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x1E02 Ḃ (LATIN CAPITAL LETTER B WITH DOT ABOVE)
34 0x1E03 ḃ (LATIN SMALL LETTER B WITH DOT ABOVE)
35 0x00A3 £ (POUND SIGN)
36 0x010A Ċ (LATIN CAPITAL LETTER C WITH DOT ABOVE)
37 0x010B ċ (LATIN SMALL LETTER C WITH DOT ABOVE)
38 0x1E0A Ḋ (LATIN CAPITAL LETTER D WITH DOT ABOVE)
39 0x00A7 § (SECTION SIGN)
40 0x1E80 Ẁ (LATIN CAPITAL LETTER W WITH GRAVE)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x1E82 Ẃ (LATIN CAPITAL LETTER W WITH ACUTE)
43 0x1E0B ḋ (LATIN SMALL LETTER D WITH DOT ABOVE)
44 0x1EF2 Ỳ (LATIN CAPITAL LETTER Y WITH GRAVE)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS)
48 0x1E1E Ḟ (LATIN CAPITAL LETTER F WITH DOT ABOVE)
49 0x1E1F ḟ (LATIN SMALL LETTER F WITH DOT ABOVE)
50 0x0120 Ġ (LATIN CAPITAL LETTER G WITH DOT ABOVE)
51 0x0121 ġ (LATIN SMALL LETTER G WITH DOT ABOVE)
52 0x1E40 Ṁ (LATIN CAPITAL LETTER M WITH DOT ABOVE)
53 0x1E41 ṁ (LATIN SMALL LETTER M WITH DOT ABOVE)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x1E56 Ṗ (LATIN CAPITAL LETTER P WITH DOT ABOVE)
56 0x1E81 ẁ (LATIN SMALL LETTER W WITH GRAVE)
57 0x1E57 ṗ (LATIN SMALL LETTER P WITH DOT ABOVE)
58 0x1E83 ẃ (LATIN SMALL LETTER W WITH ACUTE)
59 0x1E60 Ṡ (LATIN CAPITAL LETTER S WITH DOT ABOVE)
60 0x1EF3 ỳ (LATIN SMALL LETTER Y WITH GRAVE)
61 0x1E84 Ẅ (LATIN CAPITAL LETTER W WITH DIAERESIS)
62 0x1E85 ẅ (LATIN SMALL LETTER W WITH DIAERESIS)
63 0x1E61 ṡ (LATIN SMALL LETTER S WITH DOT ABOVE)
64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x00C3 Ã (LATIN CAPITAL LETTER A WITH TILDE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x00C6 Æ (LATIN CAPITAL LETTER AE)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
80 0x0174 Ŵ (LATIN CAPITAL LETTER W WITH CIRCUMFLEX)
81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE)
82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x1E6A Ṫ (LATIN CAPITAL LETTER T WITH DOT ABOVE)
88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE)
94 0x0176 Ŷ (LATIN CAPITAL LETTER Y WITH CIRCUMFLEX)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x00E6 æ (LATIN SMALL LETTER AE)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
112 0x0175 ŵ (LATIN SMALL LETTER W WITH CIRCUMFLEX)
113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE)
114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x1E6B ṫ (LATIN SMALL LETTER T WITH DOT ABOVE)
120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE)
126 0x0177 ŷ (LATIN SMALL LETTER Y WITH CIRCUMFLEX)
127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS)

View File

@ -1,134 +0,0 @@
# For details on index index-iso-8859-15.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: a560aba47bccd7510a6ac77f671fe75dca3800f05cf6d676910c311a8f8ff079
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x00A1 ¡ (INVERTED EXCLAMATION MARK)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x20AC € (EURO SIGN)
37 0x00A5 ¥ (YEN SIGN)
38 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
39 0x00A7 § (SECTION SIGN)
40 0x0161 š (LATIN SMALL LETTER S WITH CARON)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x00AA ª (FEMININE ORDINAL INDICATOR)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00AF ¯ (MACRON)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x00BA º (MASCULINE ORDINAL INDICATOR)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x0152 Œ (LATIN CAPITAL LIGATURE OE)
61 0x0153 œ (LATIN SMALL LIGATURE OE)
62 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS)
63 0x00BF ¿ (INVERTED QUESTION MARK)
64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x00C3 Ã (LATIN CAPITAL LETTER A WITH TILDE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x00C6 Æ (LATIN CAPITAL LETTER AE)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
80 0x00D0 Ð (LATIN CAPITAL LETTER ETH)
81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE)
82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE)
94 0x00DE Þ (LATIN CAPITAL LETTER THORN)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x00E6 æ (LATIN SMALL LETTER AE)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
112 0x00F0 ð (LATIN SMALL LETTER ETH)
113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE)
114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE)
126 0x00FE þ (LATIN SMALL LETTER THORN)
127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS)

View File

@ -1,134 +0,0 @@
# For details on index index-iso-8859-16.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 55676320d2d1b6e6909f5b3d741a7cf0cefc84e920aa4474afc091459111c2e3
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK)
34 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK)
35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE)
36 0x20AC € (EURO SIGN)
37 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
38 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
39 0x00A7 § (SECTION SIGN)
40 0x0161 š (LATIN SMALL LETTER S WITH CARON)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x0218 Ș (LATIN CAPITAL LETTER S WITH COMMA BELOW)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE)
45 0x00AD ­ (SOFT HYPHEN)
46 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE)
47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x010C Č (LATIN CAPITAL LETTER C WITH CARON)
51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE)
52 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
53 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
57 0x010D č (LATIN SMALL LETTER C WITH CARON)
58 0x0219 ș (LATIN SMALL LETTER S WITH COMMA BELOW)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x0152 Œ (LATIN CAPITAL LIGATURE OE)
61 0x0153 œ (LATIN SMALL LIGATURE OE)
62 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS)
63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE)
64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE)
70 0x00C6 Æ (LATIN CAPITAL LETTER AE)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE)
81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE)
82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE)
88 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE)
89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK)
94 0x021A Ț (LATIN CAPITAL LETTER T WITH COMMA BELOW)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE)
102 0x00E6 æ (LATIN SMALL LETTER AE)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE)
113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE)
114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x0151 ő (LATIN SMALL LETTER O WITH DOUBLE ACUTE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x015B ś (LATIN SMALL LETTER S WITH ACUTE)
120 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE)
121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK)
126 0x021B ț (LATIN SMALL LETTER T WITH COMMA BELOW)
127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS)

View File

@ -1,134 +0,0 @@
# For details on index index-iso-8859-2.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 9569c67f22d0b57790e1c407c6eecf227e4562322dc296de43cdab7a0152ec73
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK)
34 0x02D8 ˘ (BREVE)
35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x013D Ľ (LATIN CAPITAL LETTER L WITH CARON)
38 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
42 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA)
43 0x0164 Ť (LATIN CAPITAL LETTER T WITH CARON)
44 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE)
45 0x00AD ­ (SOFT HYPHEN)
46 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
48 0x00B0 ° (DEGREE SIGN)
49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK)
50 0x02DB ˛ (OGONEK)
51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x013E ľ (LATIN SMALL LETTER L WITH CARON)
54 0x015B ś (LATIN SMALL LETTER S WITH ACUTE)
55 0x02C7 ˇ (CARON)
56 0x00B8 ¸ (CEDILLA)
57 0x0161 š (LATIN SMALL LETTER S WITH CARON)
58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA)
59 0x0165 ť (LATIN SMALL LETTER T WITH CARON)
60 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE)
61 0x02DD ˝ (DOUBLE ACUTE ACCENT)
62 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE)
64 0x0154 Ŕ (LATIN CAPITAL LETTER R WITH ACUTE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x0139 Ĺ (LATIN CAPITAL LETTER L WITH ACUTE)
70 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x011A Ě (LATIN CAPITAL LETTER E WITH CARON)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x010E Ď (LATIN CAPITAL LETTER D WITH CARON)
80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE)
81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE)
82 0x0147 Ň (LATIN CAPITAL LETTER N WITH CARON)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x0158 Ř (LATIN CAPITAL LETTER R WITH CARON)
89 0x016E Ů (LATIN CAPITAL LETTER U WITH RING ABOVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE)
94 0x0162 Ţ (LATIN CAPITAL LETTER T WITH CEDILLA)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x0155 ŕ (LATIN SMALL LETTER R WITH ACUTE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x013A ĺ (LATIN SMALL LETTER L WITH ACUTE)
102 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x010D č (LATIN SMALL LETTER C WITH CARON)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x011B ě (LATIN SMALL LETTER E WITH CARON)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x010F ď (LATIN SMALL LETTER D WITH CARON)
112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE)
113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE)
114 0x0148 ň (LATIN SMALL LETTER N WITH CARON)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x0151 ő (LATIN SMALL LETTER O WITH DOUBLE ACUTE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x0159 ř (LATIN SMALL LETTER R WITH CARON)
121 0x016F ů (LATIN SMALL LETTER U WITH RING ABOVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE)
126 0x0163 ţ (LATIN SMALL LETTER T WITH CEDILLA)
127 0x02D9 ˙ (DOT ABOVE)

View File

@ -1,127 +0,0 @@
# For details on index index-iso-8859-3.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: af8f1e12df79b768322b5e83613698cdc619438270a2fc359554331c805054a3
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x0126 Ħ (LATIN CAPITAL LETTER H WITH STROKE)
34 0x02D8 ˘ (BREVE)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
38 0x0124 Ĥ (LATIN CAPITAL LETTER H WITH CIRCUMFLEX)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x0130 İ (LATIN CAPITAL LETTER I WITH DOT ABOVE)
42 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA)
43 0x011E Ğ (LATIN CAPITAL LETTER G WITH BREVE)
44 0x0134 Ĵ (LATIN CAPITAL LETTER J WITH CIRCUMFLEX)
45 0x00AD ­ (SOFT HYPHEN)
47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
48 0x00B0 ° (DEGREE SIGN)
49 0x0127 ħ (LATIN SMALL LETTER H WITH STROKE)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x0125 ĥ (LATIN SMALL LETTER H WITH CIRCUMFLEX)
55 0x00B7 · (MIDDLE DOT)
56 0x00B8 ¸ (CEDILLA)
57 0x0131 ı (LATIN SMALL LETTER DOTLESS I)
58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA)
59 0x011F ğ (LATIN SMALL LETTER G WITH BREVE)
60 0x0135 ĵ (LATIN SMALL LETTER J WITH CIRCUMFLEX)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE)
64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x010A Ċ (LATIN CAPITAL LETTER C WITH DOT ABOVE)
70 0x0108 Ĉ (LATIN CAPITAL LETTER C WITH CIRCUMFLEX)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE)
82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x0120 Ġ (LATIN CAPITAL LETTER G WITH DOT ABOVE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x011C Ĝ (LATIN CAPITAL LETTER G WITH CIRCUMFLEX)
89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x016C Ŭ (LATIN CAPITAL LETTER U WITH BREVE)
94 0x015C Ŝ (LATIN CAPITAL LETTER S WITH CIRCUMFLEX)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x010B ċ (LATIN SMALL LETTER C WITH DOT ABOVE)
102 0x0109 ĉ (LATIN SMALL LETTER C WITH CIRCUMFLEX)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE)
114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x0121 ġ (LATIN SMALL LETTER G WITH DOT ABOVE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x011D ĝ (LATIN SMALL LETTER G WITH CIRCUMFLEX)
121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x016D ŭ (LATIN SMALL LETTER U WITH BREVE)
126 0x015D ŝ (LATIN SMALL LETTER S WITH CIRCUMFLEX)
127 0x02D9 ˙ (DOT ABOVE)

View File

@ -1,134 +0,0 @@
# For details on index index-iso-8859-4.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 72f29c92344d351fe9e74a946e7e0468d76d542c6894ff82982cb652ebe0feb7
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK)
34 0x0138 ĸ (LATIN SMALL LETTER KRA)
35 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x0128 Ĩ (LATIN CAPITAL LETTER I WITH TILDE)
38 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
42 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON)
43 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA)
44 0x0166 Ŧ (LATIN CAPITAL LETTER T WITH STROKE)
45 0x00AD ­ (SOFT HYPHEN)
46 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
47 0x00AF ¯ (MACRON)
48 0x00B0 ° (DEGREE SIGN)
49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK)
50 0x02DB ˛ (OGONEK)
51 0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x0129 ĩ (LATIN SMALL LETTER I WITH TILDE)
54 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA)
55 0x02C7 ˇ (CARON)
56 0x00B8 ¸ (CEDILLA)
57 0x0161 š (LATIN SMALL LETTER S WITH CARON)
58 0x0113 ē (LATIN SMALL LETTER E WITH MACRON)
59 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA)
60 0x0167 ŧ (LATIN SMALL LETTER T WITH STROKE)
61 0x014A Ŋ (LATIN CAPITAL LETTER ENG)
62 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
63 0x014B ŋ (LATIN SMALL LETTER ENG)
64 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x00C3 Ã (LATIN CAPITAL LETTER A WITH TILDE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x00C6 Æ (LATIN CAPITAL LETTER AE)
71 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK)
72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON)
80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE)
81 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA)
82 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON)
83 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
89 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x0168 Ũ (LATIN CAPITAL LETTER U WITH TILDE)
94 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x0101 ā (LATIN SMALL LETTER A WITH MACRON)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x00E6 æ (LATIN SMALL LETTER AE)
103 0x012F į (LATIN SMALL LETTER I WITH OGONEK)
104 0x010D č (LATIN SMALL LETTER C WITH CARON)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x012B ī (LATIN SMALL LETTER I WITH MACRON)
112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE)
113 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA)
114 0x014D ō (LATIN SMALL LETTER O WITH MACRON)
115 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
121 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x0169 ũ (LATIN SMALL LETTER U WITH TILDE)
126 0x016B ū (LATIN SMALL LETTER U WITH MACRON)
127 0x02D9 ˙ (DOT ABOVE)

View File

@ -1,134 +0,0 @@
# For details on index index-iso-8859-5.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: fa9b1f3f5242df43e2e7bca80e9b6997c67944f20a4af91ee06bacc4e132d9c9
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x0401 Ё (CYRILLIC CAPITAL LETTER IO)
34 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE)
35 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE)
36 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE)
37 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE)
38 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I)
39 0x0407 Ї (CYRILLIC CAPITAL LETTER YI)
40 0x0408 Ј (CYRILLIC CAPITAL LETTER JE)
41 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE)
42 0x040A Њ (CYRILLIC CAPITAL LETTER NJE)
43 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE)
44 0x040C Ќ (CYRILLIC CAPITAL LETTER KJE)
45 0x00AD ­ (SOFT HYPHEN)
46 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U)
47 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE)
48 0x0410 А (CYRILLIC CAPITAL LETTER A)
49 0x0411 Б (CYRILLIC CAPITAL LETTER BE)
50 0x0412 В (CYRILLIC CAPITAL LETTER VE)
51 0x0413 Г (CYRILLIC CAPITAL LETTER GHE)
52 0x0414 Д (CYRILLIC CAPITAL LETTER DE)
53 0x0415 Е (CYRILLIC CAPITAL LETTER IE)
54 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE)
55 0x0417 З (CYRILLIC CAPITAL LETTER ZE)
56 0x0418 И (CYRILLIC CAPITAL LETTER I)
57 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I)
58 0x041A К (CYRILLIC CAPITAL LETTER KA)
59 0x041B Л (CYRILLIC CAPITAL LETTER EL)
60 0x041C М (CYRILLIC CAPITAL LETTER EM)
61 0x041D Н (CYRILLIC CAPITAL LETTER EN)
62 0x041E О (CYRILLIC CAPITAL LETTER O)
63 0x041F П (CYRILLIC CAPITAL LETTER PE)
64 0x0420 Р (CYRILLIC CAPITAL LETTER ER)
65 0x0421 С (CYRILLIC CAPITAL LETTER ES)
66 0x0422 Т (CYRILLIC CAPITAL LETTER TE)
67 0x0423 У (CYRILLIC CAPITAL LETTER U)
68 0x0424 Ф (CYRILLIC CAPITAL LETTER EF)
69 0x0425 Х (CYRILLIC CAPITAL LETTER HA)
70 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE)
71 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE)
72 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA)
73 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA)
74 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN)
75 0x042B Ы (CYRILLIC CAPITAL LETTER YERU)
76 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN)
77 0x042D Э (CYRILLIC CAPITAL LETTER E)
78 0x042E Ю (CYRILLIC CAPITAL LETTER YU)
79 0x042F Я (CYRILLIC CAPITAL LETTER YA)
80 0x0430 а (CYRILLIC SMALL LETTER A)
81 0x0431 б (CYRILLIC SMALL LETTER BE)
82 0x0432 в (CYRILLIC SMALL LETTER VE)
83 0x0433 г (CYRILLIC SMALL LETTER GHE)
84 0x0434 д (CYRILLIC SMALL LETTER DE)
85 0x0435 е (CYRILLIC SMALL LETTER IE)
86 0x0436 ж (CYRILLIC SMALL LETTER ZHE)
87 0x0437 з (CYRILLIC SMALL LETTER ZE)
88 0x0438 и (CYRILLIC SMALL LETTER I)
89 0x0439 й (CYRILLIC SMALL LETTER SHORT I)
90 0x043A к (CYRILLIC SMALL LETTER KA)
91 0x043B л (CYRILLIC SMALL LETTER EL)
92 0x043C м (CYRILLIC SMALL LETTER EM)
93 0x043D н (CYRILLIC SMALL LETTER EN)
94 0x043E о (CYRILLIC SMALL LETTER O)
95 0x043F п (CYRILLIC SMALL LETTER PE)
96 0x0440 р (CYRILLIC SMALL LETTER ER)
97 0x0441 с (CYRILLIC SMALL LETTER ES)
98 0x0442 т (CYRILLIC SMALL LETTER TE)
99 0x0443 у (CYRILLIC SMALL LETTER U)
100 0x0444 ф (CYRILLIC SMALL LETTER EF)
101 0x0445 х (CYRILLIC SMALL LETTER HA)
102 0x0446 ц (CYRILLIC SMALL LETTER TSE)
103 0x0447 ч (CYRILLIC SMALL LETTER CHE)
104 0x0448 ш (CYRILLIC SMALL LETTER SHA)
105 0x0449 щ (CYRILLIC SMALL LETTER SHCHA)
106 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN)
107 0x044B ы (CYRILLIC SMALL LETTER YERU)
108 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN)
109 0x044D э (CYRILLIC SMALL LETTER E)
110 0x044E ю (CYRILLIC SMALL LETTER YU)
111 0x044F я (CYRILLIC SMALL LETTER YA)
112 0x2116 № (NUMERO SIGN)
113 0x0451 ё (CYRILLIC SMALL LETTER IO)
114 0x0452 ђ (CYRILLIC SMALL LETTER DJE)
115 0x0453 ѓ (CYRILLIC SMALL LETTER GJE)
116 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE)
117 0x0455 ѕ (CYRILLIC SMALL LETTER DZE)
118 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I)
119 0x0457 ї (CYRILLIC SMALL LETTER YI)
120 0x0458 ј (CYRILLIC SMALL LETTER JE)
121 0x0459 љ (CYRILLIC SMALL LETTER LJE)
122 0x045A њ (CYRILLIC SMALL LETTER NJE)
123 0x045B ћ (CYRILLIC SMALL LETTER TSHE)
124 0x045C ќ (CYRILLIC SMALL LETTER KJE)
125 0x00A7 § (SECTION SIGN)
126 0x045E ў (CYRILLIC SMALL LETTER SHORT U)
127 0x045F џ (CYRILLIC SMALL LETTER DZHE)

View File

@ -1,89 +0,0 @@
# For details on index index-iso-8859-6.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 85bb7b5c2dc75975afebe5743935ba4ed5a09c1e9e34e9bfb2ff80293f5d8bbc
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
36 0x00A4 ¤ (CURRENCY SIGN)
44 0x060C ، (ARABIC COMMA)
45 0x00AD ­ (SOFT HYPHEN)
59 0x061B ؛ (ARABIC SEMICOLON)
63 0x061F ؟ (ARABIC QUESTION MARK)
65 0x0621 ء (ARABIC LETTER HAMZA)
66 0x0622 آ (ARABIC LETTER ALEF WITH MADDA ABOVE)
67 0x0623 أ (ARABIC LETTER ALEF WITH HAMZA ABOVE)
68 0x0624 ؤ (ARABIC LETTER WAW WITH HAMZA ABOVE)
69 0x0625 إ (ARABIC LETTER ALEF WITH HAMZA BELOW)
70 0x0626 ئ (ARABIC LETTER YEH WITH HAMZA ABOVE)
71 0x0627 ا (ARABIC LETTER ALEF)
72 0x0628 ب (ARABIC LETTER BEH)
73 0x0629 ة (ARABIC LETTER TEH MARBUTA)
74 0x062A ت (ARABIC LETTER TEH)
75 0x062B ث (ARABIC LETTER THEH)
76 0x062C ج (ARABIC LETTER JEEM)
77 0x062D ح (ARABIC LETTER HAH)
78 0x062E خ (ARABIC LETTER KHAH)
79 0x062F د (ARABIC LETTER DAL)
80 0x0630 ذ (ARABIC LETTER THAL)
81 0x0631 ر (ARABIC LETTER REH)
82 0x0632 ز (ARABIC LETTER ZAIN)
83 0x0633 س (ARABIC LETTER SEEN)
84 0x0634 ش (ARABIC LETTER SHEEN)
85 0x0635 ص (ARABIC LETTER SAD)
86 0x0636 ض (ARABIC LETTER DAD)
87 0x0637 ط (ARABIC LETTER TAH)
88 0x0638 ظ (ARABIC LETTER ZAH)
89 0x0639 ع (ARABIC LETTER AIN)
90 0x063A غ (ARABIC LETTER GHAIN)
96 0x0640 ـ (ARABIC TATWEEL)
97 0x0641 ف (ARABIC LETTER FEH)
98 0x0642 ق (ARABIC LETTER QAF)
99 0x0643 ك (ARABIC LETTER KAF)
100 0x0644 ل (ARABIC LETTER LAM)
101 0x0645 م (ARABIC LETTER MEEM)
102 0x0646 ن (ARABIC LETTER NOON)
103 0x0647 ه (ARABIC LETTER HEH)
104 0x0648 و (ARABIC LETTER WAW)
105 0x0649 ى (ARABIC LETTER ALEF MAKSURA)
106 0x064A ي (ARABIC LETTER YEH)
107 0x064B ً (ARABIC FATHATAN)
108 0x064C ٌ (ARABIC DAMMATAN)
109 0x064D ٍ (ARABIC KASRATAN)
110 0x064E َ (ARABIC FATHA)
111 0x064F ُ (ARABIC DAMMA)
112 0x0650 ِ (ARABIC KASRA)
113 0x0651 ّ (ARABIC SHADDA)
114 0x0652 ْ (ARABIC SUKUN)

View File

@ -1,131 +0,0 @@
# For details on index index-iso-8859-7.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: f53d8aeba36314ef950eef02ffcf11dff540638ce27dfe7a86b6ccc6875afb24
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x2018 (LEFT SINGLE QUOTATION MARK)
34 0x2019 (RIGHT SINGLE QUOTATION MARK)
35 0x00A3 £ (POUND SIGN)
36 0x20AC € (EURO SIGN)
37 0x20AF ₯ (DRACHMA SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x037A ͺ (GREEK YPOGEGRAMMENI)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
47 0x2015 ― (HORIZONTAL BAR)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x0384 ΄ (GREEK TONOS)
53 0x0385 ΅ (GREEK DIALYTIKA TONOS)
54 0x0386 Ά (GREEK CAPITAL LETTER ALPHA WITH TONOS)
55 0x00B7 · (MIDDLE DOT)
56 0x0388 Έ (GREEK CAPITAL LETTER EPSILON WITH TONOS)
57 0x0389 Ή (GREEK CAPITAL LETTER ETA WITH TONOS)
58 0x038A Ί (GREEK CAPITAL LETTER IOTA WITH TONOS)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x038C Ό (GREEK CAPITAL LETTER OMICRON WITH TONOS)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x038E Ύ (GREEK CAPITAL LETTER UPSILON WITH TONOS)
63 0x038F Ώ (GREEK CAPITAL LETTER OMEGA WITH TONOS)
64 0x0390 ΐ (GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS)
65 0x0391 Α (GREEK CAPITAL LETTER ALPHA)
66 0x0392 Β (GREEK CAPITAL LETTER BETA)
67 0x0393 Γ (GREEK CAPITAL LETTER GAMMA)
68 0x0394 Δ (GREEK CAPITAL LETTER DELTA)
69 0x0395 Ε (GREEK CAPITAL LETTER EPSILON)
70 0x0396 Ζ (GREEK CAPITAL LETTER ZETA)
71 0x0397 Η (GREEK CAPITAL LETTER ETA)
72 0x0398 Θ (GREEK CAPITAL LETTER THETA)
73 0x0399 Ι (GREEK CAPITAL LETTER IOTA)
74 0x039A Κ (GREEK CAPITAL LETTER KAPPA)
75 0x039B Λ (GREEK CAPITAL LETTER LAMDA)
76 0x039C Μ (GREEK CAPITAL LETTER MU)
77 0x039D Ν (GREEK CAPITAL LETTER NU)
78 0x039E Ξ (GREEK CAPITAL LETTER XI)
79 0x039F Ο (GREEK CAPITAL LETTER OMICRON)
80 0x03A0 Π (GREEK CAPITAL LETTER PI)
81 0x03A1 Ρ (GREEK CAPITAL LETTER RHO)
83 0x03A3 Σ (GREEK CAPITAL LETTER SIGMA)
84 0x03A4 Τ (GREEK CAPITAL LETTER TAU)
85 0x03A5 Υ (GREEK CAPITAL LETTER UPSILON)
86 0x03A6 Φ (GREEK CAPITAL LETTER PHI)
87 0x03A7 Χ (GREEK CAPITAL LETTER CHI)
88 0x03A8 Ψ (GREEK CAPITAL LETTER PSI)
89 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA)
90 0x03AA Ϊ (GREEK CAPITAL LETTER IOTA WITH DIALYTIKA)
91 0x03AB Ϋ (GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA)
92 0x03AC ά (GREEK SMALL LETTER ALPHA WITH TONOS)
93 0x03AD έ (GREEK SMALL LETTER EPSILON WITH TONOS)
94 0x03AE ή (GREEK SMALL LETTER ETA WITH TONOS)
95 0x03AF ί (GREEK SMALL LETTER IOTA WITH TONOS)
96 0x03B0 ΰ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS)
97 0x03B1 α (GREEK SMALL LETTER ALPHA)
98 0x03B2 β (GREEK SMALL LETTER BETA)
99 0x03B3 γ (GREEK SMALL LETTER GAMMA)
100 0x03B4 δ (GREEK SMALL LETTER DELTA)
101 0x03B5 ε (GREEK SMALL LETTER EPSILON)
102 0x03B6 ζ (GREEK SMALL LETTER ZETA)
103 0x03B7 η (GREEK SMALL LETTER ETA)
104 0x03B8 θ (GREEK SMALL LETTER THETA)
105 0x03B9 ι (GREEK SMALL LETTER IOTA)
106 0x03BA κ (GREEK SMALL LETTER KAPPA)
107 0x03BB λ (GREEK SMALL LETTER LAMDA)
108 0x03BC μ (GREEK SMALL LETTER MU)
109 0x03BD ν (GREEK SMALL LETTER NU)
110 0x03BE ξ (GREEK SMALL LETTER XI)
111 0x03BF ο (GREEK SMALL LETTER OMICRON)
112 0x03C0 π (GREEK SMALL LETTER PI)
113 0x03C1 ρ (GREEK SMALL LETTER RHO)
114 0x03C2 ς (GREEK SMALL LETTER FINAL SIGMA)
115 0x03C3 σ (GREEK SMALL LETTER SIGMA)
116 0x03C4 τ (GREEK SMALL LETTER TAU)
117 0x03C5 υ (GREEK SMALL LETTER UPSILON)
118 0x03C6 φ (GREEK SMALL LETTER PHI)
119 0x03C7 χ (GREEK SMALL LETTER CHI)
120 0x03C8 ψ (GREEK SMALL LETTER PSI)
121 0x03C9 ω (GREEK SMALL LETTER OMEGA)
122 0x03CA ϊ (GREEK SMALL LETTER IOTA WITH DIALYTIKA)
123 0x03CB ϋ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA)
124 0x03CC ό (GREEK SMALL LETTER OMICRON WITH TONOS)
125 0x03CD ύ (GREEK SMALL LETTER UPSILON WITH TONOS)
126 0x03CE ώ (GREEK SMALL LETTER OMEGA WITH TONOS)

View File

@ -1,98 +0,0 @@
# For details on index index-iso-8859-8.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 7657a9ca3fa875990da960d3f812eea28dcd0ae6ed55a18d5394303c86f5484b
# Date: 2018-01-06
0 0x0080 € (<control>)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x0085 … (<control>)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x0091 ‘ (<control>)
18 0x0092 ’ (<control>)
19 0x0093 “ (<control>)
20 0x0094 ” (<control>)
21 0x0095 • (<control>)
22 0x0096 – (<control>)
23 0x0097 — (<control>)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x00A5 ¥ (YEN SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x00D7 × (MULTIPLICATION SIGN)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00AF ¯ (MACRON)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00B8 ¸ (CEDILLA)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x00F7 ÷ (DIVISION SIGN)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS)
95 0x2017 ‗ (DOUBLE LOW LINE)
96 0x05D0 א (HEBREW LETTER ALEF)
97 0x05D1 ב (HEBREW LETTER BET)
98 0x05D2 ג (HEBREW LETTER GIMEL)
99 0x05D3 ד (HEBREW LETTER DALET)
100 0x05D4 ה (HEBREW LETTER HE)
101 0x05D5 ו (HEBREW LETTER VAV)
102 0x05D6 ז (HEBREW LETTER ZAYIN)
103 0x05D7 ח (HEBREW LETTER HET)
104 0x05D8 ט (HEBREW LETTER TET)
105 0x05D9 י (HEBREW LETTER YOD)
106 0x05DA ך (HEBREW LETTER FINAL KAF)
107 0x05DB כ (HEBREW LETTER KAF)
108 0x05DC ל (HEBREW LETTER LAMED)
109 0x05DD ם (HEBREW LETTER FINAL MEM)
110 0x05DE מ (HEBREW LETTER MEM)
111 0x05DF ן (HEBREW LETTER FINAL NUN)
112 0x05E0 נ (HEBREW LETTER NUN)
113 0x05E1 ס (HEBREW LETTER SAMEKH)
114 0x05E2 ע (HEBREW LETTER AYIN)
115 0x05E3 ף (HEBREW LETTER FINAL PE)
116 0x05E4 פ (HEBREW LETTER PE)
117 0x05E5 ץ (HEBREW LETTER FINAL TSADI)
118 0x05E6 צ (HEBREW LETTER TSADI)
119 0x05E7 ק (HEBREW LETTER QOF)
120 0x05E8 ר (HEBREW LETTER RESH)
121 0x05E9 ש (HEBREW LETTER SHIN)
122 0x05EA ת (HEBREW LETTER TAV)
125 0x200E (LEFT-TO-RIGHT MARK)
126 0x200F (RIGHT-TO-LEFT MARK)

View File

@ -1,134 +0,0 @@
# For details on index index-koi8-r.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: c5497cd9071cb352c0e56b219154e539badf63de40b71578f09e2e11fe7d50ae
# Date: 2018-01-06
0 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL)
1 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL)
2 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT)
3 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT)
4 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT)
5 0x2518 ┘ (BOX DRAWINGS LIGHT UP AND LEFT)
6 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT)
7 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL AND LEFT)
8 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL)
9 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL)
10 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL)
11 0x2580 ▀ (UPPER HALF BLOCK)
12 0x2584 ▄ (LOWER HALF BLOCK)
13 0x2588 █ (FULL BLOCK)
14 0x258C ▌ (LEFT HALF BLOCK)
15 0x2590 ▐ (RIGHT HALF BLOCK)
16 0x2591 ░ (LIGHT SHADE)
17 0x2592 ▒ (MEDIUM SHADE)
18 0x2593 ▓ (DARK SHADE)
19 0x2320 ⌠ (TOP HALF INTEGRAL)
20 0x25A0 ■ (BLACK SQUARE)
21 0x2219 ∙ (BULLET OPERATOR)
22 0x221A √ (SQUARE ROOT)
23 0x2248 ≈ (ALMOST EQUAL TO)
24 0x2264 ≤ (LESS-THAN OR EQUAL TO)
25 0x2265 ≥ (GREATER-THAN OR EQUAL TO)
26 0x00A0   (NO-BREAK SPACE)
27 0x2321 ⌡ (BOTTOM HALF INTEGRAL)
28 0x00B0 ° (DEGREE SIGN)
29 0x00B2 ² (SUPERSCRIPT TWO)
30 0x00B7 · (MIDDLE DOT)
31 0x00F7 ÷ (DIVISION SIGN)
32 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL)
33 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL)
34 0x2552 ╒ (BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE)
35 0x0451 ё (CYRILLIC SMALL LETTER IO)
36 0x2553 ╓ (BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE)
37 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT)
38 0x2555 ╕ (BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE)
39 0x2556 ╖ (BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE)
40 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT)
41 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE)
42 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE)
43 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT)
44 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE)
45 0x255C ╜ (BOX DRAWINGS UP DOUBLE AND LEFT SINGLE)
46 0x255D ╝ (BOX DRAWINGS DOUBLE UP AND LEFT)
47 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE)
48 0x255F ╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE)
49 0x2560 ╠ (BOX DRAWINGS DOUBLE VERTICAL AND RIGHT)
50 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE)
51 0x0401 Ё (CYRILLIC CAPITAL LETTER IO)
52 0x2562 ╢ (BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE)
53 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT)
54 0x2564 ╤ (BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE)
55 0x2565 ╥ (BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE)
56 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL)
57 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE)
58 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE)
59 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL)
60 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE)
61 0x256B ╫ (BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE)
62 0x256C ╬ (BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL)
63 0x00A9 © (COPYRIGHT SIGN)
64 0x044E ю (CYRILLIC SMALL LETTER YU)
65 0x0430 а (CYRILLIC SMALL LETTER A)
66 0x0431 б (CYRILLIC SMALL LETTER BE)
67 0x0446 ц (CYRILLIC SMALL LETTER TSE)
68 0x0434 д (CYRILLIC SMALL LETTER DE)
69 0x0435 е (CYRILLIC SMALL LETTER IE)
70 0x0444 ф (CYRILLIC SMALL LETTER EF)
71 0x0433 г (CYRILLIC SMALL LETTER GHE)
72 0x0445 х (CYRILLIC SMALL LETTER HA)
73 0x0438 и (CYRILLIC SMALL LETTER I)
74 0x0439 й (CYRILLIC SMALL LETTER SHORT I)
75 0x043A к (CYRILLIC SMALL LETTER KA)
76 0x043B л (CYRILLIC SMALL LETTER EL)
77 0x043C м (CYRILLIC SMALL LETTER EM)
78 0x043D н (CYRILLIC SMALL LETTER EN)
79 0x043E о (CYRILLIC SMALL LETTER O)
80 0x043F п (CYRILLIC SMALL LETTER PE)
81 0x044F я (CYRILLIC SMALL LETTER YA)
82 0x0440 р (CYRILLIC SMALL LETTER ER)
83 0x0441 с (CYRILLIC SMALL LETTER ES)
84 0x0442 т (CYRILLIC SMALL LETTER TE)
85 0x0443 у (CYRILLIC SMALL LETTER U)
86 0x0436 ж (CYRILLIC SMALL LETTER ZHE)
87 0x0432 в (CYRILLIC SMALL LETTER VE)
88 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN)
89 0x044B ы (CYRILLIC SMALL LETTER YERU)
90 0x0437 з (CYRILLIC SMALL LETTER ZE)
91 0x0448 ш (CYRILLIC SMALL LETTER SHA)
92 0x044D э (CYRILLIC SMALL LETTER E)
93 0x0449 щ (CYRILLIC SMALL LETTER SHCHA)
94 0x0447 ч (CYRILLIC SMALL LETTER CHE)
95 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN)
96 0x042E Ю (CYRILLIC CAPITAL LETTER YU)
97 0x0410 А (CYRILLIC CAPITAL LETTER A)
98 0x0411 Б (CYRILLIC CAPITAL LETTER BE)
99 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE)
100 0x0414 Д (CYRILLIC CAPITAL LETTER DE)
101 0x0415 Е (CYRILLIC CAPITAL LETTER IE)
102 0x0424 Ф (CYRILLIC CAPITAL LETTER EF)
103 0x0413 Г (CYRILLIC CAPITAL LETTER GHE)
104 0x0425 Х (CYRILLIC CAPITAL LETTER HA)
105 0x0418 И (CYRILLIC CAPITAL LETTER I)
106 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I)
107 0x041A К (CYRILLIC CAPITAL LETTER KA)
108 0x041B Л (CYRILLIC CAPITAL LETTER EL)
109 0x041C М (CYRILLIC CAPITAL LETTER EM)
110 0x041D Н (CYRILLIC CAPITAL LETTER EN)
111 0x041E О (CYRILLIC CAPITAL LETTER O)
112 0x041F П (CYRILLIC CAPITAL LETTER PE)
113 0x042F Я (CYRILLIC CAPITAL LETTER YA)
114 0x0420 Р (CYRILLIC CAPITAL LETTER ER)
115 0x0421 С (CYRILLIC CAPITAL LETTER ES)
116 0x0422 Т (CYRILLIC CAPITAL LETTER TE)
117 0x0423 У (CYRILLIC CAPITAL LETTER U)
118 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE)
119 0x0412 В (CYRILLIC CAPITAL LETTER VE)
120 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN)
121 0x042B Ы (CYRILLIC CAPITAL LETTER YERU)
122 0x0417 З (CYRILLIC CAPITAL LETTER ZE)
123 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA)
124 0x042D Э (CYRILLIC CAPITAL LETTER E)
125 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA)
126 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE)
127 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN)

View File

@ -1,134 +0,0 @@
# For details on index index-koi8-u.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 19a4da2c3f245118bbc8019326f45a07832949938ff903f03d62ac4da1f61f40
# Date: 2018-01-06
0 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL)
1 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL)
2 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT)
3 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT)
4 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT)
5 0x2518 ┘ (BOX DRAWINGS LIGHT UP AND LEFT)
6 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT)
7 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL AND LEFT)
8 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL)
9 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL)
10 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL)
11 0x2580 ▀ (UPPER HALF BLOCK)
12 0x2584 ▄ (LOWER HALF BLOCK)
13 0x2588 █ (FULL BLOCK)
14 0x258C ▌ (LEFT HALF BLOCK)
15 0x2590 ▐ (RIGHT HALF BLOCK)
16 0x2591 ░ (LIGHT SHADE)
17 0x2592 ▒ (MEDIUM SHADE)
18 0x2593 ▓ (DARK SHADE)
19 0x2320 ⌠ (TOP HALF INTEGRAL)
20 0x25A0 ■ (BLACK SQUARE)
21 0x2219 ∙ (BULLET OPERATOR)
22 0x221A √ (SQUARE ROOT)
23 0x2248 ≈ (ALMOST EQUAL TO)
24 0x2264 ≤ (LESS-THAN OR EQUAL TO)
25 0x2265 ≥ (GREATER-THAN OR EQUAL TO)
26 0x00A0   (NO-BREAK SPACE)
27 0x2321 ⌡ (BOTTOM HALF INTEGRAL)
28 0x00B0 ° (DEGREE SIGN)
29 0x00B2 ² (SUPERSCRIPT TWO)
30 0x00B7 · (MIDDLE DOT)
31 0x00F7 ÷ (DIVISION SIGN)
32 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL)
33 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL)
34 0x2552 ╒ (BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE)
35 0x0451 ё (CYRILLIC SMALL LETTER IO)
36 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE)
37 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT)
38 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I)
39 0x0457 ї (CYRILLIC SMALL LETTER YI)
40 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT)
41 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE)
42 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE)
43 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT)
44 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE)
45 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN)
46 0x045E ў (CYRILLIC SMALL LETTER SHORT U)
47 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE)
48 0x255F ╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE)
49 0x2560 ╠ (BOX DRAWINGS DOUBLE VERTICAL AND RIGHT)
50 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE)
51 0x0401 Ё (CYRILLIC CAPITAL LETTER IO)
52 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE)
53 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT)
54 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I)
55 0x0407 Ї (CYRILLIC CAPITAL LETTER YI)
56 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL)
57 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE)
58 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE)
59 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL)
60 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE)
61 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN)
62 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U)
63 0x00A9 © (COPYRIGHT SIGN)
64 0x044E ю (CYRILLIC SMALL LETTER YU)
65 0x0430 а (CYRILLIC SMALL LETTER A)
66 0x0431 б (CYRILLIC SMALL LETTER BE)
67 0x0446 ц (CYRILLIC SMALL LETTER TSE)
68 0x0434 д (CYRILLIC SMALL LETTER DE)
69 0x0435 е (CYRILLIC SMALL LETTER IE)
70 0x0444 ф (CYRILLIC SMALL LETTER EF)
71 0x0433 г (CYRILLIC SMALL LETTER GHE)
72 0x0445 х (CYRILLIC SMALL LETTER HA)
73 0x0438 и (CYRILLIC SMALL LETTER I)
74 0x0439 й (CYRILLIC SMALL LETTER SHORT I)
75 0x043A к (CYRILLIC SMALL LETTER KA)
76 0x043B л (CYRILLIC SMALL LETTER EL)
77 0x043C м (CYRILLIC SMALL LETTER EM)
78 0x043D н (CYRILLIC SMALL LETTER EN)
79 0x043E о (CYRILLIC SMALL LETTER O)
80 0x043F п (CYRILLIC SMALL LETTER PE)
81 0x044F я (CYRILLIC SMALL LETTER YA)
82 0x0440 р (CYRILLIC SMALL LETTER ER)
83 0x0441 с (CYRILLIC SMALL LETTER ES)
84 0x0442 т (CYRILLIC SMALL LETTER TE)
85 0x0443 у (CYRILLIC SMALL LETTER U)
86 0x0436 ж (CYRILLIC SMALL LETTER ZHE)
87 0x0432 в (CYRILLIC SMALL LETTER VE)
88 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN)
89 0x044B ы (CYRILLIC SMALL LETTER YERU)
90 0x0437 з (CYRILLIC SMALL LETTER ZE)
91 0x0448 ш (CYRILLIC SMALL LETTER SHA)
92 0x044D э (CYRILLIC SMALL LETTER E)
93 0x0449 щ (CYRILLIC SMALL LETTER SHCHA)
94 0x0447 ч (CYRILLIC SMALL LETTER CHE)
95 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN)
96 0x042E Ю (CYRILLIC CAPITAL LETTER YU)
97 0x0410 А (CYRILLIC CAPITAL LETTER A)
98 0x0411 Б (CYRILLIC CAPITAL LETTER BE)
99 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE)
100 0x0414 Д (CYRILLIC CAPITAL LETTER DE)
101 0x0415 Е (CYRILLIC CAPITAL LETTER IE)
102 0x0424 Ф (CYRILLIC CAPITAL LETTER EF)
103 0x0413 Г (CYRILLIC CAPITAL LETTER GHE)
104 0x0425 Х (CYRILLIC CAPITAL LETTER HA)
105 0x0418 И (CYRILLIC CAPITAL LETTER I)
106 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I)
107 0x041A К (CYRILLIC CAPITAL LETTER KA)
108 0x041B Л (CYRILLIC CAPITAL LETTER EL)
109 0x041C М (CYRILLIC CAPITAL LETTER EM)
110 0x041D Н (CYRILLIC CAPITAL LETTER EN)
111 0x041E О (CYRILLIC CAPITAL LETTER O)
112 0x041F П (CYRILLIC CAPITAL LETTER PE)
113 0x042F Я (CYRILLIC CAPITAL LETTER YA)
114 0x0420 Р (CYRILLIC CAPITAL LETTER ER)
115 0x0421 С (CYRILLIC CAPITAL LETTER ES)
116 0x0422 Т (CYRILLIC CAPITAL LETTER TE)
117 0x0423 У (CYRILLIC CAPITAL LETTER U)
118 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE)
119 0x0412 В (CYRILLIC CAPITAL LETTER VE)
120 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN)
121 0x042B Ы (CYRILLIC CAPITAL LETTER YERU)
122 0x0417 З (CYRILLIC CAPITAL LETTER ZE)
123 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA)
124 0x042D Э (CYRILLIC CAPITAL LETTER E)
125 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA)
126 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE)
127 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN)

View File

@ -1,134 +0,0 @@
# For details on index index-macintosh.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: f2c6a4f6406b3e86a50a5dba4d2b7dd48e2e33c0d82aefe764535c934ec11764
# Date: 2018-01-06
0 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
1 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
2 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
3 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
4 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE)
5 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
6 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
7 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
8 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
9 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
10 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
11 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE)
12 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
13 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
14 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
15 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
16 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
17 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
18 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
19 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE)
20 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
21 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
22 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE)
23 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
24 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE)
25 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
26 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
27 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
28 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
29 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
30 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
31 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
32 0x2020 † (DAGGER)
33 0x00B0 ° (DEGREE SIGN)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x00A7 § (SECTION SIGN)
37 0x2022 • (BULLET)
38 0x00B6 ¶ (PILCROW SIGN)
39 0x00DF ß (LATIN SMALL LETTER SHARP S)
40 0x00AE ® (REGISTERED SIGN)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x2122 ™ (TRADE MARK SIGN)
43 0x00B4 ´ (ACUTE ACCENT)
44 0x00A8 ¨ (DIAERESIS)
45 0x2260 ≠ (NOT EQUAL TO)
46 0x00C6 Æ (LATIN CAPITAL LETTER AE)
47 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
48 0x221E ∞ (INFINITY)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x2264 ≤ (LESS-THAN OR EQUAL TO)
51 0x2265 ≥ (GREATER-THAN OR EQUAL TO)
52 0x00A5 ¥ (YEN SIGN)
53 0x00B5 µ (MICRO SIGN)
54 0x2202 ∂ (PARTIAL DIFFERENTIAL)
55 0x2211 ∑ (N-ARY SUMMATION)
56 0x220F ∏ (N-ARY PRODUCT)
57 0x03C0 π (GREEK SMALL LETTER PI)
58 0x222B ∫ (INTEGRAL)
59 0x00AA ª (FEMININE ORDINAL INDICATOR)
60 0x00BA º (MASCULINE ORDINAL INDICATOR)
61 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA)
62 0x00E6 æ (LATIN SMALL LETTER AE)
63 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
64 0x00BF ¿ (INVERTED QUESTION MARK)
65 0x00A1 ¡ (INVERTED EXCLAMATION MARK)
66 0x00AC ¬ (NOT SIGN)
67 0x221A √ (SQUARE ROOT)
68 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK)
69 0x2248 ≈ (ALMOST EQUAL TO)
70 0x2206 ∆ (INCREMENT)
71 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
72 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
73 0x2026 … (HORIZONTAL ELLIPSIS)
74 0x00A0   (NO-BREAK SPACE)
75 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE)
76 0x00C3 Ã (LATIN CAPITAL LETTER A WITH TILDE)
77 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
78 0x0152 Œ (LATIN CAPITAL LIGATURE OE)
79 0x0153 œ (LATIN SMALL LIGATURE OE)
80 0x2013 (EN DASH)
81 0x2014 — (EM DASH)
82 0x201C “ (LEFT DOUBLE QUOTATION MARK)
83 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
84 0x2018 (LEFT SINGLE QUOTATION MARK)
85 0x2019 (RIGHT SINGLE QUOTATION MARK)
86 0x00F7 ÷ (DIVISION SIGN)
87 0x25CA ◊ (LOZENGE)
88 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS)
89 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS)
90 0x2044 (FRACTION SLASH)
91 0x20AC € (EURO SIGN)
92 0x2039 (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
93 0x203A (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
94 0xFB01 fi (LATIN SMALL LIGATURE FI)
95 0xFB02 fl (LATIN SMALL LIGATURE FL)
96 0x2021 ‡ (DOUBLE DAGGER)
97 0x00B7 · (MIDDLE DOT)
98 0x201A (SINGLE LOW-9 QUOTATION MARK)
99 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
100 0x2030 ‰ (PER MILLE SIGN)
101 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
102 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX)
103 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
104 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
105 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE)
106 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
107 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
108 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
109 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE)
110 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
111 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
112 0xF8FF  (<Private Use>)
113 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE)
114 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
115 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
116 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE)
117 0x0131 ı (LATIN SMALL LETTER DOTLESS I)
118 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT)
119 0x02DC ˜ (SMALL TILDE)
120 0x00AF ¯ (MACRON)
121 0x02D8 ˘ (BREVE)
122 0x02D9 ˙ (DOT ABOVE)
123 0x02DA ˚ (RING ABOVE)
124 0x00B8 ¸ (CEDILLA)
125 0x02DD ˝ (DOUBLE ACUTE ACCENT)
126 0x02DB ˛ (OGONEK)
127 0x02C7 ˇ (CARON)

View File

@ -1,134 +0,0 @@
# For details on index index-windows-1250.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 0669455a7a1c70ba6003ea737991e8ee9adc455125c13cfe6705a361358de5fa
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x0081  (<control>)
2 0x201A (SINGLE LOW-9 QUOTATION MARK)
3 0x0083 ƒ (<control>)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x0088 ˆ (<control>)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
11 0x2039 (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE)
13 0x0164 Ť (LATIN CAPITAL LETTER T WITH CARON)
14 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
15 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE)
16 0x0090  (<control>)
17 0x2018 (LEFT SINGLE QUOTATION MARK)
18 0x2019 (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 (EN DASH)
23 0x2014 — (EM DASH)
24 0x0098 ˜ (<control>)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x0161 š (LATIN SMALL LETTER S WITH CARON)
27 0x203A (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x015B ś (LATIN SMALL LETTER S WITH ACUTE)
29 0x0165 ť (LATIN SMALL LETTER T WITH CARON)
30 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
31 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE)
32 0x00A0   (NO-BREAK SPACE)
33 0x02C7 ˇ (CARON)
34 0x02D8 ˘ (BREVE)
35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x02DB ˛ (OGONEK)
51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00B8 ¸ (CEDILLA)
57 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK)
58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x013D Ľ (LATIN CAPITAL LETTER L WITH CARON)
61 0x02DD ˝ (DOUBLE ACUTE ACCENT)
62 0x013E ľ (LATIN SMALL LETTER L WITH CARON)
63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE)
64 0x0154 Ŕ (LATIN CAPITAL LETTER R WITH ACUTE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x0139 Ĺ (LATIN CAPITAL LETTER L WITH ACUTE)
70 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x011A Ě (LATIN CAPITAL LETTER E WITH CARON)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x010E Ď (LATIN CAPITAL LETTER D WITH CARON)
80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE)
81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE)
82 0x0147 Ň (LATIN CAPITAL LETTER N WITH CARON)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x0158 Ř (LATIN CAPITAL LETTER R WITH CARON)
89 0x016E Ů (LATIN CAPITAL LETTER U WITH RING ABOVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE)
94 0x0162 Ţ (LATIN CAPITAL LETTER T WITH CEDILLA)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x0155 ŕ (LATIN SMALL LETTER R WITH ACUTE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x013A ĺ (LATIN SMALL LETTER L WITH ACUTE)
102 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x010D č (LATIN SMALL LETTER C WITH CARON)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x011B ě (LATIN SMALL LETTER E WITH CARON)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x010F ď (LATIN SMALL LETTER D WITH CARON)
112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE)
113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE)
114 0x0148 ň (LATIN SMALL LETTER N WITH CARON)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x0151 ő (LATIN SMALL LETTER O WITH DOUBLE ACUTE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x0159 ř (LATIN SMALL LETTER R WITH CARON)
121 0x016F ů (LATIN SMALL LETTER U WITH RING ABOVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE)
126 0x0163 ţ (LATIN SMALL LETTER T WITH CEDILLA)
127 0x02D9 ˙ (DOT ABOVE)

View File

@ -1,134 +0,0 @@
# For details on index index-windows-1251.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 7592ef921679ba168b00a9e9afa3b4eebd67bf13dc7e84c4b6e120de856826e0
# Date: 2018-01-06
0 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE)
1 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE)
2 0x201A (SINGLE LOW-9 QUOTATION MARK)
3 0x0453 ѓ (CYRILLIC SMALL LETTER GJE)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x20AC € (EURO SIGN)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE)
11 0x2039 (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x040A Њ (CYRILLIC CAPITAL LETTER NJE)
13 0x040C Ќ (CYRILLIC CAPITAL LETTER KJE)
14 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE)
15 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE)
16 0x0452 ђ (CYRILLIC SMALL LETTER DJE)
17 0x2018 (LEFT SINGLE QUOTATION MARK)
18 0x2019 (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 (EN DASH)
23 0x2014 — (EM DASH)
24 0x0098 ˜ (<control>)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x0459 љ (CYRILLIC SMALL LETTER LJE)
27 0x203A (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x045A њ (CYRILLIC SMALL LETTER NJE)
29 0x045C ќ (CYRILLIC SMALL LETTER KJE)
30 0x045B ћ (CYRILLIC SMALL LETTER TSHE)
31 0x045F џ (CYRILLIC SMALL LETTER DZHE)
32 0x00A0   (NO-BREAK SPACE)
33 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U)
34 0x045E ў (CYRILLIC SMALL LETTER SHORT U)
35 0x0408 Ј (CYRILLIC CAPITAL LETTER JE)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x0401 Ё (CYRILLIC CAPITAL LETTER IO)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x0407 Ї (CYRILLIC CAPITAL LETTER YI)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I)
51 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I)
52 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x0451 ё (CYRILLIC SMALL LETTER IO)
57 0x2116 № (NUMERO SIGN)
58 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x0458 ј (CYRILLIC SMALL LETTER JE)
61 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE)
62 0x0455 ѕ (CYRILLIC SMALL LETTER DZE)
63 0x0457 ї (CYRILLIC SMALL LETTER YI)
64 0x0410 А (CYRILLIC CAPITAL LETTER A)
65 0x0411 Б (CYRILLIC CAPITAL LETTER BE)
66 0x0412 В (CYRILLIC CAPITAL LETTER VE)
67 0x0413 Г (CYRILLIC CAPITAL LETTER GHE)
68 0x0414 Д (CYRILLIC CAPITAL LETTER DE)
69 0x0415 Е (CYRILLIC CAPITAL LETTER IE)
70 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE)
71 0x0417 З (CYRILLIC CAPITAL LETTER ZE)
72 0x0418 И (CYRILLIC CAPITAL LETTER I)
73 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I)
74 0x041A К (CYRILLIC CAPITAL LETTER KA)
75 0x041B Л (CYRILLIC CAPITAL LETTER EL)
76 0x041C М (CYRILLIC CAPITAL LETTER EM)
77 0x041D Н (CYRILLIC CAPITAL LETTER EN)
78 0x041E О (CYRILLIC CAPITAL LETTER O)
79 0x041F П (CYRILLIC CAPITAL LETTER PE)
80 0x0420 Р (CYRILLIC CAPITAL LETTER ER)
81 0x0421 С (CYRILLIC CAPITAL LETTER ES)
82 0x0422 Т (CYRILLIC CAPITAL LETTER TE)
83 0x0423 У (CYRILLIC CAPITAL LETTER U)
84 0x0424 Ф (CYRILLIC CAPITAL LETTER EF)
85 0x0425 Х (CYRILLIC CAPITAL LETTER HA)
86 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE)
87 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE)
88 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA)
89 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA)
90 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN)
91 0x042B Ы (CYRILLIC CAPITAL LETTER YERU)
92 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN)
93 0x042D Э (CYRILLIC CAPITAL LETTER E)
94 0x042E Ю (CYRILLIC CAPITAL LETTER YU)
95 0x042F Я (CYRILLIC CAPITAL LETTER YA)
96 0x0430 а (CYRILLIC SMALL LETTER A)
97 0x0431 б (CYRILLIC SMALL LETTER BE)
98 0x0432 в (CYRILLIC SMALL LETTER VE)
99 0x0433 г (CYRILLIC SMALL LETTER GHE)
100 0x0434 д (CYRILLIC SMALL LETTER DE)
101 0x0435 е (CYRILLIC SMALL LETTER IE)
102 0x0436 ж (CYRILLIC SMALL LETTER ZHE)
103 0x0437 з (CYRILLIC SMALL LETTER ZE)
104 0x0438 и (CYRILLIC SMALL LETTER I)
105 0x0439 й (CYRILLIC SMALL LETTER SHORT I)
106 0x043A к (CYRILLIC SMALL LETTER KA)
107 0x043B л (CYRILLIC SMALL LETTER EL)
108 0x043C м (CYRILLIC SMALL LETTER EM)
109 0x043D н (CYRILLIC SMALL LETTER EN)
110 0x043E о (CYRILLIC SMALL LETTER O)
111 0x043F п (CYRILLIC SMALL LETTER PE)
112 0x0440 р (CYRILLIC SMALL LETTER ER)
113 0x0441 с (CYRILLIC SMALL LETTER ES)
114 0x0442 т (CYRILLIC SMALL LETTER TE)
115 0x0443 у (CYRILLIC SMALL LETTER U)
116 0x0444 ф (CYRILLIC SMALL LETTER EF)
117 0x0445 х (CYRILLIC SMALL LETTER HA)
118 0x0446 ц (CYRILLIC SMALL LETTER TSE)
119 0x0447 ч (CYRILLIC SMALL LETTER CHE)
120 0x0448 ш (CYRILLIC SMALL LETTER SHA)
121 0x0449 щ (CYRILLIC SMALL LETTER SHCHA)
122 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN)
123 0x044B ы (CYRILLIC SMALL LETTER YERU)
124 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN)
125 0x044D э (CYRILLIC SMALL LETTER E)
126 0x044E ю (CYRILLIC SMALL LETTER YU)
127 0x044F я (CYRILLIC SMALL LETTER YA)

View File

@ -1,134 +0,0 @@
# For details on index index-windows-1252.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: e56d49d9176e9a412283cf29ac9bd613f5620462f2a080a84eceaf974cfa18b7
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x0081  (<control>)
2 0x201A (SINGLE LOW-9 QUOTATION MARK)
3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
11 0x2039 (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x0152 Œ (LATIN CAPITAL LIGATURE OE)
13 0x008D  (<control>)
14 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x2018 (LEFT SINGLE QUOTATION MARK)
18 0x2019 (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 (EN DASH)
23 0x2014 — (EM DASH)
24 0x02DC ˜ (SMALL TILDE)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x0161 š (LATIN SMALL LETTER S WITH CARON)
27 0x203A (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x0153 œ (LATIN SMALL LIGATURE OE)
29 0x009D  (<control>)
30 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS)
32 0x00A0   (NO-BREAK SPACE)
33 0x00A1 ¡ (INVERTED EXCLAMATION MARK)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x00A5 ¥ (YEN SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x00AA ª (FEMININE ORDINAL INDICATOR)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00AF ¯ (MACRON)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00B8 ¸ (CEDILLA)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x00BA º (MASCULINE ORDINAL INDICATOR)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS)
63 0x00BF ¿ (INVERTED QUESTION MARK)
64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x00C3 Ã (LATIN CAPITAL LETTER A WITH TILDE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x00C6 Æ (LATIN CAPITAL LETTER AE)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
80 0x00D0 Ð (LATIN CAPITAL LETTER ETH)
81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE)
82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE)
94 0x00DE Þ (LATIN CAPITAL LETTER THORN)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x00E6 æ (LATIN SMALL LETTER AE)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
112 0x00F0 ð (LATIN SMALL LETTER ETH)
113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE)
114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE)
126 0x00FE þ (LATIN SMALL LETTER THORN)
127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS)

View File

@ -1,131 +0,0 @@
# For details on index index-windows-1253.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 49fdc881a3488904dd1e8dfba9aef3258454249958b611bcded1d4c981ab5561
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x0081  (<control>)
2 0x201A (SINGLE LOW-9 QUOTATION MARK)
3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x0088 ˆ (<control>)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x008A Š (<control>)
11 0x2039 (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x2018 (LEFT SINGLE QUOTATION MARK)
18 0x2019 (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 (EN DASH)
23 0x2014 — (EM DASH)
24 0x0098 ˜ (<control>)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x009A š (<control>)
27 0x203A (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x0385 ΅ (GREEK DIALYTIKA TONOS)
34 0x0386 Ά (GREEK CAPITAL LETTER ALPHA WITH TONOS)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x00A5 ¥ (YEN SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x2015 ― (HORIZONTAL BAR)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x0384 ΄ (GREEK TONOS)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x0388 Έ (GREEK CAPITAL LETTER EPSILON WITH TONOS)
57 0x0389 Ή (GREEK CAPITAL LETTER ETA WITH TONOS)
58 0x038A Ί (GREEK CAPITAL LETTER IOTA WITH TONOS)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x038C Ό (GREEK CAPITAL LETTER OMICRON WITH TONOS)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x038E Ύ (GREEK CAPITAL LETTER UPSILON WITH TONOS)
63 0x038F Ώ (GREEK CAPITAL LETTER OMEGA WITH TONOS)
64 0x0390 ΐ (GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS)
65 0x0391 Α (GREEK CAPITAL LETTER ALPHA)
66 0x0392 Β (GREEK CAPITAL LETTER BETA)
67 0x0393 Γ (GREEK CAPITAL LETTER GAMMA)
68 0x0394 Δ (GREEK CAPITAL LETTER DELTA)
69 0x0395 Ε (GREEK CAPITAL LETTER EPSILON)
70 0x0396 Ζ (GREEK CAPITAL LETTER ZETA)
71 0x0397 Η (GREEK CAPITAL LETTER ETA)
72 0x0398 Θ (GREEK CAPITAL LETTER THETA)
73 0x0399 Ι (GREEK CAPITAL LETTER IOTA)
74 0x039A Κ (GREEK CAPITAL LETTER KAPPA)
75 0x039B Λ (GREEK CAPITAL LETTER LAMDA)
76 0x039C Μ (GREEK CAPITAL LETTER MU)
77 0x039D Ν (GREEK CAPITAL LETTER NU)
78 0x039E Ξ (GREEK CAPITAL LETTER XI)
79 0x039F Ο (GREEK CAPITAL LETTER OMICRON)
80 0x03A0 Π (GREEK CAPITAL LETTER PI)
81 0x03A1 Ρ (GREEK CAPITAL LETTER RHO)
83 0x03A3 Σ (GREEK CAPITAL LETTER SIGMA)
84 0x03A4 Τ (GREEK CAPITAL LETTER TAU)
85 0x03A5 Υ (GREEK CAPITAL LETTER UPSILON)
86 0x03A6 Φ (GREEK CAPITAL LETTER PHI)
87 0x03A7 Χ (GREEK CAPITAL LETTER CHI)
88 0x03A8 Ψ (GREEK CAPITAL LETTER PSI)
89 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA)
90 0x03AA Ϊ (GREEK CAPITAL LETTER IOTA WITH DIALYTIKA)
91 0x03AB Ϋ (GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA)
92 0x03AC ά (GREEK SMALL LETTER ALPHA WITH TONOS)
93 0x03AD έ (GREEK SMALL LETTER EPSILON WITH TONOS)
94 0x03AE ή (GREEK SMALL LETTER ETA WITH TONOS)
95 0x03AF ί (GREEK SMALL LETTER IOTA WITH TONOS)
96 0x03B0 ΰ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS)
97 0x03B1 α (GREEK SMALL LETTER ALPHA)
98 0x03B2 β (GREEK SMALL LETTER BETA)
99 0x03B3 γ (GREEK SMALL LETTER GAMMA)
100 0x03B4 δ (GREEK SMALL LETTER DELTA)
101 0x03B5 ε (GREEK SMALL LETTER EPSILON)
102 0x03B6 ζ (GREEK SMALL LETTER ZETA)
103 0x03B7 η (GREEK SMALL LETTER ETA)
104 0x03B8 θ (GREEK SMALL LETTER THETA)
105 0x03B9 ι (GREEK SMALL LETTER IOTA)
106 0x03BA κ (GREEK SMALL LETTER KAPPA)
107 0x03BB λ (GREEK SMALL LETTER LAMDA)
108 0x03BC μ (GREEK SMALL LETTER MU)
109 0x03BD ν (GREEK SMALL LETTER NU)
110 0x03BE ξ (GREEK SMALL LETTER XI)
111 0x03BF ο (GREEK SMALL LETTER OMICRON)
112 0x03C0 π (GREEK SMALL LETTER PI)
113 0x03C1 ρ (GREEK SMALL LETTER RHO)
114 0x03C2 ς (GREEK SMALL LETTER FINAL SIGMA)
115 0x03C3 σ (GREEK SMALL LETTER SIGMA)
116 0x03C4 τ (GREEK SMALL LETTER TAU)
117 0x03C5 υ (GREEK SMALL LETTER UPSILON)
118 0x03C6 φ (GREEK SMALL LETTER PHI)
119 0x03C7 χ (GREEK SMALL LETTER CHI)
120 0x03C8 ψ (GREEK SMALL LETTER PSI)
121 0x03C9 ω (GREEK SMALL LETTER OMEGA)
122 0x03CA ϊ (GREEK SMALL LETTER IOTA WITH DIALYTIKA)
123 0x03CB ϋ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA)
124 0x03CC ό (GREEK SMALL LETTER OMICRON WITH TONOS)
125 0x03CD ύ (GREEK SMALL LETTER UPSILON WITH TONOS)
126 0x03CE ώ (GREEK SMALL LETTER OMEGA WITH TONOS)

View File

@ -1,134 +0,0 @@
# For details on index index-windows-1254.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: e80a27adf377438be8ba5bd223875ea56d6a4d47f958cce1c957a2c446825caa
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x0081  (<control>)
2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK)
3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x0152 Œ (LATIN CAPITAL LIGATURE OE)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK)
18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 – (EN DASH)
23 0x2014 — (EM DASH)
24 0x02DC ˜ (SMALL TILDE)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x0161 š (LATIN SMALL LETTER S WITH CARON)
27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x0153 œ (LATIN SMALL LIGATURE OE)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS)
32 0x00A0   (NO-BREAK SPACE)
33 0x00A1 ¡ (INVERTED EXCLAMATION MARK)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x00A5 ¥ (YEN SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x00AA ª (FEMININE ORDINAL INDICATOR)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00AF ¯ (MACRON)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00B8 ¸ (CEDILLA)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x00BA º (MASCULINE ORDINAL INDICATOR)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS)
63 0x00BF ¿ (INVERTED QUESTION MARK)
64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x00C3 Ã (LATIN CAPITAL LETTER A WITH TILDE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x00C6 Æ (LATIN CAPITAL LETTER AE)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
80 0x011E Ğ (LATIN CAPITAL LETTER G WITH BREVE)
81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE)
82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x0130 İ (LATIN CAPITAL LETTER I WITH DOT ABOVE)
94 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x00E6 æ (LATIN SMALL LETTER AE)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
112 0x011F ğ (LATIN SMALL LETTER G WITH BREVE)
113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE)
114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x0131 ı (LATIN SMALL LETTER DOTLESS I)
126 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA)
127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS)

View File

@ -1,124 +0,0 @@
# For details on index index-windows-1255.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: cd7fb43c97eefa1651084d92d02af53ad668bd848528c18c3b1af5c06b499651
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x0081  (<control>)
2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK)
3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x008A Š (<control>)
11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK)
18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 – (EN DASH)
23 0x2014 — (EM DASH)
24 0x02DC ˜ (SMALL TILDE)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x009A š (<control>)
27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x00A1 ¡ (INVERTED EXCLAMATION MARK)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x20AA ₪ (NEW SHEQEL SIGN)
37 0x00A5 ¥ (YEN SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x00D7 × (MULTIPLICATION SIGN)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00AF ¯ (MACRON)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00B8 ¸ (CEDILLA)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x00F7 ÷ (DIVISION SIGN)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS)
63 0x00BF ¿ (INVERTED QUESTION MARK)
64 0x05B0 ְ (HEBREW POINT SHEVA)
65 0x05B1 ֱ (HEBREW POINT HATAF SEGOL)
66 0x05B2 ֲ (HEBREW POINT HATAF PATAH)
67 0x05B3 ֳ (HEBREW POINT HATAF QAMATS)
68 0x05B4 ִ (HEBREW POINT HIRIQ)
69 0x05B5 ֵ (HEBREW POINT TSERE)
70 0x05B6 ֶ (HEBREW POINT SEGOL)
71 0x05B7 ַ (HEBREW POINT PATAH)
72 0x05B8 ָ (HEBREW POINT QAMATS)
73 0x05B9 ֹ (HEBREW POINT HOLAM)
74 0x05BA ֺ (HEBREW POINT HOLAM HASER FOR VAV)
75 0x05BB ֻ (HEBREW POINT QUBUTS)
76 0x05BC ּ (HEBREW POINT DAGESH OR MAPIQ)
77 0x05BD ֽ (HEBREW POINT METEG)
78 0x05BE ־ (HEBREW PUNCTUATION MAQAF)
79 0x05BF ֿ (HEBREW POINT RAFE)
80 0x05C0 ׀ (HEBREW PUNCTUATION PASEQ)
81 0x05C1 ׁ (HEBREW POINT SHIN DOT)
82 0x05C2 ׂ (HEBREW POINT SIN DOT)
83 0x05C3 ׃ (HEBREW PUNCTUATION SOF PASUQ)
84 0x05F0 װ (HEBREW LIGATURE YIDDISH DOUBLE VAV)
85 0x05F1 ױ (HEBREW LIGATURE YIDDISH VAV YOD)
86 0x05F2 ײ (HEBREW LIGATURE YIDDISH DOUBLE YOD)
87 0x05F3 ׳ (HEBREW PUNCTUATION GERESH)
88 0x05F4 ״ (HEBREW PUNCTUATION GERSHAYIM)
96 0x05D0 א (HEBREW LETTER ALEF)
97 0x05D1 ב (HEBREW LETTER BET)
98 0x05D2 ג (HEBREW LETTER GIMEL)
99 0x05D3 ד (HEBREW LETTER DALET)
100 0x05D4 ה (HEBREW LETTER HE)
101 0x05D5 ו (HEBREW LETTER VAV)
102 0x05D6 ז (HEBREW LETTER ZAYIN)
103 0x05D7 ח (HEBREW LETTER HET)
104 0x05D8 ט (HEBREW LETTER TET)
105 0x05D9 י (HEBREW LETTER YOD)
106 0x05DA ך (HEBREW LETTER FINAL KAF)
107 0x05DB כ (HEBREW LETTER KAF)
108 0x05DC ל (HEBREW LETTER LAMED)
109 0x05DD ם (HEBREW LETTER FINAL MEM)
110 0x05DE מ (HEBREW LETTER MEM)
111 0x05DF ן (HEBREW LETTER FINAL NUN)
112 0x05E0 נ (HEBREW LETTER NUN)
113 0x05E1 ס (HEBREW LETTER SAMEKH)
114 0x05E2 ע (HEBREW LETTER AYIN)
115 0x05E3 ף (HEBREW LETTER FINAL PE)
116 0x05E4 פ (HEBREW LETTER PE)
117 0x05E5 ץ (HEBREW LETTER FINAL TSADI)
118 0x05E6 צ (HEBREW LETTER TSADI)
119 0x05E7 ק (HEBREW LETTER QOF)
120 0x05E8 ר (HEBREW LETTER RESH)
121 0x05E9 ש (HEBREW LETTER SHIN)
122 0x05EA ת (HEBREW LETTER TAV)
125 0x200E (LEFT-TO-RIGHT MARK)
126 0x200F (RIGHT-TO-LEFT MARK)

View File

@ -1,134 +0,0 @@
# For details on index index-windows-1256.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 161bdb381f16408e8bebcc8f5310c4190af0e359de8d9bbaa3628ce2f0875509
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x067E پ (ARABIC LETTER PEH)
2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK)
3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x0679 ٹ (ARABIC LETTER TTEH)
11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x0152 Œ (LATIN CAPITAL LIGATURE OE)
13 0x0686 چ (ARABIC LETTER TCHEH)
14 0x0698 ژ (ARABIC LETTER JEH)
15 0x0688 ڈ (ARABIC LETTER DDAL)
16 0x06AF گ (ARABIC LETTER GAF)
17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK)
18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 – (EN DASH)
23 0x2014 — (EM DASH)
24 0x06A9 ک (ARABIC LETTER KEHEH)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x0691 ڑ (ARABIC LETTER RREH)
27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x0153 œ (LATIN SMALL LIGATURE OE)
29 0x200C (ZERO WIDTH NON-JOINER)
30 0x200D (ZERO WIDTH JOINER)
31 0x06BA ں (ARABIC LETTER NOON GHUNNA)
32 0x00A0   (NO-BREAK SPACE)
33 0x060C ، (ARABIC COMMA)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x00A5 ¥ (YEN SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x06BE ھ (ARABIC LETTER HEH DOACHASHMEE)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00AF ¯ (MACRON)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00B8 ¸ (CEDILLA)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x061B ؛ (ARABIC SEMICOLON)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS)
63 0x061F ؟ (ARABIC QUESTION MARK)
64 0x06C1 ہ (ARABIC LETTER HEH GOAL)
65 0x0621 ء (ARABIC LETTER HAMZA)
66 0x0622 آ (ARABIC LETTER ALEF WITH MADDA ABOVE)
67 0x0623 أ (ARABIC LETTER ALEF WITH HAMZA ABOVE)
68 0x0624 ؤ (ARABIC LETTER WAW WITH HAMZA ABOVE)
69 0x0625 إ (ARABIC LETTER ALEF WITH HAMZA BELOW)
70 0x0626 ئ (ARABIC LETTER YEH WITH HAMZA ABOVE)
71 0x0627 ا (ARABIC LETTER ALEF)
72 0x0628 ب (ARABIC LETTER BEH)
73 0x0629 ة (ARABIC LETTER TEH MARBUTA)
74 0x062A ت (ARABIC LETTER TEH)
75 0x062B ث (ARABIC LETTER THEH)
76 0x062C ج (ARABIC LETTER JEEM)
77 0x062D ح (ARABIC LETTER HAH)
78 0x062E خ (ARABIC LETTER KHAH)
79 0x062F د (ARABIC LETTER DAL)
80 0x0630 ذ (ARABIC LETTER THAL)
81 0x0631 ر (ARABIC LETTER REH)
82 0x0632 ز (ARABIC LETTER ZAIN)
83 0x0633 س (ARABIC LETTER SEEN)
84 0x0634 ش (ARABIC LETTER SHEEN)
85 0x0635 ص (ARABIC LETTER SAD)
86 0x0636 ض (ARABIC LETTER DAD)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x0637 ط (ARABIC LETTER TAH)
89 0x0638 ظ (ARABIC LETTER ZAH)
90 0x0639 ع (ARABIC LETTER AIN)
91 0x063A غ (ARABIC LETTER GHAIN)
92 0x0640 ـ (ARABIC TATWEEL)
93 0x0641 ف (ARABIC LETTER FEH)
94 0x0642 ق (ARABIC LETTER QAF)
95 0x0643 ك (ARABIC LETTER KAF)
96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
97 0x0644 ل (ARABIC LETTER LAM)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x0645 م (ARABIC LETTER MEEM)
100 0x0646 ن (ARABIC LETTER NOON)
101 0x0647 ه (ARABIC LETTER HEH)
102 0x0648 و (ARABIC LETTER WAW)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x0649 ى (ARABIC LETTER ALEF MAKSURA)
109 0x064A ي (ARABIC LETTER YEH)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
112 0x064B ً (ARABIC FATHATAN)
113 0x064C ٌ (ARABIC DAMMATAN)
114 0x064D ٍ (ARABIC KASRATAN)
115 0x064E َ (ARABIC FATHA)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x064F ُ (ARABIC DAMMA)
118 0x0650 ِ (ARABIC KASRA)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x0651 ّ (ARABIC SHADDA)
121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
122 0x0652 ْ (ARABIC SUKUN)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x200E (LEFT-TO-RIGHT MARK)
126 0x200F (RIGHT-TO-LEFT MARK)
127 0x06D2 ے (ARABIC LETTER YEH BARREE)

View File

@ -1,132 +0,0 @@
# For details on index index-windows-1257.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: cc7256bdd10a5b8dc7fb6f994659f307dfcae60def9aa6c29d811f85e2842c47
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x0081  (<control>)
2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK)
3 0x0083 ƒ (<control>)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x0088 ˆ (<control>)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x008A Š (<control>)
11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x008C Œ (<control>)
13 0x00A8 ¨ (DIAERESIS)
14 0x02C7 ˇ (CARON)
15 0x00B8 ¸ (CEDILLA)
16 0x0090  (<control>)
17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK)
18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 – (EN DASH)
23 0x2014 — (EM DASH)
24 0x0098 ˜ (<control>)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x009A š (<control>)
27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x009C œ (<control>)
29 0x00AF ¯ (MACRON)
30 0x02DB ˛ (OGONEK)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00C6 Æ (LATIN CAPITAL LETTER AE)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS)
63 0x00E6 æ (LATIN SMALL LETTER AE)
64 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK)
65 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK)
66 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON)
67 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK)
71 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON)
72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE)
75 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE)
76 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA)
77 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA)
78 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON)
79 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA)
80 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON)
81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE)
82 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON)
85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK)
89 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE)
90 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE)
91 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
94 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK)
97 0x012F į (LATIN SMALL LETTER I WITH OGONEK)
98 0x0101 ā (LATIN SMALL LETTER A WITH MACRON)
99 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK)
103 0x0113 ē (LATIN SMALL LETTER E WITH MACRON)
104 0x010D č (LATIN SMALL LETTER C WITH CARON)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE)
107 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE)
108 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA)
109 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA)
110 0x012B ī (LATIN SMALL LETTER I WITH MACRON)
111 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA)
112 0x0161 š (LATIN SMALL LETTER S WITH CARON)
113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE)
114 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x014D ō (LATIN SMALL LETTER O WITH MACRON)
117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK)
121 0x0142 ł (LATIN SMALL LETTER L WITH STROKE)
122 0x015B ś (LATIN SMALL LETTER S WITH ACUTE)
123 0x016B ū (LATIN SMALL LETTER U WITH MACRON)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE)
126 0x017E ž (LATIN SMALL LETTER Z WITH CARON)
127 0x02D9 ˙ (DOT ABOVE)

View File

@ -1,134 +0,0 @@
# For details on index index-windows-1258.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 198bacedfcf24390e219240a7b776b6cec34cff070330b08a601a69c67f7eb24
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x0081  (<control>)
2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK)
3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK)
4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x2020 † (DAGGER)
7 0x2021 ‡ (DOUBLE DAGGER)
8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT)
9 0x2030 ‰ (PER MILLE SIGN)
10 0x008A Š (<control>)
11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
12 0x0152 Œ (LATIN CAPITAL LIGATURE OE)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK)
18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 – (EN DASH)
23 0x2014 — (EM DASH)
24 0x02DC ˜ (SMALL TILDE)
25 0x2122 ™ (TRADE MARK SIGN)
26 0x009A š (<control>)
27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
28 0x0153 œ (LATIN SMALL LIGATURE OE)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS)
32 0x00A0   (NO-BREAK SPACE)
33 0x00A1 ¡ (INVERTED EXCLAMATION MARK)
34 0x00A2 ¢ (CENT SIGN)
35 0x00A3 £ (POUND SIGN)
36 0x00A4 ¤ (CURRENCY SIGN)
37 0x00A5 ¥ (YEN SIGN)
38 0x00A6 ¦ (BROKEN BAR)
39 0x00A7 § (SECTION SIGN)
40 0x00A8 ¨ (DIAERESIS)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x00AA ª (FEMININE ORDINAL INDICATOR)
43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
44 0x00AC ¬ (NOT SIGN)
45 0x00AD ­ (SOFT HYPHEN)
46 0x00AE ® (REGISTERED SIGN)
47 0x00AF ¯ (MACRON)
48 0x00B0 ° (DEGREE SIGN)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x00B2 ² (SUPERSCRIPT TWO)
51 0x00B3 ³ (SUPERSCRIPT THREE)
52 0x00B4 ´ (ACUTE ACCENT)
53 0x00B5 µ (MICRO SIGN)
54 0x00B6 ¶ (PILCROW SIGN)
55 0x00B7 · (MIDDLE DOT)
56 0x00B8 ¸ (CEDILLA)
57 0x00B9 ¹ (SUPERSCRIPT ONE)
58 0x00BA º (MASCULINE ORDINAL INDICATOR)
59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER)
61 0x00BD ½ (VULGAR FRACTION ONE HALF)
62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS)
63 0x00BF ¿ (INVERTED QUESTION MARK)
64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE)
65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE)
66 0x00C2 Â (LATIN CAPITAL LETTER A WITH CIRCUMFLEX)
67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE)
68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS)
69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE)
70 0x00C6 Æ (LATIN CAPITAL LETTER AE)
71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA)
72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE)
73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE)
74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX)
75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS)
76 0x0300 ̀ (COMBINING GRAVE ACCENT)
77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE)
78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX)
79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS)
80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE)
81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE)
82 0x0309 ̉ (COMBINING HOOK ABOVE)
83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE)
84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX)
85 0x01A0 Ơ (LATIN CAPITAL LETTER O WITH HORN)
86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS)
87 0x00D7 × (MULTIPLICATION SIGN)
88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE)
89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE)
90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE)
91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX)
92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS)
93 0x01AF Ư (LATIN CAPITAL LETTER U WITH HORN)
94 0x0303 ̃ (COMBINING TILDE)
95 0x00DF ß (LATIN SMALL LETTER SHARP S)
96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE)
97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE)
98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX)
99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE)
100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS)
101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE)
102 0x00E6 æ (LATIN SMALL LETTER AE)
103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA)
104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE)
105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE)
106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX)
107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS)
108 0x0301 ́ (COMBINING ACUTE ACCENT)
109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE)
110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX)
111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS)
112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE)
113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE)
114 0x0323 ̣ (COMBINING DOT BELOW)
115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE)
116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX)
117 0x01A1 ơ (LATIN SMALL LETTER O WITH HORN)
118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS)
119 0x00F7 ÷ (DIVISION SIGN)
120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE)
121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE)
122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE)
123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX)
124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS)
125 0x01B0 ư (LATIN SMALL LETTER U WITH HORN)
126 0x20AB ₫ (DONG SIGN)
127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS)

View File

@ -1,126 +0,0 @@
# For details on index index-windows-874.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: b416583ce125e38474381b31b401a98b19ecf2e57e0998e78a1e18b14894905d
# Date: 2018-01-06
0 0x20AC € (EURO SIGN)
1 0x0081  (<control>)
2 0x0082 ‚ (<control>)
3 0x0083 ƒ (<control>)
4 0x0084 „ (<control>)
5 0x2026 … (HORIZONTAL ELLIPSIS)
6 0x0086 † (<control>)
7 0x0087 ‡ (<control>)
8 0x0088 ˆ (<control>)
9 0x0089 ‰ (<control>)
10 0x008A Š (<control>)
11 0x008B ‹ (<control>)
12 0x008C Œ (<control>)
13 0x008D  (<control>)
14 0x008E Ž (<control>)
15 0x008F  (<control>)
16 0x0090  (<control>)
17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK)
18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK)
19 0x201C “ (LEFT DOUBLE QUOTATION MARK)
20 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
21 0x2022 • (BULLET)
22 0x2013 – (EN DASH)
23 0x2014 — (EM DASH)
24 0x0098 ˜ (<control>)
25 0x0099 ™ (<control>)
26 0x009A š (<control>)
27 0x009B › (<control>)
28 0x009C œ (<control>)
29 0x009D  (<control>)
30 0x009E ž (<control>)
31 0x009F Ÿ (<control>)
32 0x00A0   (NO-BREAK SPACE)
33 0x0E01 ก (THAI CHARACTER KO KAI)
34 0x0E02 ข (THAI CHARACTER KHO KHAI)
35 0x0E03 ฃ (THAI CHARACTER KHO KHUAT)
36 0x0E04 ค (THAI CHARACTER KHO KHWAI)
37 0x0E05 ฅ (THAI CHARACTER KHO KHON)
38 0x0E06 ฆ (THAI CHARACTER KHO RAKHANG)
39 0x0E07 ง (THAI CHARACTER NGO NGU)
40 0x0E08 จ (THAI CHARACTER CHO CHAN)
41 0x0E09 ฉ (THAI CHARACTER CHO CHING)
42 0x0E0A ช (THAI CHARACTER CHO CHANG)
43 0x0E0B ซ (THAI CHARACTER SO SO)
44 0x0E0C ฌ (THAI CHARACTER CHO CHOE)
45 0x0E0D ญ (THAI CHARACTER YO YING)
46 0x0E0E ฎ (THAI CHARACTER DO CHADA)
47 0x0E0F ฏ (THAI CHARACTER TO PATAK)
48 0x0E10 ฐ (THAI CHARACTER THO THAN)
49 0x0E11 ฑ (THAI CHARACTER THO NANGMONTHO)
50 0x0E12 ฒ (THAI CHARACTER THO PHUTHAO)
51 0x0E13 ณ (THAI CHARACTER NO NEN)
52 0x0E14 ด (THAI CHARACTER DO DEK)
53 0x0E15 ต (THAI CHARACTER TO TAO)
54 0x0E16 ถ (THAI CHARACTER THO THUNG)
55 0x0E17 ท (THAI CHARACTER THO THAHAN)
56 0x0E18 ธ (THAI CHARACTER THO THONG)
57 0x0E19 น (THAI CHARACTER NO NU)
58 0x0E1A บ (THAI CHARACTER BO BAIMAI)
59 0x0E1B ป (THAI CHARACTER PO PLA)
60 0x0E1C ผ (THAI CHARACTER PHO PHUNG)
61 0x0E1D ฝ (THAI CHARACTER FO FA)
62 0x0E1E พ (THAI CHARACTER PHO PHAN)
63 0x0E1F ฟ (THAI CHARACTER FO FAN)
64 0x0E20 ภ (THAI CHARACTER PHO SAMPHAO)
65 0x0E21 ม (THAI CHARACTER MO MA)
66 0x0E22 ย (THAI CHARACTER YO YAK)
67 0x0E23 ร (THAI CHARACTER RO RUA)
68 0x0E24 ฤ (THAI CHARACTER RU)
69 0x0E25 ล (THAI CHARACTER LO LING)
70 0x0E26 ฦ (THAI CHARACTER LU)
71 0x0E27 ว (THAI CHARACTER WO WAEN)
72 0x0E28 ศ (THAI CHARACTER SO SALA)
73 0x0E29 ษ (THAI CHARACTER SO RUSI)
74 0x0E2A ส (THAI CHARACTER SO SUA)
75 0x0E2B ห (THAI CHARACTER HO HIP)
76 0x0E2C ฬ (THAI CHARACTER LO CHULA)
77 0x0E2D อ (THAI CHARACTER O ANG)
78 0x0E2E ฮ (THAI CHARACTER HO NOKHUK)
79 0x0E2F ฯ (THAI CHARACTER PAIYANNOI)
80 0x0E30 ะ (THAI CHARACTER SARA A)
81 0x0E31 ั (THAI CHARACTER MAI HAN-AKAT)
82 0x0E32 า (THAI CHARACTER SARA AA)
83 0x0E33 ำ (THAI CHARACTER SARA AM)
84 0x0E34 ิ (THAI CHARACTER SARA I)
85 0x0E35 ี (THAI CHARACTER SARA II)
86 0x0E36 ึ (THAI CHARACTER SARA UE)
87 0x0E37 ื (THAI CHARACTER SARA UEE)
88 0x0E38 ุ (THAI CHARACTER SARA U)
89 0x0E39 ู (THAI CHARACTER SARA UU)
90 0x0E3A ฺ (THAI CHARACTER PHINTHU)
95 0x0E3F ฿ (THAI CURRENCY SYMBOL BAHT)
96 0x0E40 เ (THAI CHARACTER SARA E)
97 0x0E41 แ (THAI CHARACTER SARA AE)
98 0x0E42 โ (THAI CHARACTER SARA O)
99 0x0E43 ใ (THAI CHARACTER SARA AI MAIMUAN)
100 0x0E44 ไ (THAI CHARACTER SARA AI MAIMALAI)
101 0x0E45 ๅ (THAI CHARACTER LAKKHANGYAO)
102 0x0E46 ๆ (THAI CHARACTER MAIYAMOK)
103 0x0E47 ็ (THAI CHARACTER MAITAIKHU)
104 0x0E48 ่ (THAI CHARACTER MAI EK)
105 0x0E49 ้ (THAI CHARACTER MAI THO)
106 0x0E4A ๊ (THAI CHARACTER MAI TRI)
107 0x0E4B ๋ (THAI CHARACTER MAI CHATTAWA)
108 0x0E4C ์ (THAI CHARACTER THANTHAKHAT)
109 0x0E4D ํ (THAI CHARACTER NIKHAHIT)
110 0x0E4E ๎ (THAI CHARACTER YAMAKKAN)
111 0x0E4F ๏ (THAI CHARACTER FONGMAN)
112 0x0E50 ๐ (THAI DIGIT ZERO)
113 0x0E51 ๑ (THAI DIGIT ONE)
114 0x0E52 ๒ (THAI DIGIT TWO)
115 0x0E53 ๓ (THAI DIGIT THREE)
116 0x0E54 ๔ (THAI DIGIT FOUR)
117 0x0E55 ๕ (THAI DIGIT FIVE)
118 0x0E56 ๖ (THAI DIGIT SIX)
119 0x0E57 ๗ (THAI DIGIT SEVEN)
120 0x0E58 ๘ (THAI DIGIT EIGHT)
121 0x0E59 ๙ (THAI DIGIT NINE)
122 0x0E5A ๚ (THAI CHARACTER ANGKHANKHU)
123 0x0E5B ๛ (THAI CHARACTER KHOMUT)

View File

@ -1,134 +0,0 @@
# For details on index index-x-mac-cyrillic.txt see the Encoding Standard
# https://encoding.spec.whatwg.org/
#
# Identifier: 73e8e7642c6fa9de29d42819b47fba55b58666fb1e339faeb4a89a0bd7c24d43
# Date: 2018-01-06
0 0x0410 А (CYRILLIC CAPITAL LETTER A)
1 0x0411 Б (CYRILLIC CAPITAL LETTER BE)
2 0x0412 В (CYRILLIC CAPITAL LETTER VE)
3 0x0413 Г (CYRILLIC CAPITAL LETTER GHE)
4 0x0414 Д (CYRILLIC CAPITAL LETTER DE)
5 0x0415 Е (CYRILLIC CAPITAL LETTER IE)
6 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE)
7 0x0417 З (CYRILLIC CAPITAL LETTER ZE)
8 0x0418 И (CYRILLIC CAPITAL LETTER I)
9 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I)
10 0x041A К (CYRILLIC CAPITAL LETTER KA)
11 0x041B Л (CYRILLIC CAPITAL LETTER EL)
12 0x041C М (CYRILLIC CAPITAL LETTER EM)
13 0x041D Н (CYRILLIC CAPITAL LETTER EN)
14 0x041E О (CYRILLIC CAPITAL LETTER O)
15 0x041F П (CYRILLIC CAPITAL LETTER PE)
16 0x0420 Р (CYRILLIC CAPITAL LETTER ER)
17 0x0421 С (CYRILLIC CAPITAL LETTER ES)
18 0x0422 Т (CYRILLIC CAPITAL LETTER TE)
19 0x0423 У (CYRILLIC CAPITAL LETTER U)
20 0x0424 Ф (CYRILLIC CAPITAL LETTER EF)
21 0x0425 Х (CYRILLIC CAPITAL LETTER HA)
22 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE)
23 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE)
24 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA)
25 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA)
26 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN)
27 0x042B Ы (CYRILLIC CAPITAL LETTER YERU)
28 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN)
29 0x042D Э (CYRILLIC CAPITAL LETTER E)
30 0x042E Ю (CYRILLIC CAPITAL LETTER YU)
31 0x042F Я (CYRILLIC CAPITAL LETTER YA)
32 0x2020 † (DAGGER)
33 0x00B0 ° (DEGREE SIGN)
34 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN)
35 0x00A3 £ (POUND SIGN)
36 0x00A7 § (SECTION SIGN)
37 0x2022 • (BULLET)
38 0x00B6 ¶ (PILCROW SIGN)
39 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I)
40 0x00AE ® (REGISTERED SIGN)
41 0x00A9 © (COPYRIGHT SIGN)
42 0x2122 ™ (TRADE MARK SIGN)
43 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE)
44 0x0452 ђ (CYRILLIC SMALL LETTER DJE)
45 0x2260 ≠ (NOT EQUAL TO)
46 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE)
47 0x0453 ѓ (CYRILLIC SMALL LETTER GJE)
48 0x221E ∞ (INFINITY)
49 0x00B1 ± (PLUS-MINUS SIGN)
50 0x2264 ≤ (LESS-THAN OR EQUAL TO)
51 0x2265 ≥ (GREATER-THAN OR EQUAL TO)
52 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I)
53 0x00B5 µ (MICRO SIGN)
54 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN)
55 0x0408 Ј (CYRILLIC CAPITAL LETTER JE)
56 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE)
57 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE)
58 0x0407 Ї (CYRILLIC CAPITAL LETTER YI)
59 0x0457 ї (CYRILLIC SMALL LETTER YI)
60 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE)
61 0x0459 љ (CYRILLIC SMALL LETTER LJE)
62 0x040A Њ (CYRILLIC CAPITAL LETTER NJE)
63 0x045A њ (CYRILLIC SMALL LETTER NJE)
64 0x0458 ј (CYRILLIC SMALL LETTER JE)
65 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE)
66 0x00AC ¬ (NOT SIGN)
67 0x221A √ (SQUARE ROOT)
68 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK)
69 0x2248 ≈ (ALMOST EQUAL TO)
70 0x2206 ∆ (INCREMENT)
71 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK)
72 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK)
73 0x2026 … (HORIZONTAL ELLIPSIS)
74 0x00A0   (NO-BREAK SPACE)
75 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE)
76 0x045B ћ (CYRILLIC SMALL LETTER TSHE)
77 0x040C Ќ (CYRILLIC CAPITAL LETTER KJE)
78 0x045C ќ (CYRILLIC SMALL LETTER KJE)
79 0x0455 ѕ (CYRILLIC SMALL LETTER DZE)
80 0x2013 – (EN DASH)
81 0x2014 — (EM DASH)
82 0x201C “ (LEFT DOUBLE QUOTATION MARK)
83 0x201D ” (RIGHT DOUBLE QUOTATION MARK)
84 0x2018 ‘ (LEFT SINGLE QUOTATION MARK)
85 0x2019 ’ (RIGHT SINGLE QUOTATION MARK)
86 0x00F7 ÷ (DIVISION SIGN)
87 0x201E „ (DOUBLE LOW-9 QUOTATION MARK)
88 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U)
89 0x045E ў (CYRILLIC SMALL LETTER SHORT U)
90 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE)
91 0x045F џ (CYRILLIC SMALL LETTER DZHE)
92 0x2116 № (NUMERO SIGN)
93 0x0401 Ё (CYRILLIC CAPITAL LETTER IO)
94 0x0451 ё (CYRILLIC SMALL LETTER IO)
95 0x044F я (CYRILLIC SMALL LETTER YA)
96 0x0430 а (CYRILLIC SMALL LETTER A)
97 0x0431 б (CYRILLIC SMALL LETTER BE)
98 0x0432 в (CYRILLIC SMALL LETTER VE)
99 0x0433 г (CYRILLIC SMALL LETTER GHE)
100 0x0434 д (CYRILLIC SMALL LETTER DE)
101 0x0435 е (CYRILLIC SMALL LETTER IE)
102 0x0436 ж (CYRILLIC SMALL LETTER ZHE)
103 0x0437 з (CYRILLIC SMALL LETTER ZE)
104 0x0438 и (CYRILLIC SMALL LETTER I)
105 0x0439 й (CYRILLIC SMALL LETTER SHORT I)
106 0x043A к (CYRILLIC SMALL LETTER KA)
107 0x043B л (CYRILLIC SMALL LETTER EL)
108 0x043C м (CYRILLIC SMALL LETTER EM)
109 0x043D н (CYRILLIC SMALL LETTER EN)
110 0x043E о (CYRILLIC SMALL LETTER O)
111 0x043F п (CYRILLIC SMALL LETTER PE)
112 0x0440 р (CYRILLIC SMALL LETTER ER)
113 0x0441 с (CYRILLIC SMALL LETTER ES)
114 0x0442 т (CYRILLIC SMALL LETTER TE)
115 0x0443 у (CYRILLIC SMALL LETTER U)
116 0x0444 ф (CYRILLIC SMALL LETTER EF)
117 0x0445 х (CYRILLIC SMALL LETTER HA)
118 0x0446 ц (CYRILLIC SMALL LETTER TSE)
119 0x0447 ч (CYRILLIC SMALL LETTER CHE)
120 0x0448 ш (CYRILLIC SMALL LETTER SHA)
121 0x0449 щ (CYRILLIC SMALL LETTER SHCHA)
122 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN)
123 0x044B ы (CYRILLIC SMALL LETTER YERU)
124 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN)
125 0x044D э (CYRILLIC SMALL LETTER E)
126 0x044E ю (CYRILLIC SMALL LETTER YU)
127 0x20AC € (EURO SIGN)

View File

@ -1,212 +0,0 @@
//! Encoding/decoding functions for ISO/IEC 8859-1 (or "latin1"), which
//! conveniently happens to map 1-to-1 to the first 256 unicode scalar values.
//!
//! Because latin1 is a single-byte encoding where all bytes are valid,
//! decoding cannot fail. However, encoding will fail with scalar values
//! greater than 255.
use core;
use {DecodeResult, EncodeError, EncodeResult};
/// Encodes utf8 `input` as latin1 into `output`, one byte per char.
///
/// Encoding stops quietly once `output` is full. On success the result holds
/// the number of input bytes consumed (always on a char boundary) and the
/// slice of `output` that was written.
///
/// # Errors
///
/// Returns an `EncodeError` for the first char with a scalar value above 255
/// that is reached while there is still room in `output`. The error carries
/// the char, its byte range in `input`, and the number of bytes written.
pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    let mut consumed = 0;
    let mut written = 0;

    // Pairing chars with output slots ends the loop as soon as either the
    // input or the output buffer runs out.
    for ((start, ch), slot) in input.char_indices().zip(output.iter_mut()) {
        let end = start + ch.len_utf8();
        if ch as u32 > 255 {
            return Err(EncodeError {
                character: ch,
                error_range: (start, end),
                output_bytes_written: written,
            });
        }
        *slot = ch as u8;
        written += 1;
        // Everything up to the end of this char has now been consumed.
        consumed = end;
    }

    Ok((consumed, &output[..written]))
}
/// Decodes latin1 `input` into utf8 text stored in `output`.
///
/// Bytes below 0x80 are copied as-is; every other byte becomes a two-byte
/// utf8 sequence. Decoding stops quietly when the next char would not fit in
/// `output`. The result holds the number of input bytes consumed and the
/// decoded text. This function never returns an error.
pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut consumed = 0;
    let mut written = 0;

    for &b in input {
        // Number of utf8 bytes this latin1 byte expands to.
        let width = if b < 0x80 { 1 } else { 2 };
        if written + width > output.len() {
            break;
        }
        if width == 1 {
            output[written] = b;
        } else {
            // Two-byte utf8: the top two bits go in the lead byte, the low
            // six bits in the continuation byte.
            output[written] = 0xC0 | (b >> 6);
            output[written + 1] = 0x80 | (b & 0x3F);
        }
        written += width;
        consumed += 1;
    }

    // SAFETY: only complete one- or two-byte utf8 sequences were written to
    // `output[..written]` above.
    let text = unsafe { core::str::from_utf8_unchecked(&output[..written]) };
    Ok((consumed, text))
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- Encoding into output buffers of various sizes ---

    #[test]
    fn encode_01() {
        let text = "Hello world!";
        let mut buf = [0u8; 0];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_02() {
        let text = "Hello world!";
        let mut buf = [0u8; 1];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(encoded, "H".as_bytes());
    }

    #[test]
    fn encode_03() {
        let text = "Hello world!";
        let mut buf = [0u8; 2];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(encoded, "He".as_bytes());
    }

    #[test]
    fn encode_04() {
        let text = "Hello world!";
        let mut buf = [0u8; 64];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    // The unrepresentable char sits past the end of the output buffer, so the
    // encoder stops before reaching it and reports success.
    #[test]
    fn encode_05() {
        let text = "Hello world!こ";
        let mut buf = [0u8; 12];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    // --- Decoding into output buffers of various sizes ---

    #[test]
    fn decode_01() {
        let data = "Hello world!".as_bytes();
        let mut buf = [0u8; 0];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_02() {
        let data = "Hello world!".as_bytes();
        let mut buf = [0u8; 1];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(decoded, "H");
    }

    #[test]
    fn decode_03() {
        let data = "Hello world!".as_bytes();
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "He");
    }

    #[test]
    fn decode_04() {
        let data = "Hello world!".as_bytes();
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(decoded, "Hello world!");
    }

    // --- Encoding a char that latin1 cannot represent ---
    // The reported char is the 'こ' embedded in each input string; it takes
    // three bytes in utf8, hence the three-byte error ranges.

    #[test]
    fn encode_error_01() {
        let text = "こello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (0, 3),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn encode_error_02() {
        let text = "Hこllo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (1, 4),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn encode_error_03() {
        let text = "Heこlo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (2, 5),
                output_bytes_written: 2,
            })
        );
    }

    // Same as above, but with an output buffer that only just has room left
    // when the bad char is reached.
    #[test]
    fn encode_error_04() {
        let text = "Heこlo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (2, 5),
                output_bytes_written: 2,
            })
        );
    }
}

View File

@ -1,122 +0,0 @@
#![no_std]
//! A library for incrementally encoding/decoding between utf8 and various
//! text encodings.
mod latin1;
mod single_byte;
mod utf16_be;
mod utf16_le;
mod utf32_be;
mod utf32_le;
mod utf8;
mod utils;
use single_byte::{ibm866, iso_8859_2, iso_8859_7, windows1252};
/// Encodes text from utf8 to a destination encoding.
///
/// This is a thin dispatcher: it selects the encoding module that matches
/// `output_encoding` and forwards `input` and `output` to that module's
/// `encode_from_str` unchanged.
///
/// * `output_encoding` - the encoding to produce.
/// * `input` - the utf8 text to encode.
/// * `output` - the buffer that receives the encoded bytes.
///
/// Returns the selected encoder's result as-is; see `EncodeResult` and
/// `EncodeError` for what the Ok and Err variants carry.
pub fn encode_from_str<'a>(
output_encoding: Encoding,
input: &str,
output: &'a mut [u8],
) -> EncodeResult<'a> {
// One arm per `Encoding` variant, with no catch-all arm.
match output_encoding {
Encoding::Utf8 => utf8::encode_from_str(input, output),
Encoding::Utf16BE => utf16_be::encode_from_str(input, output),
Encoding::Utf16LE => utf16_le::encode_from_str(input, output),
Encoding::Utf32BE => utf32_be::encode_from_str(input, output),
Encoding::Utf32LE => utf32_le::encode_from_str(input, output),
Encoding::IBM866 => ibm866::encode_from_str(input, output),
Encoding::Latin1 => latin1::encode_from_str(input, output),
Encoding::ISO8859_2 => iso_8859_2::encode_from_str(input, output),
Encoding::ISO8859_7 => iso_8859_7::encode_from_str(input, output),
Encoding::Windows1252 => windows1252::encode_from_str(input, output),
}
}
/// Decodes text from a source encoding to utf8.
///
/// This is a thin dispatcher: it selects the encoding module that matches
/// `input_encoding` and forwards `input` and `output` to that module's
/// `decode_to_str` unchanged.
///
/// * `input_encoding` - the encoding that `input` is in.
/// * `input` - the encoded bytes to decode.
/// * `output` - the buffer that receives the decoded utf8 bytes.
///
/// Returns the selected decoder's result as-is; see `DecodeResult` and
/// `DecodeError` for what the Ok and Err variants carry.
pub fn decode_to_str<'a>(
input_encoding: Encoding,
input: &[u8],
output: &'a mut [u8],
) -> DecodeResult<'a> {
// One arm per `Encoding` variant, with no catch-all arm.
match input_encoding {
Encoding::Utf8 => utf8::decode_to_str(input, output),
Encoding::Utf16BE => utf16_be::decode_to_str(input, output),
Encoding::Utf16LE => utf16_le::decode_to_str(input, output),
Encoding::Utf32BE => utf32_be::decode_to_str(input, output),
Encoding::Utf32LE => utf32_le::decode_to_str(input, output),
Encoding::IBM866 => ibm866::decode_to_str(input, output),
Encoding::Latin1 => latin1::decode_to_str(input, output),
Encoding::ISO8859_2 => iso_8859_2::decode_to_str(input, output),
Encoding::ISO8859_7 => iso_8859_7::decode_to_str(input, output),
Encoding::Windows1252 => windows1252::decode_to_str(input, output),
}
}
/// Describes a text encoding.
///
/// Passed to the crate-level `encode_from_str` and `decode_to_str` to select
/// which encoding to convert to or from.
#[derive(Debug, Copy, Clone)]
pub enum Encoding {
/// UTF-8.
Utf8,
/// UTF-16, big endian.
Utf16BE,
/// UTF-16, little endian.
Utf16LE,
/// UTF-32, big endian.
Utf32BE,
/// UTF-32, little endian.
Utf32LE,
// Variants below are commented out; presumably planned but not yet
// supported (TODO confirm).
// ShiftJIS,
// EUC_JP,
// Big5,
/// IBM 866.
IBM866,
/// ISO/IEC 8859-1.
Latin1,
/// ISO/IEC 8859-2.
ISO8859_2,
/// ISO/IEC 8859-7.
ISO8859_7,
/// Windows code page 1252.
Windows1252,
}
/// Result type for encoding text from utf8 to a target encoding.
///
/// The Ok() variant provides the number of input bytes consumed and a
/// reference to the valid encoded text data (the written part of the output
/// buffer). The Err() variant is an `EncodeError`.
pub type EncodeResult<'a> = Result<(usize, &'a [u8]), EncodeError>;
/// Result type for decoding text from a target encoding to utf8.
///
/// The Ok() variant provides the number of input bytes consumed and a
/// reference to the valid decoded text (the written part of the output
/// buffer, as a `str`). The Err() variant is a `DecodeError`.
pub type DecodeResult<'a> = Result<(usize, &'a str), DecodeError>;
/// Represents an error when encoding from utf8 to some other format.
///
/// Since valid input utf8 is statically assumed, the only possible
/// error is encountering a char that is not representable in the target
/// encoding.
///
/// The problematic character, the byte index range of that character in the
/// input utf8, and the number of bytes already written to the output buffer
/// are provided.
///
/// It is guaranteed that all input leading up to the problem character has
/// already been encoded and written to the output buffer.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct EncodeError {
/// The char that could not be represented in the target encoding.
pub character: char,
/// Byte index range `(start, end)` of `character` within the input utf8.
pub error_range: (usize, usize),
/// Number of bytes written to the output buffer before the error.
pub output_bytes_written: usize,
}
/// Represents an error when decoding to utf8 from some other format.
///
/// All supported text encodings can be fully represented in utf8, and
/// therefore the only possible error is that we encounter bytes in the
/// input data that are invalid for the text encoding we're attempting
/// to decode from.
///
/// The byte index range of the invalid input data and the number of bytes
/// already decoded and written to the output buffer are provided.
///
/// It is guaranteed that all input leading up to the invalid data has
/// already been decoded and written to the output buffer.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct DecodeError {
/// Byte index range `(start, end)` of the invalid data within the input.
pub error_range: (usize, usize),
/// Number of utf8 bytes written to the output buffer before the error.
pub output_bytes_written: usize,
}

View File

@ -1,475 +0,0 @@
//! Single byte encodings that extend ascii. Their code is auto-generated
//! by build.rs
use core;
use {DecodeError, DecodeResult, EncodeError, EncodeResult};
/// IBM 866 encoder and decoder.
///
/// The module body is the `ibm866.rs` file that build.rs writes into
/// `OUT_DIR`, included textually at compile time.
pub mod ibm866 {
// Generated by build.rs
include!(concat!(env!("OUT_DIR"), "/ibm866.rs"));
}
/// ISO/IEC 8859-2 encoder and decoder.
///
/// The module body is the `iso-8859-2.rs` file that build.rs writes into
/// `OUT_DIR`, included textually at compile time.
pub mod iso_8859_2 {
// Generated by build.rs
include!(concat!(env!("OUT_DIR"), "/iso-8859-2.rs"));
}
// pub mod iso_8859_3 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-3.rs"));
// }
// pub mod iso_8859_4 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-4.rs"));
// }
// pub mod iso_8859_5 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-5.rs"));
// }
// pub mod iso_8859_6 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-6.rs"));
// }
/// ISO/IEC 8859-7 encoder and decoder.
///
/// The module body is the `iso-8859-7.rs` file that build.rs writes into
/// `OUT_DIR`, included textually at compile time. This file's unit tests
/// run against this module.
pub mod iso_8859_7 {
// Generated by build.rs
include!(concat!(env!("OUT_DIR"), "/iso-8859-7.rs"));
}
// pub mod iso_8859_8 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-8.rs"));
// }
// pub mod iso_8859_10 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-10.rs"));
// }
// pub mod iso_8859_13 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-13.rs"));
// }
// pub mod iso_8859_14 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-14.rs"));
// }
// pub mod iso_8859_15 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-15.rs"));
// }
// pub mod iso_8859_16 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/iso-8859-16.rs"));
// }
// pub mod koi8_r {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/koi8-r.rs"));
// }
// pub mod koi8_u {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/koi8-u.rs"));
// }
// pub mod macintosh {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/macintosh.rs"));
// }
// pub mod windows874 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-874.rs"));
// }
// pub mod windows1250 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1250.rs"));
// }
// pub mod windows1251 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1251.rs"));
// }
/// Windows code page 1252 encoder and decoder.
///
/// The module body is the `windows-1252.rs` file that build.rs writes into
/// `OUT_DIR`, included textually at compile time.
pub mod windows1252 {
// Generated by build.rs
include!(concat!(env!("OUT_DIR"), "/windows-1252.rs"));
}
// pub mod windows1253 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1253.rs"));
// }
// pub mod windows1254 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1254.rs"));
// }
// pub mod windows1255 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1255.rs"));
// }
// pub mod windows1256 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1256.rs"));
// }
// pub mod windows1257 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1257.rs"));
// }
// pub mod windows1258 {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/windows-1258.rs"));
// }
// pub mod x_mac_cyrillic {
// // Generated by build.rs
// include!(concat!(env!("OUT_DIR"), "/x-mac-cyrillic.rs"));
// }
/// Common encoder behind every single byte encoding module; each module
/// wraps it shallowly and supplies its own table.
///
/// * `table` - `(char, byte)` pairs for the non-ascii chars of the encoding.
///   It is searched with a binary search on the char, so it must be sorted
///   by char.
/// * `input` - the utf8 text to encode.
/// * `output` - the buffer that receives one byte per encoded char.
///
/// Encoding stops quietly once `output` is full. On success the result holds
/// the number of input bytes consumed and the written part of `output`.
///
/// # Errors
///
/// Returns an `EncodeError` for the first non-ascii char that is missing
/// from `table` and is reached while there is still room in `output`.
#[inline]
fn single_byte_encode_from_str<'a>(
    table: &[(char, u8)],
    input: &str,
    output: &'a mut [u8],
) -> EncodeResult<'a> {
    let mut consumed = 0;
    let mut written = 0;

    // Pairing chars with output slots ends the loop as soon as either the
    // input or the output buffer runs out.
    for ((start, ch), slot) in input.char_indices().zip(output.iter_mut()) {
        let end = start + ch.len_utf8();
        let encoded = if (ch as u32) < 0x80 {
            // Ascii maps to itself in all of these encodings.
            ch as u8
        } else {
            match table.binary_search_by_key(&ch, |entry| entry.0) {
                Ok(idx) => table[idx].1,
                Err(_) => {
                    return Err(EncodeError {
                        character: ch,
                        error_range: (start, end),
                        output_bytes_written: written,
                    })
                }
            }
        };
        *slot = encoded;
        written += 1;
        // Everything up to the end of this char has now been consumed.
        consumed = end;
    }

    Ok((consumed, &output[..written]))
}
/// This is shared among the single byte decoders, and is shallowly
/// wrapped in each of their modules.
///
/// * `table` - the char for each of the bytes 0x80 through 0xFF, indexed by
///   `byte - 0x80`. An entry equal to U+FFFD (REPLACEMENT CHARACTER) marks a
///   byte that is undefined in the encoding.
///   NOTE(review): the sentinel was unreadable in this copy of the source
///   and is assumed to be U+FFFD; confirm against the tables that build.rs
///   generates.
/// * `input` - the encoded bytes to decode.
/// * `output` - the buffer that receives the decoded utf8 bytes.
///
/// Decoding stops quietly when the next char would not fit in `output`. On
/// success the result holds the number of input bytes consumed and the
/// decoded text.
///
/// # Errors
///
/// Returns a `DecodeError` for the first undefined byte, with the byte's
/// index range in `input` and the number of bytes written so far. The
/// undefined-byte check runs before the output space check.
#[inline]
fn single_byte_decode_to_str<'a>(
    table: &[char; 128],
    input: &[u8],
    output: &'a mut [u8],
) -> DecodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;
    for &byte in input.iter() {
        if byte < 0x80 {
            // 1-byte case: ascii is copied through unchanged.
            if output_i >= output.len() {
                break;
            }
            output[output_i] = byte;
            input_i += 1;
            output_i += 1;
        } else {
            // Use lookup table.
            let code = table[byte as usize - 0x80];
            if code == '\u{FFFD}' {
                // Error: undefined byte.
                return Err(DecodeError {
                    error_range: (input_i, input_i + 1),
                    output_bytes_written: output_i,
                });
            }

            // Encode to utf8, stopping if the whole char does not fit.
            let mut buf = [0u8; 4];
            let s = code.encode_utf8(&mut buf);
            if (output_i + s.len()) > output.len() {
                break;
            }
            output[output_i..(output_i + s.len())].copy_from_slice(s.as_bytes());
            input_i += 1;
            output_i += s.len();
        }
    }

    // SAFETY: `output[..output_i]` holds only ascii bytes and complete
    // sequences produced by `char::encode_utf8`.
    Ok((input_i, unsafe {
        core::str::from_utf8_unchecked(&output[..output_i])
    }))
}
//===========================================================================
// Testing is done with iso-8859-7, since it has a few undefined characters,
// allowing us to test handling of those.
#[cfg(test)]
mod tests {
    use super::iso_8859_7::*;
    use {DecodeError, EncodeError};

    /// "Hello world!" as ascii bytes, shared by the plain decode tests.
    const HELLO: [u8; 12] = [
        0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
    ];

    // --- Encoding into output buffers of various sizes ---

    #[test]
    fn encode_01() {
        let text = "Hello world!";
        let mut buf = [0u8; 0];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_02() {
        let text = "Hello world!";
        let mut buf = [0u8; 1];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(encoded, "H".as_bytes());
    }

    #[test]
    fn encode_03() {
        let text = "Hello world!";
        let mut buf = [0u8; 2];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(encoded, "He".as_bytes());
    }

    #[test]
    fn encode_04() {
        let text = "Hello world!";
        let mut buf = [0u8; 64];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    // The unrepresentable char sits past the end of the output buffer, so the
    // encoder stops before reaching it and reports success.
    #[test]
    fn encode_05() {
        let text = "Hello world!こ";
        let mut buf = [0u8; 12];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(encoded, "Hello world!".as_bytes());
    }

    // --- Decoding into output buffers of various sizes ---

    #[test]
    fn decode_01() {
        let mut buf = [0u8; 0];
        let (consumed_count, decoded) = decode_to_str(&HELLO, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_02() {
        let mut buf = [0u8; 1];
        let (consumed_count, decoded) = decode_to_str(&HELLO, &mut buf).unwrap();
        assert_eq!(consumed_count, 1);
        assert_eq!(decoded, "H");
    }

    #[test]
    fn decode_03() {
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&HELLO, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "He");
    }

    #[test]
    fn decode_04() {
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&HELLO, &mut buf).unwrap();
        assert_eq!(consumed_count, 12);
        assert_eq!(decoded, "Hello world!");
    }

    #[test]
    fn decode_05() {
        let data = [
            0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE,
            0xCF, 0xD0, 0xD1, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9,
        ]; // "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ"
        let mut buf = [0u8; 128];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 24);
        assert_eq!(decoded, "ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ");
    }

    // --- Encoding chars that iso-8859-7 cannot represent ---
    // Odd-numbered tests use 'こ' (three utf8 bytes); even-numbered tests use
    // U+00C0 (two utf8 bytes).

    #[test]
    fn encode_error_01() {
        let text = "こello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (0, 3),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn encode_error_02() {
        let text = "\u{00C0}ello world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{00C0}',
                error_range: (0, 2),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn encode_error_03() {
        let text = "Hこllo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (1, 4),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn encode_error_04() {
        let text = "H\u{00C0}llo world!";
        let mut buf = [0u8; 64];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{00C0}',
                error_range: (1, 3),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn encode_error_05() {
        let text = "Heこlo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: 'こ',
                error_range: (2, 5),
                output_bytes_written: 2,
            })
        );
    }

    #[test]
    fn encode_error_06() {
        let text = "He\u{00C0}lo world!";
        let mut buf = [0u8; 3];
        assert_eq!(
            encode_from_str(text, &mut buf),
            Err(EncodeError {
                character: '\u{00C0}',
                error_range: (2, 4),
                output_bytes_written: 2,
            })
        );
    }

    // --- Decoding bytes that are undefined in iso-8859-7 ---

    #[test]
    fn decode_error_01() {
        let data = [
            0x48, 0xAE, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn decode_error_02() {
        let data = [
            0x48, 0xD2, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }

    #[test]
    fn decode_error_03() {
        let data = [
            0x48, 0xFF, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21,
        ]; // "Hello world!" with an error on the second byte (undefined byte).
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (1, 2),
                output_bytes_written: 1,
            })
        );
    }
}

View File

@ -1,362 +0,0 @@
//! Encoding/decoding functions for big-endian UTF-16.
//!
//! Because both utf8 and utf16 can represent the entirety of unicode, the
//! only possible error is when invalid utf16 is encountered when decoding
//! to utf8.
use core;
use utils::{from_big_endian_u16, to_big_endian_u16};
use {DecodeError, DecodeResult, EncodeResult};
/// Encodes utf8 `input` as big-endian UTF-16 into `output`.
///
/// Each char becomes one or two 16-bit code units, written as two bytes
/// each. Encoding stops quietly when the next char would not fit entirely in
/// `output`. The result holds the number of input bytes consumed (always on
/// a char boundary) and the written part of `output`. This function never
/// returns an error.
pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    let mut consumed = 0;
    let mut written = 0;

    for (start, ch) in input.char_indices() {
        // One code unit for the BMP, a surrogate pair otherwise.
        let mut scratch = [0u16; 2];
        let units = ch.encode_utf16(&mut scratch);

        // Only write a char when all of its bytes fit.
        if written + units.len() * 2 > output.len() {
            break;
        }
        for unit in units.iter() {
            let bytes = to_big_endian_u16(*unit);
            output[written] = bytes[0];
            output[written + 1] = bytes[1];
            written += 2;
        }
        consumed = start + ch.len_utf8();
    }

    Ok((consumed, &output[..written]))
}
/// Decodes big-endian UTF-16 `input` into utf8 text stored in `output`.
///
/// Decoding stops quietly when the remaining input does not hold a complete
/// code unit or surrogate pair, or when the next char would not fit in
/// `output`. On success the result holds the number of input bytes consumed
/// and the decoded text.
///
/// # Errors
///
/// Returns a `DecodeError` when a trailing surrogate appears without a
/// leading one, or when a leading surrogate is not followed by a trailing
/// one. In both cases the error range covers the two bytes of the first
/// offending code unit.
pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut consumed = 0;
    let mut written = 0;

    // Each pass decodes the char that starts at `consumed`.
    while consumed + 2 <= input.len() {
        let unit_1 = from_big_endian_u16([input[consumed], input[consumed + 1]]);

        let (ch, unit_count) = if unit_1 < 0xD800 || unit_1 > 0xDFFF {
            // SAFETY: a value outside the surrogate range that fits in 16
            // bits is a valid scalar value.
            (unsafe { core::char::from_u32_unchecked(unit_1 as u32) }, 1)
        } else if (unit_1 & 0xFC00) == 0xDC00 {
            // A trailing surrogate with no leading surrogate before it.
            return Err(DecodeError {
                error_range: (consumed, consumed + 2),
                output_bytes_written: written,
            });
        } else {
            // Leading surrogate: the pair needs four bytes in total.
            if consumed + 4 > input.len() {
                break;
            }
            let unit_2 = from_big_endian_u16([input[consumed + 2], input[consumed + 3]]);
            if (unit_2 & 0xFC00) != 0xDC00 {
                // The leading surrogate is not followed by a trailing one.
                return Err(DecodeError {
                    error_range: (consumed, consumed + 2),
                    output_bytes_written: written,
                });
            }
            let scalar = (((unit_1 as u32 - 0xD800) << 10) | (unit_2 as u32 - 0xDC00)) + 0x10000;
            // SAFETY: a leading/trailing surrogate pair always combines to a
            // scalar value in 0x10000..=0x10FFFF.
            (unsafe { core::char::from_u32_unchecked(scalar) }, 2)
        };

        // Re-encode as utf8, stopping if the whole char does not fit.
        let mut scratch = [0u8; 4];
        let utf8 = ch.encode_utf8(&mut scratch);
        let end = written + utf8.len();
        if end > output.len() {
            break;
        }
        output[written..end].copy_from_slice(utf8.as_bytes());

        consumed += unit_count * 2;
        written = end;
    }

    // SAFETY: `output[..written]` holds only complete sequences produced by
    // `char::encode_utf8`.
    let text = unsafe { core::str::from_utf8_unchecked(&output[..written]) };
    Ok((consumed, text))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// "こんにちは!" in big-endian UTF-16 (the last unit is U+FF01).
    const KONNICHIWA: [u8; 12] = [
        0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
    ];

    /// "😺😼" in big-endian UTF-16 (two surrogate pairs).
    const CATS: [u8; 8] = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE, 0x3C];

    // --- Encoding into output buffers of various sizes ---

    #[test]
    fn encode_01() {
        let text = "こんにちは!";
        let mut buf = [0u8; 1];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_02() {
        let text = "こんにちは!";
        let mut buf = [0u8; 2];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 3);
        assert_eq!(encoded, &[0x30, 0x53]);
    }

    #[test]
    fn encode_03() {
        let text = "こんにちは!";
        let mut buf = [0u8; 3];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 3);
        assert_eq!(encoded, &[0x30, 0x53]);
    }

    #[test]
    fn encode_04() {
        let text = "😺😼";
        let mut buf = [0u8; 3];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_05() {
        let text = "😺😼";
        let mut buf = [0u8; 4];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(encoded, &[0xD8, 0x3D, 0xDE, 0x3A]);
    }

    #[test]
    fn encode_06() {
        let text = "😺😼";
        let mut buf = [0u8; 7];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(encoded, &[0xD8, 0x3D, 0xDE, 0x3A]);
    }

    // --- Decoding into output buffers of various sizes ---

    #[test]
    fn decode_01() {
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&KONNICHIWA, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    // Three output bytes hold exactly one hiragana char.
    #[test]
    fn decode_02() {
        let mut buf = [0u8; 3];
        let (consumed_count, decoded) = decode_to_str(&KONNICHIWA, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "こ");
    }

    // Five output bytes still hold only one complete three-byte char.
    #[test]
    fn decode_03() {
        let mut buf = [0u8; 5];
        let (consumed_count, decoded) = decode_to_str(&KONNICHIWA, &mut buf).unwrap();
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "こ");
    }

    #[test]
    fn decode_04() {
        let mut buf = [0u8; 3];
        let (consumed_count, decoded) = decode_to_str(&CATS, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_05() {
        let mut buf = [0u8; 4];
        let (consumed_count, decoded) = decode_to_str(&CATS, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_06() {
        let mut buf = [0u8; 7];
        let (consumed_count, decoded) = decode_to_str(&CATS, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    // --- Decoding truncated input ---

    #[test]
    fn decode_07() {
        let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D]; // "😺😼" with last code unit chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_08() {
        let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8, 0x3D, 0xDE]; // "😺😼" with last byte chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_09() {
        let data = [0xD8, 0x3D, 0xDE, 0x3A, 0xD8]; // "😺😼" with last 3 bytes chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    // --- Decoding invalid surrogates ---

    #[test]
    fn decode_error_01() {
        let data = [
            0xDE, 0x3A, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
        ]; // "こんにちは!" with an error on the first char (end surrogate)
        let mut buf = [0u8; 2];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (0, 2),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_02() {
        let data = [
            0x30, 0x53, 0xDE, 0x3A, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
        ]; // "こんにちは!" with an error on the second char (end surrogate)
        let mut buf = [0u8; 3];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (2, 4),
                output_bytes_written: 3,
            })
        );
    }

    #[test]
    fn decode_error_03() {
        let data = [
            0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0xDE, 0x3A, 0x30, 0x6F, 0xFF, 0x01,
        ]; // "こんにちは!" with an error on the fourth char (end surrogate)
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (6, 8),
                output_bytes_written: 9,
            })
        );
    }

    #[test]
    fn decode_error_04() {
        let data = [
            0xD8, 0x3D, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
        ]; // "こんにちは!" with an error on the first char (start surrogate)
        let mut buf = [0u8; 2];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (0, 2),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_05() {
        let data = [
            0x30, 0x53, 0xD8, 0x3D, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0xFF, 0x01,
        ]; // "こんにちは!" with an error on the second char (start surrogate)
        let mut buf = [0u8; 3];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (2, 4),
                output_bytes_written: 3,
            })
        );
    }

    #[test]
    fn decode_error_06() {
        let data = [
            0x30, 0x53, 0x30, 0x93, 0x30, 0x6B, 0xD8, 0x3D, 0x30, 0x6F, 0xFF, 0x01,
        ]; // "こんにちは!" with an error on the fourth char (start surrogate)
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (6, 8),
                output_bytes_written: 9,
            })
        );
    }
}

View File

@ -1,362 +0,0 @@
//! Encoding/decoding functions for little-endian UTF-16.
//!
//! Because both utf8 and utf16 can represent the entirety of unicode, the
//! only possible error is when invalid utf16 is encountered when decoding
//! to utf8.
use core;
use utils::{from_little_endian_u16, to_little_endian_u16};
use {DecodeError, DecodeResult, EncodeResult};
/// Encodes utf8 `input` as little-endian UTF-16 into `output`.
///
/// Each char becomes one or two 16-bit code units, written as two bytes
/// each. Encoding stops quietly when the next char would not fit entirely in
/// `output`. The result holds the number of input bytes consumed (always on
/// a char boundary) and the written part of `output`. This function never
/// returns an error.
pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    let mut consumed = 0;
    let mut written = 0;

    for (start, ch) in input.char_indices() {
        // One code unit for the BMP, a surrogate pair otherwise.
        let mut scratch = [0u16; 2];
        let units = ch.encode_utf16(&mut scratch);

        // Only write a char when all of its bytes fit.
        if written + units.len() * 2 > output.len() {
            break;
        }
        for unit in units.iter() {
            let bytes = to_little_endian_u16(*unit);
            output[written] = bytes[0];
            output[written + 1] = bytes[1];
            written += 2;
        }
        consumed = start + ch.len_utf8();
    }

    Ok((consumed, &output[..written]))
}
/// Decodes little-endian UTF-16 `input` into utf8 text stored in `output`.
///
/// Decoding stops quietly when the remaining input does not hold a complete
/// code unit or surrogate pair, or when the next char would not fit in
/// `output`. On success the result holds the number of input bytes consumed
/// and the decoded text.
///
/// # Errors
///
/// Returns a `DecodeError` when a trailing surrogate appears without a
/// leading one, or when a leading surrogate is not followed by a trailing
/// one. In both cases the error range covers the two bytes of the first
/// offending code unit.
pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut consumed = 0;
    let mut written = 0;

    // Each pass decodes the char that starts at `consumed`.
    while consumed + 2 <= input.len() {
        let unit_1 = from_little_endian_u16([input[consumed], input[consumed + 1]]);

        let (ch, unit_count) = if unit_1 < 0xD800 || unit_1 > 0xDFFF {
            // SAFETY: a value outside the surrogate range that fits in 16
            // bits is a valid scalar value.
            (unsafe { core::char::from_u32_unchecked(unit_1 as u32) }, 1)
        } else if (unit_1 & 0xFC00) == 0xDC00 {
            // A trailing surrogate with no leading surrogate before it.
            return Err(DecodeError {
                error_range: (consumed, consumed + 2),
                output_bytes_written: written,
            });
        } else {
            // Leading surrogate: the pair needs four bytes in total.
            if consumed + 4 > input.len() {
                break;
            }
            let unit_2 = from_little_endian_u16([input[consumed + 2], input[consumed + 3]]);
            if (unit_2 & 0xFC00) != 0xDC00 {
                // The leading surrogate is not followed by a trailing one.
                return Err(DecodeError {
                    error_range: (consumed, consumed + 2),
                    output_bytes_written: written,
                });
            }
            let scalar = (((unit_1 as u32 - 0xD800) << 10) | (unit_2 as u32 - 0xDC00)) + 0x10000;
            // SAFETY: a leading/trailing surrogate pair always combines to a
            // scalar value in 0x10000..=0x10FFFF.
            (unsafe { core::char::from_u32_unchecked(scalar) }, 2)
        };

        // Re-encode as utf8, stopping if the whole char does not fit.
        let mut scratch = [0u8; 4];
        let utf8 = ch.encode_utf8(&mut scratch);
        let end = written + utf8.len();
        if end > output.len() {
            break;
        }
        output[written..end].copy_from_slice(utf8.as_bytes());

        consumed += unit_count * 2;
        written = end;
    }

    // SAFETY: `output[..written]` holds only complete sequences produced by
    // `char::encode_utf8`.
    let text = unsafe { core::str::from_utf8_unchecked(&output[..written]) };
    Ok((consumed, text))
}
// Unit tests for the little-endian UTF-16 encoder/decoder.
//
// The buffer sizes are chosen to probe the boundaries where a code point
// just fits / just fails to fit in the output buffer.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn encode_01() {
        let text = "こんにちは!";
        let mut buf = [0u8; 1];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_02() {
        let text = "こんにちは!";
        let mut buf = [0u8; 2];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 3);
        assert_eq!(encoded, &[0x53, 0x30]);
    }

    #[test]
    fn encode_03() {
        let text = "こんにちは!";
        let mut buf = [0u8; 3];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 3);
        assert_eq!(encoded, &[0x53, 0x30]);
    }

    #[test]
    fn encode_04() {
        let text = "😺😼";
        let mut buf = [0u8; 3];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_05() {
        let text = "😺😼";
        let mut buf = [0u8; 4];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(encoded, &[0x3D, 0xD8, 0x3A, 0xDE]);
    }

    #[test]
    fn encode_06() {
        let text = "😺😼";
        let mut buf = [0u8; 7];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(encoded, &[0x3D, 0xD8, 0x3A, 0xDE]);
    }

    #[test]
    fn decode_01() {
        let data = [
            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！"
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_02() {
        let data = [
            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！"
        let mut buf = [0u8; 3];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        // One code unit consumed => exactly the first character was decoded.
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "こ");
    }

    #[test]
    fn decode_03() {
        let data = [
            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！"
        let mut buf = [0u8; 5];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        // The second character needs 3 more bytes, but only 2 remain.
        assert_eq!(consumed_count, 2);
        assert_eq!(decoded, "こ");
    }

    #[test]
    fn decode_04() {
        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
        let mut buf = [0u8; 3];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_05() {
        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
        let mut buf = [0u8; 4];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_06() {
        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C, 0xDE]; // "😺😼"
        let mut buf = [0u8; 7];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_07() {
        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8]; // "😺😼" with last codepoint chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_08() {
        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D, 0xD8, 0x3C]; // "😺😼" with last byte chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_09() {
        let data = [0x3D, 0xD8, 0x3A, 0xDE, 0x3D]; // "😺😼" with last 3 bytes chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_error_01() {
        let data = [
            0x3A, 0xDE, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！" with an error on the first char (end surrogate)
        let mut buf = [0u8; 2];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (0, 2),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_02() {
        let data = [
            0x53, 0x30, 0x3A, 0xDE, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！" with an error on the second char (end surrogate)
        let mut buf = [0u8; 3];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (2, 4),
                output_bytes_written: 3,
            })
        );
    }

    #[test]
    fn decode_error_03() {
        let data = [
            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x3A, 0xDE, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！" with an error on the fourth char (end surrogate)
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (6, 8),
                output_bytes_written: 9,
            })
        );
    }

    #[test]
    fn decode_error_04() {
        let data = [
            0x3D, 0xD8, 0x93, 0x30, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！" with an error on the first char (start surrogate)
        let mut buf = [0u8; 2];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (0, 2),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_05() {
        let data = [
            0x53, 0x30, 0x3D, 0xD8, 0x6B, 0x30, 0x61, 0x30, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！" with an error on the second char (start surrogate)
        let mut buf = [0u8; 3];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (2, 4),
                output_bytes_written: 3,
            })
        );
    }

    #[test]
    fn decode_error_06() {
        let data = [
            0x53, 0x30, 0x93, 0x30, 0x6B, 0x30, 0x3D, 0xD8, 0x6F, 0x30, 0x01, 0xFF,
        ]; // "こんにちは！" with an error on the fourth char (start surrogate)
        let mut buf = [0u8; 64];
        let error = decode_to_str(&data, &mut buf);
        assert_eq!(
            error,
            Err(DecodeError {
                error_range: (6, 8),
                output_bytes_written: 9,
            })
        );
    }
}

View File

@ -1,327 +0,0 @@
//! Encoding/decoding functions for big-endian UTF-32.
//!
//! Because both utf8 and utf32 can represent the entirety of unicode, the
//! only possible error is when invalid utf32 is encountered when decoding
//! to utf8.
use core;
use utils::{from_big_endian_u32, to_big_endian_u32};
use {DecodeError, DecodeResult, EncodeResult};
/// Encodes as much of `input` as fits into `output` as big-endian UTF-32.
///
/// Each `char` is written as one 4-byte code unit; a code unit is never
/// written partially. On success returns the number of bytes of `input`
/// that were consumed (always on a char boundary) together with the
/// filled prefix of `output`.
pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;
    for (offset, c) in input.char_indices() {
        // Stop as soon as a whole 4-byte code unit no longer fits.
        // `output_i` never exceeds `output.len()`, so this cannot underflow.
        if output.len() - output_i < 4 {
            break;
        }
        output[output_i..(output_i + 4)].copy_from_slice(&to_big_endian_u32(c as u32));
        output_i += 4;
        // Everything up to and including this char has now been consumed.
        input_i = offset + c.len_utf8();
    }
    Ok((input_i, &output[..output_i]))
}
/// Decodes big-endian UTF-32 from `input` into UTF-8 in `output`.
///
/// Decoding stops early (without error) when the next character does not
/// fit in `output`, or when fewer than 4 input bytes remain. On success
/// returns the number of input bytes consumed and the decoded text.
///
/// Returns a `DecodeError` when a 4-byte unit is not a valid Unicode
/// scalar value; the error carries the byte range of the offending unit
/// and how many bytes of valid output were written before it.
pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;
    // Walk the input one 4-byte code unit at a time.
    for bytes in input.chunks(4) {
        // A trailing partial unit is left unconsumed for the caller.
        if bytes.len() < 4 {
            break;
        }
        let code = match core::char::from_u32(from_big_endian_u32([
            bytes[0], bytes[1], bytes[2], bytes[3],
        ])) {
            Some(code) => code,
            None => {
                // Error: invalid codepoint.
                return Err(DecodeError {
                    error_range: (input_i, input_i + 4),
                    output_bytes_written: output_i,
                });
            }
        };
        // Encode to utf8.
        let mut buf = [0u8; 4];
        let s = code.encode_utf8(&mut buf);
        if (output_i + s.len()) > output.len() {
            break;
        }
        output[output_i..(output_i + s.len())].copy_from_slice(s.as_bytes());
        // Update our counters.
        input_i += 4;
        output_i += s.len();
    }
    // SAFETY: `output[..output_i]` consists solely of whole `char`s written
    // via `char::encode_utf8`, so it is valid UTF-8.
    Ok((input_i, unsafe {
        core::str::from_utf8_unchecked(&output[..output_i])
    }))
}
// Unit tests for the big-endian UTF-32 encoder/decoder.
//
// The buffer sizes are chosen to probe the boundaries where a code point
// just fits / just fails to fit in the output buffer.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn encode_01() {
        let text = "こんにちは!";
        let mut buf = [0u8; 3];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_02() {
        let text = "こんにちは!";
        let mut buf = [0u8; 4];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 3);
        assert_eq!(encoded, &[0x00, 0x00, 0x30, 0x53]);
    }

    #[test]
    fn encode_03() {
        let text = "こんにちは!";
        let mut buf = [0u8; 7];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 3);
        assert_eq!(encoded, &[0x00, 0x00, 0x30, 0x53]);
    }

    #[test]
    fn encode_04() {
        let text = "😺😼";
        let mut buf = [0u8; 3];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_05() {
        let text = "😺😼";
        let mut buf = [0u8; 4];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(encoded, &[0x00, 0x01, 0xF6, 0x3A]);
    }

    #[test]
    fn encode_06() {
        let text = "😺😼";
        let mut buf = [0u8; 7];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(encoded, &[0x00, 0x01, 0xF6, 0x3A]);
    }

    #[test]
    fn decode_01() {
        let data = [
            0x00, 0x00, 0x30, 0x53, 0x00, 0x00, 0x30, 0x93, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！"
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_02() {
        let data = [
            0x00, 0x00, 0x30, 0x53, 0x00, 0x00, 0x30, 0x93, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！"
        let mut buf = [0u8; 3];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        // One code unit consumed => exactly the first character was decoded.
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "こ");
    }

    #[test]
    fn decode_03() {
        let data = [
            0x00, 0x00, 0x30, 0x53, 0x00, 0x00, 0x30, 0x93, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！"
        let mut buf = [0u8; 5];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        // The second character needs 3 more bytes, but only 2 remain.
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "こ");
    }

    #[test]
    fn decode_04() {
        let data = [0x00, 0x01, 0xF6, 0x3A, 0x00, 0x01, 0xF6, 0x3C]; // "😺😼"
        let mut buf = [0u8; 3];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_05() {
        let data = [0x00, 0x01, 0xF6, 0x3A, 0x00, 0x01, 0xF6, 0x3C]; // "😺😼"
        let mut buf = [0u8; 4];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_06() {
        let data = [0x00, 0x01, 0xF6, 0x3A, 0x00, 0x01, 0xF6, 0x3C]; // "😺😼"
        let mut buf = [0u8; 7];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_07() {
        let data = [0x00, 0x01, 0xF6, 0x3A, 0x00, 0x01, 0xF6]; // "😺😼" with last byte chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_08() {
        let data = [0x00, 0x01, 0xF6, 0x3A, 0x00, 0x01]; // "😺😼" with last 2 bytes chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_09() {
        let data = [0x00, 0x01, 0xF6, 0x3A, 0x00]; // "😺😼" with last 3 bytes chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_error_01() {
        let data = [
            0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x30, 0x93, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！" with an error on the first char (value out of range)
        let mut buf = [0u8; 2];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (0, 4),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_02() {
        let data = [
            0x00, 0x00, 0xD8, 0x00, 0x00, 0x00, 0x30, 0x93, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！" with an error on the first char (value in surrogate range)
        let mut buf = [0u8; 2];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (0, 4),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_03() {
        let data = [
            0x00, 0x00, 0xDF, 0xFF, 0x00, 0x00, 0x30, 0x93, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！" with an error on the first char (value in surrogate range)
        let mut buf = [0u8; 64];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (0, 4),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_04() {
        let data = [
            0x00, 0x00, 0x30, 0x53, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！" with an error on the second char (value out of range)
        let mut buf = [0u8; 64];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (4, 8),
                output_bytes_written: 3,
            })
        );
        assert_eq!(&buf[..3], &[0xE3, 0x81, 0x93]);
    }

    #[test]
    fn decode_error_05() {
        let data = [
            0x00, 0x00, 0x30, 0x53, 0x00, 0x00, 0xD8, 0x00, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！" with an error on the second char (value in surrogate range)
        let mut buf = [0u8; 64];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (4, 8),
                output_bytes_written: 3,
            })
        );
        assert_eq!(&buf[..3], &[0xE3, 0x81, 0x93]);
    }

    #[test]
    fn decode_error_06() {
        let data = [
            0x00, 0x00, 0x30, 0x53, 0x00, 0x00, 0xDF, 0xFF, 0x00, 0x00, 0x30, 0x6B, 0x00, 0x00,
            0x30, 0x61, 0x00, 0x00, 0x30, 0x6F, 0x00, 0x00, 0xFF, 0x01,
        ]; // "こんにちは！" with an error on the second char (value in surrogate range)
        let mut buf = [0u8; 64];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (4, 8),
                output_bytes_written: 3,
            })
        );
        assert_eq!(&buf[..3], &[0xE3, 0x81, 0x93]);
    }
}

View File

@ -1,327 +0,0 @@
//! Encoding/decoding functions for little-endian UTF-32.
//!
//! Because both utf8 and utf32 can represent the entirety of unicode, the
//! only possible error is when invalid utf32 is encountered when decoding
//! to utf8.
use core;
use utils::{from_little_endian_u32, to_little_endian_u32};
use {DecodeError, DecodeResult, EncodeResult};
/// Encodes as much of `input` as fits into `output` as little-endian UTF-32.
///
/// Each `char` is written as one 4-byte code unit; a code unit is never
/// written partially. On success returns the number of bytes of `input`
/// that were consumed (always on a char boundary) together with the
/// filled prefix of `output`.
pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;
    for (offset, c) in input.char_indices() {
        // Stop as soon as a whole 4-byte code unit no longer fits.
        // `output_i` never exceeds `output.len()`, so this cannot underflow.
        if output.len() - output_i < 4 {
            break;
        }
        output[output_i..(output_i + 4)].copy_from_slice(&to_little_endian_u32(c as u32));
        output_i += 4;
        // Everything up to and including this char has now been consumed.
        input_i = offset + c.len_utf8();
    }
    Ok((input_i, &output[..output_i]))
}
/// Decodes little-endian UTF-32 from `input` into UTF-8 in `output`.
///
/// Decoding stops early (without error) when the next character does not
/// fit in `output`, or when fewer than 4 input bytes remain. On success
/// returns the number of input bytes consumed and the decoded text.
///
/// Returns a `DecodeError` when a 4-byte unit is not a valid Unicode
/// scalar value; the error carries the byte range of the offending unit
/// and how many bytes of valid output were written before it.
pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    let mut input_i = 0;
    let mut output_i = 0;
    // Walk the input one 4-byte code unit at a time.
    for bytes in input.chunks(4) {
        // A trailing partial unit is left unconsumed for the caller.
        if bytes.len() < 4 {
            break;
        }
        let code = match core::char::from_u32(from_little_endian_u32([
            bytes[0], bytes[1], bytes[2], bytes[3],
        ])) {
            Some(code) => code,
            None => {
                // Error: invalid codepoint.
                return Err(DecodeError {
                    error_range: (input_i, input_i + 4),
                    output_bytes_written: output_i,
                });
            }
        };
        // Encode to utf8.
        let mut buf = [0u8; 4];
        let s = code.encode_utf8(&mut buf);
        if (output_i + s.len()) > output.len() {
            break;
        }
        output[output_i..(output_i + s.len())].copy_from_slice(s.as_bytes());
        // Update our counters.
        input_i += 4;
        output_i += s.len();
    }
    // SAFETY: `output[..output_i]` consists solely of whole `char`s written
    // via `char::encode_utf8`, so it is valid UTF-8.
    Ok((input_i, unsafe {
        core::str::from_utf8_unchecked(&output[..output_i])
    }))
}
// Unit tests for the little-endian UTF-32 encoder/decoder.
//
// The buffer sizes are chosen to probe the boundaries where a code point
// just fits / just fails to fit in the output buffer.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn encode_01() {
        let text = "こんにちは!";
        let mut buf = [0u8; 3];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_02() {
        let text = "こんにちは!";
        let mut buf = [0u8; 4];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 3);
        assert_eq!(encoded, &[0x53, 0x30, 0x00, 0x00]);
    }

    #[test]
    fn encode_03() {
        let text = "こんにちは!";
        let mut buf = [0u8; 7];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 3);
        assert_eq!(encoded, &[0x53, 0x30, 0x00, 0x00]);
    }

    #[test]
    fn encode_04() {
        let text = "😺😼";
        let mut buf = [0u8; 3];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(encoded, &[]);
    }

    #[test]
    fn encode_05() {
        let text = "😺😼";
        let mut buf = [0u8; 4];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(encoded, &[0x3A, 0xF6, 0x01, 0x00]);
    }

    #[test]
    fn encode_06() {
        let text = "😺😼";
        let mut buf = [0u8; 7];
        let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(encoded, &[0x3A, 0xF6, 0x01, 0x00]);
    }

    #[test]
    fn decode_01() {
        let data = [
            0x53, 0x30, 0x00, 0x00, 0x93, 0x30, 0x00, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！"
        let mut buf = [0u8; 2];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_02() {
        let data = [
            0x53, 0x30, 0x00, 0x00, 0x93, 0x30, 0x00, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！"
        let mut buf = [0u8; 3];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        // One code unit consumed => exactly the first character was decoded.
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "こ");
    }

    #[test]
    fn decode_03() {
        let data = [
            0x53, 0x30, 0x00, 0x00, 0x93, 0x30, 0x00, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！"
        let mut buf = [0u8; 5];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        // The second character needs 3 more bytes, but only 2 remain.
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "こ");
    }

    #[test]
    fn decode_04() {
        let data = [0x3A, 0xF6, 0x01, 0x00, 0x3C, 0xF6, 0x01, 0x00]; // "😺😼"
        let mut buf = [0u8; 3];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 0);
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_05() {
        let data = [0x3A, 0xF6, 0x01, 0x00, 0x3C, 0xF6, 0x01, 0x00]; // "😺😼"
        let mut buf = [0u8; 4];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_06() {
        let data = [0x3A, 0xF6, 0x01, 0x00, 0x3C, 0xF6, 0x01, 0x00]; // "😺😼"
        let mut buf = [0u8; 7];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_07() {
        let data = [0x3A, 0xF6, 0x01, 0x00, 0x3C, 0xF6, 0x01]; // "😺😼" with last byte chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_08() {
        let data = [0x3A, 0xF6, 0x01, 0x00, 0x3C, 0xF6]; // "😺😼" with last 2 bytes chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_09() {
        let data = [0x3A, 0xF6, 0x01, 0x00, 0x3C]; // "😺😼" with last 3 bytes chopped off.
        let mut buf = [0u8; 64];
        let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
        assert_eq!(consumed_count, 4);
        assert_eq!(decoded, "😺");
    }

    #[test]
    fn decode_error_01() {
        let data = [
            0x00, 0x00, 0x11, 0x00, 0x93, 0x30, 0x00, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！" with an error on the first char (value out of range)
        let mut buf = [0u8; 2];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (0, 4),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_02() {
        let data = [
            0x00, 0xD8, 0x00, 0x00, 0x93, 0x30, 0x00, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！" with an error on the first char (value in surrogate range)
        let mut buf = [0u8; 2];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (0, 4),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_03() {
        let data = [
            0xFF, 0xDF, 0x00, 0x00, 0x93, 0x30, 0x00, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！" with an error on the first char (value in surrogate range)
        let mut buf = [0u8; 64];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (0, 4),
                output_bytes_written: 0,
            })
        );
    }

    #[test]
    fn decode_error_04() {
        let data = [
            0x53, 0x30, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！" with an error on the second char (value out of range)
        let mut buf = [0u8; 64];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (4, 8),
                output_bytes_written: 3,
            })
        );
        assert_eq!(&buf[..3], &[0xE3, 0x81, 0x93]);
    }

    #[test]
    fn decode_error_05() {
        let data = [
            0x53, 0x30, 0x00, 0x00, 0x00, 0xD8, 0x00, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！" with an error on the second char (value in surrogate range)
        let mut buf = [0u8; 64];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (4, 8),
                output_bytes_written: 3,
            })
        );
        assert_eq!(&buf[..3], &[0xE3, 0x81, 0x93]);
    }

    #[test]
    fn decode_error_06() {
        let data = [
            0x53, 0x30, 0x00, 0x00, 0xFF, 0xDF, 0x00, 0x00, 0x6B, 0x30, 0x00, 0x00, 0x61, 0x30,
            0x00, 0x00, 0x6F, 0x30, 0x00, 0x00, 0x01, 0xFF, 0x00, 0x00,
        ]; // "こんにちは！" with an error on the second char (value in surrogate range)
        let mut buf = [0u8; 64];
        assert_eq!(
            decode_to_str(&data, &mut buf),
            Err(DecodeError {
                error_range: (4, 8),
                output_bytes_written: 3,
            })
        );
        assert_eq!(&buf[..3], &[0xE3, 0x81, 0x93]);
    }
}

View File

@ -1,338 +0,0 @@
//! These functions are essentially redundant, since they're supposedly
//! encoding/decoding between utf8 and... utf8. However, `decode_to_str()`
//! is still useful for validating unknown input. And they allow a uniform
//! API for all encodings.
use core;
use {DecodeError, DecodeResult, EncodeResult};
/// "Encodes" `input` to UTF-8 by copying as many whole code points as fit
/// into `output`.
///
/// Returns the number of input bytes consumed and the filled prefix of
/// `output`; the two lengths are always equal.
pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    let src = input.as_bytes();
    // `copy_len` only ever returns a char boundary of `input`, so no
    // multi-byte code point is split by the copy below.
    let n = copy_len(src, output.len());
    output[..n].copy_from_slice(&src[..n]);
    Ok((n, &output[..n]))
}
/// Validates `input` as UTF-8 and copies as many whole, valid code points
/// as fit into `output`.
///
/// On success returns the number of input bytes consumed and the copied
/// text. Input that merely ends in the middle of an otherwise valid code
/// point is not an error: the incomplete tail is left unconsumed so the
/// caller can retry once more data is available.
///
/// Returns a `DecodeError` once all valid data before an invalid sequence
/// has been copied and there is still room in `output`. The reported
/// range runs from the first invalid byte up to the next byte that could
/// start a code point (or the end of `input`).
pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    // Find how much of the data is valid utf8, and whether what follows is
    // merely cut off (`error_len()` is `None` only when the input ended
    // unexpectedly inside a code point) or actually invalid.
    let (valid_up_to, is_truncated) = match core::str::from_utf8(input) {
        Ok(text) => (text.len(), false),
        Err(e) => (e.valid_up_to(), e.error_len().is_none()),
    };

    // Copy over what we can.
    let bytes_copied = copy_len(&input[..valid_up_to], output.len());
    output[..bytes_copied].copy_from_slice(&input[..bytes_copied]);

    // Only report an error once every valid byte before it has been
    // written and the output buffer is not the thing that stopped us.
    let reached_bad_data =
        bytes_copied < output.len() && bytes_copied == valid_up_to && valid_up_to < input.len();
    if reached_bad_data && !is_truncated {
        // Find the byte range of the error by finding the next valid
        // starting byte (or reaching end of input).
        let mut i = valid_up_to + 1;
        while i < input.len()
            && ((input[i] & 0b11000000) == 0b10000000 || (input[i] & 0b11111000) == 0b11111000)
        {
            i += 1;
        }
        return Err(DecodeError {
            error_range: (valid_up_to, i),
            output_bytes_written: bytes_copied,
        });
    }

    // No error, return success.
    // SAFETY: `output[..bytes_copied]` is a prefix of the validated part of
    // `input`, cut on a code point boundary by `copy_len`.
    Ok((bytes_copied, unsafe {
        core::str::from_utf8_unchecked(&output[..bytes_copied])
    }))
}
/// Returns the number of bytes of `input` that can be copied into a buffer
/// of `output_len` bytes without splitting a multi-byte code point.
///
/// When everything fits this is simply `input.len()`; otherwise the
/// result is backed off from `output_len` to the nearest preceding byte
/// that is not a UTF-8 continuation byte.
///
/// Input is assumed to be valid and complete utf8 (i.e. could be turned
/// directly into a &str).
#[inline(always)]
fn copy_len(input: &[u8], output_len: usize) -> usize {
    if input.len() <= output_len {
        return input.len();
    }
    // `output_len < input.len()` here, so `input[output_len]` exists. Search
    // backwards from it for a byte that begins a code point; fall back to 0
    // when every byte examined is a continuation byte.
    input[..(output_len + 1)]
        .iter()
        .rposition(|&byte| (byte & 0b11000000) != 0b10000000)
        .unwrap_or(0)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn encode_01() {
let text = "こんにちは!";
let mut buf = [0u8; 2];
let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
assert_eq!(consumed_count, 0);
assert_eq!(encoded, &[]);
}
#[test]
fn encode_02() {
let text = "こんにちは!";
let mut buf = [0u8; 3];
let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
assert_eq!(consumed_count, 3);
assert_eq!(encoded, &[0xE3, 0x81, 0x93]);
}
#[test]
fn encode_03() {
let text = "こんにちは!";
let mut buf = [0u8; 5];
let (consumed_count, encoded) = encode_from_str(text, &mut buf).unwrap();
assert_eq!(consumed_count, 3);
assert_eq!(encoded, &[0xE3, 0x81, 0x93]);
}
#[test]
fn decode_01() {
let data = [
0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81,
0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!"
let mut buf = [0u8; 2];
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
assert_eq!(consumed_count, 0);
assert_eq!(decoded, "");
}
#[test]
fn decode_02() {
let data = [
0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81,
0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!"
let mut buf = [0u8; 3];
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
assert_eq!(consumed_count, 3);
assert_eq!(decoded, "");
}
#[test]
fn decode_03() {
let data = [
0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81,
0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!"
let mut buf = [0u8; 5];
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
assert_eq!(consumed_count, 3);
assert_eq!(decoded, "");
}
#[test]
fn decode_04() {
let data = [
0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81,
0xAF, 0xEF, 0xBC,
]; // "こんにちは!" with last byte chopped off.
let mut buf = [0u8; 64];
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
assert_eq!(consumed_count, 15);
assert_eq!(decoded, "こんにちは");
}
#[test]
fn decode_05() {
let data = [
0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81,
0xAF, 0xEF,
]; // "こんにちは!" with last 2 bytes chopped off.
let mut buf = [0u8; 64];
let (consumed_count, decoded) = decode_to_str(&data, &mut buf).unwrap();
assert_eq!(consumed_count, 15);
assert_eq!(decoded, "こんにちは");
}
#[test]
fn decode_error_01() {
let data = [
0b10000000, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3,
0x81, 0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the first char (continuing code unit).
let mut buf = [0u8; 2];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (0, 3),
output_bytes_written: 0,
})
);
}
#[test]
fn decode_error_02() {
let data = [
0xE3, 0x81, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF,
0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the first code point (too few continuing code units).
let mut buf = [0u8; 2];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (0, 2),
output_bytes_written: 0,
})
);
}
#[test]
fn decode_error_03() {
let data = [
0xE3, 0x81, 0x93, 0b10000000, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3,
0x81, 0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the second code point (continuing code unit).
let mut buf = [0u8; 64];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (3, 6),
output_bytes_written: 3,
})
);
}
#[test]
fn decode_error_04() {
let data = [
0xE3, 0x81, 0x93, 0b10000000, 0x82, 0x93, 0b10000000, 0x81, 0xAB, 0b10000000, 0x81,
0xA1, 0xE3, 0x81, 0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the second code point (lots of continuing code units).
let mut buf = [0u8; 64];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (3, 12),
output_bytes_written: 3,
})
);
}
#[test]
fn decode_error_05() {
let data = [
0xE3, 0x81, 0x93, 0b11111000, 0x82, 0x93, 0x93, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81,
0xA1, 0xE3, 0x81, 0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the second code point (invalid bit pattern).
let mut buf = [0u8; 64];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (3, 8),
output_bytes_written: 3,
})
);
}
#[test]
fn decode_error_06() {
let data = [
0xE3, 0x81, 0x93, 0xED, 0xA0, 0x80, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81,
0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the second code point (beginning of surrogate range).
let mut buf = [0u8; 64];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (3, 6),
output_bytes_written: 3,
})
);
}
#[test]
fn decode_error_07() {
let data = [
0xE3, 0x81, 0x93, 0xED, 0xBF, 0xBF, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81,
0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the second code point (end of surrogate range).
let mut buf = [0u8; 64];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (3, 6),
output_bytes_written: 3,
})
);
}
#[test]
fn decode_error_08() {
let data = [
0xE3, 0x81, 0x93, 0xF4, 0x90, 0x80, 0x80, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3,
0x81, 0xAF, 0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the second code point (out of unicode range).
let mut buf = [0u8; 64];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (3, 7),
output_bytes_written: 3,
})
);
}
#[test]
fn decode_error_09() {
let data = [
0xE3, 0x81, 0x93, 0xC0, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF,
0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the second code point (byte == 0xC0).
let mut buf = [0u8; 64];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (3, 5),
output_bytes_written: 3,
})
);
}
#[test]
fn decode_error_10() {
let data = [
0xE3, 0x81, 0x93, 0xC1, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF,
0xEF, 0xBC, 0x81,
]; // "こんにちは!" with an error on the second code point (byte == 0xC1).
let mut buf = [0u8; 64];
let error = decode_to_str(&data, &mut buf);
assert_eq!(
error,
Err(DecodeError {
error_range: (3, 5),
output_bytes_written: 3,
})
);
}
}

View File

@ -1,121 +0,0 @@
use core::mem::transmute;
/// Returns the two bytes of `n`, most significant byte first.
#[inline(always)]
pub(crate) fn to_big_endian_u16(n: u16) -> [u8; 2] {
    // `to_be()` arranges the value so its in-memory bytes are big-endian.
    // SAFETY: `u16` and `[u8; 2]` have the same size and every bit pattern
    // is valid for both.
    unsafe { transmute::<u16, [u8; 2]>(n.to_be()) }
}
/// Assembles a `u16` from two bytes given most significant byte first.
#[inline(always)]
pub(crate) fn from_big_endian_u16(n: [u8; 2]) -> u16 {
    // SAFETY: `[u8; 2]` and `u16` have the same size and every bit pattern
    // is valid for both.
    let raw = unsafe { transmute::<[u8; 2], u16>(n) };
    // `raw` holds the bytes in memory order; reinterpret them as big-endian.
    u16::from_be(raw)
}
/// Returns the two bytes of `n`, least significant byte first.
#[inline(always)]
pub(crate) fn to_little_endian_u16(n: u16) -> [u8; 2] {
    // `to_le()` arranges the value so its in-memory bytes are little-endian.
    // SAFETY: `u16` and `[u8; 2]` have the same size and every bit pattern
    // is valid for both.
    unsafe { transmute::<u16, [u8; 2]>(n.to_le()) }
}
/// Assembles a `u16` from two bytes given least significant byte first.
#[inline(always)]
pub(crate) fn from_little_endian_u16(n: [u8; 2]) -> u16 {
    // SAFETY: `[u8; 2]` and `u16` have the same size and every bit pattern
    // is valid for both.
    let raw = unsafe { transmute::<[u8; 2], u16>(n) };
    // `raw` holds the bytes in memory order; reinterpret them as little-endian.
    u16::from_le(raw)
}
/// Returns the four bytes of `n`, most significant byte first.
#[inline(always)]
pub(crate) fn to_big_endian_u32(n: u32) -> [u8; 4] {
    // `to_be()` arranges the value so its in-memory bytes are big-endian.
    // SAFETY: `u32` and `[u8; 4]` have the same size and every bit pattern
    // is valid for both.
    unsafe { transmute::<u32, [u8; 4]>(n.to_be()) }
}
/// Assembles a `u32` from four bytes given most significant byte first.
#[inline(always)]
pub(crate) fn from_big_endian_u32(n: [u8; 4]) -> u32 {
    // SAFETY: `[u8; 4]` and `u32` have the same size and every bit pattern
    // is valid for both.
    let raw = unsafe { transmute::<[u8; 4], u32>(n) };
    // `raw` holds the bytes in memory order; reinterpret them as big-endian.
    u32::from_be(raw)
}
/// Splits `n` into its four bytes in little-endian order (least significant
/// byte first).
///
/// Implemented with plain shifts, so the result is the same on every target
/// regardless of native byte order, and no `unsafe` is needed.
#[inline(always)]
pub(crate) fn to_little_endian_u32(n: u32) -> [u8; 4] {
    // Each `as u8` intentionally truncates to the low eight bits of the
    // shifted value.
    [n as u8, (n >> 8) as u8, (n >> 16) as u8, (n >> 24) as u8]
}
/// Assembles a `u32` from four bytes given in little-endian order
/// (`n[0]` is the least significant byte).
///
/// Implemented with plain shifts, so the result is the same on every target
/// regardless of native byte order, and no `unsafe` is needed.
#[inline(always)]
pub(crate) fn from_little_endian_u32(n: [u8; 4]) -> u32 {
    u32::from(n[0])
        | (u32::from(n[1]) << 8)
        | (u32::from(n[2]) << 16)
        | (u32::from(n[3]) << 24)
}

View File

@ -1,239 +0,0 @@
#[macro_use]
extern crate proptest;
extern crate text_encoding;
use proptest::collection::vec;
use proptest::test_runner::Config;
use text_encoding::{decode_to_str, encode_from_str, Encoding};
proptest! {
#![proptest_config(Config::with_cases(512))]
#[test]
fn pt_utf8_roundtrip(ref text in "\\PC*\\PC*\\PC*") {
    // Scratch buffer shared by every encode/decode call below.
    let mut buf = [0u8; 32];
    let mut utf8_encoded: Vec<u8> = Vec::new();
    let mut utf8 = String::new();

    // Encode to utf8, advancing past whatever each call consumed.
    let mut pending_text = &text[..];
    while !pending_text.is_empty() {
        match encode_from_str(Encoding::Utf8, pending_text, &mut buf) {
            Ok((consumed, encoded)) => {
                pending_text = &pending_text[consumed..];
                utf8_encoded.extend_from_slice(encoded);
            }
            Err(_) => panic!("Error when encoding."),
        }
    }

    // Decode back from utf8 in the same incremental fashion.
    let mut pending_bytes = &utf8_encoded[..];
    while !pending_bytes.is_empty() {
        match decode_to_str(Encoding::Utf8, pending_bytes, &mut buf) {
            Ok((consumed, decoded)) => {
                pending_bytes = &pending_bytes[consumed..];
                utf8.extend(decoded.chars());
            }
            Err(_) => panic!("Error when decoding."),
        }
    }

    // The text must survive the round trip, and since the source text is
    // already utf8, the encoded bytes must match it exactly as well.
    assert_eq!(&text[..], &utf8[..]);
    assert_eq!(text.as_bytes(), &utf8_encoded[..]);
    assert_eq!(utf8.as_bytes(), &utf8_encoded[..]);
}
#[test]
fn pt_utf16be_roundtrip(ref text in "\\PC*\\PC*\\PC*") {
    // Scratch buffer shared by every encode/decode call below.
    let mut buf = [0u8; 32];
    let mut encoded_bytes: Vec<u8> = Vec::new();
    let mut round_tripped = String::new();

    // Encode to utf16 big endian, advancing past whatever each call consumed.
    let mut pending_text = &text[..];
    while !pending_text.is_empty() {
        match encode_from_str(Encoding::Utf16BE, pending_text, &mut buf) {
            Ok((consumed, encoded)) => {
                pending_text = &pending_text[consumed..];
                encoded_bytes.extend_from_slice(encoded);
            }
            Err(_) => panic!("Error when encoding."),
        }
    }

    // Decode back from utf16 big endian.
    let mut pending_bytes = &encoded_bytes[..];
    while !pending_bytes.is_empty() {
        match decode_to_str(Encoding::Utf16BE, pending_bytes, &mut buf) {
            Ok((consumed, decoded)) => {
                pending_bytes = &pending_bytes[consumed..];
                round_tripped.extend(decoded.chars());
            }
            Err(_) => panic!("Error when decoding."),
        }
    }

    assert_eq!(&text[..], &round_tripped[..]);
}
#[test]
fn pt_utf16le_roundtrip(ref text in "\\PC*\\PC*\\PC*") {
    // Scratch buffer shared by every encode/decode call below.
    let mut buf = [0u8; 32];
    let mut encoded_bytes: Vec<u8> = Vec::new();
    let mut round_tripped = String::new();

    // Encode to utf16 little endian, advancing past whatever each call
    // consumed.
    let mut pending_text = &text[..];
    while !pending_text.is_empty() {
        match encode_from_str(Encoding::Utf16LE, pending_text, &mut buf) {
            Ok((consumed, encoded)) => {
                pending_text = &pending_text[consumed..];
                encoded_bytes.extend_from_slice(encoded);
            }
            Err(_) => panic!("Error when encoding."),
        }
    }

    // Decode back from utf16 little endian.
    let mut pending_bytes = &encoded_bytes[..];
    while !pending_bytes.is_empty() {
        match decode_to_str(Encoding::Utf16LE, pending_bytes, &mut buf) {
            Ok((consumed, decoded)) => {
                pending_bytes = &pending_bytes[consumed..];
                round_tripped.extend(decoded.chars());
            }
            Err(_) => panic!("Error when decoding."),
        }
    }

    assert_eq!(&text[..], &round_tripped[..]);
}
#[test]
fn pt_utf32be_roundtrip(ref text in "\\PC*\\PC*\\PC*") {
    // Scratch buffer shared by every encode/decode call below.
    let mut buf = [0u8; 32];
    let mut encoded_bytes: Vec<u8> = Vec::new();
    let mut round_tripped = String::new();

    // Encode to utf32 big endian, advancing past whatever each call consumed.
    let mut pending_text = &text[..];
    while !pending_text.is_empty() {
        match encode_from_str(Encoding::Utf32BE, pending_text, &mut buf) {
            Ok((consumed, encoded)) => {
                pending_text = &pending_text[consumed..];
                encoded_bytes.extend_from_slice(encoded);
            }
            Err(_) => panic!("Error when encoding."),
        }
    }

    // Decode back from utf32 big endian.
    let mut pending_bytes = &encoded_bytes[..];
    while !pending_bytes.is_empty() {
        match decode_to_str(Encoding::Utf32BE, pending_bytes, &mut buf) {
            Ok((consumed, decoded)) => {
                pending_bytes = &pending_bytes[consumed..];
                round_tripped.extend(decoded.chars());
            }
            Err(_) => panic!("Error when decoding."),
        }
    }

    assert_eq!(&text[..], &round_tripped[..]);
}
#[test]
fn pt_utf32le_roundtrip(ref text in "\\PC*\\PC*\\PC*") {
    // Scratch buffer shared by every encode/decode call below.
    let mut buf = [0u8; 32];
    let mut encoded_bytes: Vec<u8> = Vec::new();
    let mut round_tripped = String::new();

    // Encode to utf32 little endian, advancing past whatever each call
    // consumed.
    let mut pending_text = &text[..];
    while !pending_text.is_empty() {
        match encode_from_str(Encoding::Utf32LE, pending_text, &mut buf) {
            Ok((consumed, encoded)) => {
                pending_text = &pending_text[consumed..];
                encoded_bytes.extend_from_slice(encoded);
            }
            Err(_) => panic!("Error when encoding."),
        }
    }

    // Decode back from utf32 little endian.
    let mut pending_bytes = &encoded_bytes[..];
    while !pending_bytes.is_empty() {
        match decode_to_str(Encoding::Utf32LE, pending_bytes, &mut buf) {
            Ok((consumed, decoded)) => {
                pending_bytes = &pending_bytes[consumed..];
                round_tripped.extend(decoded.chars());
            }
            Err(_) => panic!("Error when decoding."),
        }
    }

    assert_eq!(&text[..], &round_tripped[..]);
}
#[test]
fn pt_latin1_roundtrip(ref data in vec(0u8..=255, 0..1000)) {
    // Scratch buffer shared by every decode/encode call below.
    let mut buf = [0u8; 32];
    let mut decoded_text = String::new();
    let mut re_encoded: Vec<u8> = Vec::new();

    // Decode from latin1 to utf8, advancing past whatever each call consumed.
    let mut pending_bytes = &data[..];
    while !pending_bytes.is_empty() {
        match decode_to_str(Encoding::Latin1, pending_bytes, &mut buf) {
            Ok((consumed, decoded)) => {
                pending_bytes = &pending_bytes[consumed..];
                decoded_text.extend(decoded.chars());
            }
            Err(_) => panic!("Error when decoding."),
        }
    }

    // Encode from utf8 back to latin1.
    let mut pending_text = &decoded_text[..];
    while !pending_text.is_empty() {
        match encode_from_str(Encoding::Latin1, pending_text, &mut buf) {
            Ok((consumed, encoded)) => {
                pending_text = &pending_text[consumed..];
                re_encoded.extend_from_slice(encoded);
            }
            Err(_) => panic!("Error when encoding."),
        }
    }

    assert_eq!(&data[..], &re_encoded[..]);
}
// The iso-8859-7 tests stand in for every single-byte encoding other than
// latin1: those encodings are all generated and share their code.
#[test]
fn pt_iso_8859_7_roundtrip(mut data in vec(0u8..=255, 0..1000)) {
    // Scratch buffer shared by every decode/encode call below.
    let mut buf = [0u8; 32];
    let mut decoded_text = String::new();
    let mut re_encoded: Vec<u8> = Vec::new();

    // Eliminate undefined bytes in the input by overwriting them with zero.
    for byte in data.iter_mut() {
        match *byte {
            0xAE | 0xD2 | 0xFF => *byte = 0,
            _ => {}
        }
    }

    // Decode from iso-8859-7 to utf8, advancing past whatever each call
    // consumed.
    let mut pending_bytes = &data[..];
    while !pending_bytes.is_empty() {
        match decode_to_str(Encoding::ISO8859_7, pending_bytes, &mut buf) {
            Ok((consumed, decoded)) => {
                pending_bytes = &pending_bytes[consumed..];
                decoded_text.extend(decoded.chars());
            }
            Err(_) => panic!("Error when decoding."),
        }
    }

    // Encode from utf8 back to iso-8859-7.
    let mut pending_text = &decoded_text[..];
    while !pending_text.is_empty() {
        match encode_from_str(Encoding::ISO8859_7, pending_text, &mut buf) {
            Ok((consumed, encoded)) => {
                pending_text = &pending_text[consumed..];
                re_encoded.extend_from_slice(encoded);
            }
            Err(_) => panic!("Error when encoding."),
        }
    }

    assert_eq!(&data[..], &re_encoded[..]);
}
}