From 3d8eaefa6b24ef3114131db48738edeae6126687 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Thu, 23 Aug 2018 02:29:58 -0700 Subject: [PATCH] Added a lot of single-byte text encodings to text_encoding sub-crate. They are largely auto-generated from text-based tables from the WHATWG encoding standard. Most of them are still not available in the code itself, but they are generating correctly, and exposing them is just a matter of boiler plate. --- sub_crates/text_encoding/Cargo.toml | 1 + sub_crates/text_encoding/build.rs | 221 +++++++++++++++++ .../encoding_tables/index-ibm866.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-10.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-13.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-14.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-15.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-16.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-2.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-3.txt | 127 ++++++++++ .../encoding_tables/index-iso-8859-4.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-5.txt | 134 +++++++++++ .../encoding_tables/index-iso-8859-6.txt | 89 +++++++ .../encoding_tables/index-iso-8859-7.txt | 131 ++++++++++ .../encoding_tables/index-iso-8859-8.txt | 98 ++++++++ .../encoding_tables/index-koi8-r.txt | 134 +++++++++++ .../encoding_tables/index-koi8-u.txt | 134 +++++++++++ .../encoding_tables/index-macintosh.txt | 134 +++++++++++ .../encoding_tables/index-windows-1250.txt | 134 +++++++++++ .../encoding_tables/index-windows-1251.txt | 134 +++++++++++ .../encoding_tables/index-windows-1252.txt | 134 +++++++++++ .../encoding_tables/index-windows-1253.txt | 131 ++++++++++ .../encoding_tables/index-windows-1254.txt | 134 +++++++++++ .../encoding_tables/index-windows-1255.txt | 124 ++++++++++ .../encoding_tables/index-windows-1256.txt | 134 +++++++++++ .../encoding_tables/index-windows-1257.txt | 132 ++++++++++ 
.../encoding_tables/index-windows-1258.txt | 134 +++++++++++ .../encoding_tables/index-windows-874.txt | 126 ++++++++++ .../encoding_tables/index-x-mac-cyrillic.txt | 134 +++++++++++ sub_crates/text_encoding/src/lib.rs | 9 + sub_crates/text_encoding/src/single_byte.rs | 226 ++++++++++++++++++ 31 files changed, 3961 insertions(+) create mode 100644 sub_crates/text_encoding/build.rs create mode 100644 sub_crates/text_encoding/encoding_tables/index-ibm866.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-10.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-13.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-14.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-15.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-16.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-2.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-3.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-4.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-5.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-6.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-7.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-iso-8859-8.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-koi8-r.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-koi8-u.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-macintosh.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-windows-1250.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-windows-1251.txt create mode 100644 sub_crates/text_encoding/encoding_tables/index-windows-1252.txt create mode 100644 
//! Build script for the `text_encoding` sub-crate.
//!
//! For every single-byte encoding listed in [`SINGLE_BYTE_ENCODINGS`] this
//! reads the WHATWG index file `encoding_tables/index-NAME.txt` and writes a
//! Rust module `$OUT_DIR/NAME.rs` containing the encode/decode lookup tables
//! plus thin wrapper functions around the shared single-byte codec.

use std::env;
use std::fs::File;
use std::io::{self, BufRead, BufReader, BufWriter, Read, Write};
use std::path::Path;

/// Number of slots in a single-byte index table.
const TABLE_LEN: usize = 128;

/// Character emitted into the decode table for slots the index leaves unmapped.
const UNMAPPED: char = '\u{FFFD}';

/// Encodings to generate, in generation order.  Each `NAME` maps
/// `encoding_tables/index-NAME.txt` to `$OUT_DIR/NAME.rs`.
const SINGLE_BYTE_ENCODINGS: &[&str] = &[
    "ibm866",
    "iso-8859-2",
    "iso-8859-3",
    "iso-8859-4",
    "iso-8859-5",
    "iso-8859-6",
    "iso-8859-7",
    "iso-8859-8",
    "iso-8859-10",
    "iso-8859-13",
    "iso-8859-14",
    "iso-8859-15",
    "iso-8859-16",
    "koi8-r",
    "koi8-u",
    "macintosh",
    "windows-874",
    "windows-1250",
    "windows-1251",
    "windows-1252",
    "windows-1253",
    "windows-1254",
    "windows-1255",
    "windows-1256",
    "windows-1257",
    "windows-1258",
    "x-mac-cyrillic",
];

/// Wrapper code emitted verbatim at the top of every generated module.
const SHARED_CODE: &str = r#"
use {DecodeResult, EncodeResult};

pub fn encode_from_str<'a>(input: &str, output: &'a mut [u8]) -> EncodeResult<'a> {
    super::single_byte_encode_from_str(&ENCODE_TABLE, input, output)
}

pub fn decode_to_str<'a>(input: &[u8], output: &'a mut [u8]) -> DecodeResult<'a> {
    super::single_byte_decode_to_str(&DECODE_TABLE, input, output)
}
"#;

/// Generates all of the single-byte encoding tables and wrapper code.
///
/// # Panics
///
/// Panics (failing the build) with the offending path in the message if
/// `OUT_DIR` is unset, an index file cannot be opened or parsed, or an output
/// file cannot be written.
fn main() {
    let out_dir = env::var("OUT_DIR").expect("OUT_DIR must be set (run this through Cargo)");
    let out_dir = Path::new(&out_dir);

    // Only re-run when the script or one of its inputs changes.
    println!("cargo:rerun-if-changed=build.rs");

    for name in SINGLE_BYTE_ENCODINGS {
        // Relative input paths rely on Cargo running build scripts from the
        // package root.
        let in_path = format!("encoding_tables/index-{}.txt", name);
        let out_path = out_dir.join(format!("{}.rs", name));
        println!("cargo:rerun-if-changed={}", in_path);

        let in_file = File::open(&in_path)
            .unwrap_or_else(|e| panic!("failed to open {}: {}", in_path, e));
        let out_file = File::create(&out_path)
            .unwrap_or_else(|e| panic!("failed to create {}: {}", out_path.display(), e));

        generate_single_byte_encoding_from_index(in_file, out_file).unwrap_or_else(|e| {
            panic!(
                "failed to generate {} from {}: {}",
                out_path.display(),
                in_path,
                e
            )
        });
    }
}

/// Reads a WHATWG-style single-byte index from `in_file` and writes the
/// corresponding Rust module (wrapper functions, `ENCODE_TABLE`,
/// `DECODE_TABLE`) to `out_file`.
///
/// The input is parsed completely before anything is written, so a malformed
/// index produces no partial output through `out_file`.
///
/// # Errors
///
/// Returns any I/O error from reading or writing, and an error of kind
/// [`io::ErrorKind::InvalidData`] (with the 1-based line number) when a row
/// has a non-numeric or out-of-range index, a code point that is not
/// `0x`-prefixed hexadecimal, or a code point that is not a valid `char`.
fn generate_single_byte_encoding_from_index<R: Read, W: Write>(
    in_file: R,
    out_file: W,
) -> std::io::Result<()> {
    let table = parse_index_table(BufReader::new(in_file))?;

    // The module is emitted as hundreds of tiny writes; buffer them so a
    // `File` sink does not pay one syscall per table entry.
    let mut out = BufWriter::new(out_file);
    write_encoding_module(&mut out, &table)?;
    // Flush explicitly: `BufWriter`'s `Drop` would swallow a write error.
    out.flush()
}

/// Builds an `InvalidData` error for a malformed index row.
fn invalid_data(message: String) -> io::Error {
    io::Error::new(io::ErrorKind::InvalidData, message)
}

/// Parses the index rows (`<decimal index> <0xHEX code point> ...`) into a
/// table of `TABLE_LEN` slots; slots with no row stay `None`.
///
/// Blank lines, `#` comment lines and rows with fewer than two fields are
/// skipped.  If an index occurs more than once, the last row wins.
fn parse_index_table<R: BufRead>(reader: R) -> io::Result<[Option<char>; TABLE_LEN]> {
    let mut table = [None; TABLE_LEN];

    for (line_idx, line) in reader.lines().enumerate() {
        let line_no = line_idx + 1;
        let line = line?;
        let line = line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let mut fields = line.split_whitespace();
        let (index_field, code_field) = match (fields.next(), fields.next()) {
            (Some(index_field), Some(code_field)) => (index_field, code_field),
            _ => continue,
        };

        let index: usize = index_field.parse().map_err(|e| {
            invalid_data(format!(
                "line {}: invalid index {:?}: {}",
                line_no, index_field, e
            ))
        })?;
        if index >= TABLE_LEN {
            return Err(invalid_data(format!(
                "line {}: index {} is out of range (must be below {})",
                line_no, index, TABLE_LEN
            )));
        }

        let hex_digits = if code_field.starts_with("0x") || code_field.starts_with("0X") {
            &code_field[2..]
        } else {
            return Err(invalid_data(format!(
                "line {}: code point {:?} is missing its 0x prefix",
                line_no, code_field
            )));
        };
        let code_point = u32::from_str_radix(hex_digits, 16).map_err(|e| {
            invalid_data(format!(
                "line {}: invalid code point {:?}: {}",
                line_no, code_field, e
            ))
        })?;
        let code = std::char::from_u32(code_point).ok_or_else(|| {
            invalid_data(format!(
                "line {}: {:?} is not a Unicode scalar value",
                line_no, code_field
            ))
        })?;

        table[index] = Some(code);
    }

    Ok(table)
}

/// Writes the generated module for `table`: the shared wrapper code, then
/// `ENCODE_TABLE` (sorted by character), then `DECODE_TABLE` (in index order).
fn write_encoding_module<W: Write>(
    out: &mut W,
    table: &[Option<char>; TABLE_LEN],
) -> io::Result<()> {
    // Reverse table for encoding.  Unmapped slots are left out: emitting them
    // as U+FFFD entries would let U+FFFD "encode" to an undefined byte and
    // would put duplicate keys into a table that is sorted for lookup.
    let mut encode_entries: Vec<(char, usize)> = table
        .iter()
        .enumerate()
        .filter_map(|(index, slot)| slot.map(|c| (c, index)))
        .collect();
    encode_entries.sort_by_key(|&(c, _)| c);

    out.write_all(SHARED_CODE.as_bytes())?;

    // NOTE(review): the value written next to each character is the table
    // index (0x00-0x7F), not the final byte (0x80-0xFF).  Presumably
    // `single_byte_encode_from_str` applies the 0x80 offset; confirm against
    // single_byte.rs.
    write!(
        out,
        "\nconst ENCODE_TABLE: [(char, u8); {}] = [\n",
        encode_entries.len()
    )?;
    for &(c, index) in &encode_entries {
        write!(out, "('\\u{{{:04X}}}', 0x{:02X}), ", c as u32, index)?;
    }
    out.write_all(b"\n];\n")?;

    write!(out, "\nconst DECODE_TABLE: [char; {}] = [\n", TABLE_LEN)?;
    for slot in table.iter() {
        write!(out, "'\\u{{{:04X}}}', ", slot.unwrap_or(UNMAPPED) as u32)?;
    }
    out.write_all(b"\n];\n")?;

    Ok(())
}
(CYRILLIC CAPITAL LETTER IE) + 6 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) + 7 0x0417 З (CYRILLIC CAPITAL LETTER ZE) + 8 0x0418 И (CYRILLIC CAPITAL LETTER I) + 9 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) + 10 0x041A К (CYRILLIC CAPITAL LETTER KA) + 11 0x041B Л (CYRILLIC CAPITAL LETTER EL) + 12 0x041C М (CYRILLIC CAPITAL LETTER EM) + 13 0x041D Н (CYRILLIC CAPITAL LETTER EN) + 14 0x041E О (CYRILLIC CAPITAL LETTER O) + 15 0x041F П (CYRILLIC CAPITAL LETTER PE) + 16 0x0420 Р (CYRILLIC CAPITAL LETTER ER) + 17 0x0421 С (CYRILLIC CAPITAL LETTER ES) + 18 0x0422 Т (CYRILLIC CAPITAL LETTER TE) + 19 0x0423 У (CYRILLIC CAPITAL LETTER U) + 20 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) + 21 0x0425 Х (CYRILLIC CAPITAL LETTER HA) + 22 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) + 23 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) + 24 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) + 25 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) + 26 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) + 27 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) + 28 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) + 29 0x042D Э (CYRILLIC CAPITAL LETTER E) + 30 0x042E Ю (CYRILLIC CAPITAL LETTER YU) + 31 0x042F Я (CYRILLIC CAPITAL LETTER YA) + 32 0x0430 а (CYRILLIC SMALL LETTER A) + 33 0x0431 б (CYRILLIC SMALL LETTER BE) + 34 0x0432 в (CYRILLIC SMALL LETTER VE) + 35 0x0433 г (CYRILLIC SMALL LETTER GHE) + 36 0x0434 д (CYRILLIC SMALL LETTER DE) + 37 0x0435 е (CYRILLIC SMALL LETTER IE) + 38 0x0436 ж (CYRILLIC SMALL LETTER ZHE) + 39 0x0437 з (CYRILLIC SMALL LETTER ZE) + 40 0x0438 и (CYRILLIC SMALL LETTER I) + 41 0x0439 й (CYRILLIC SMALL LETTER SHORT I) + 42 0x043A к (CYRILLIC SMALL LETTER KA) + 43 0x043B л (CYRILLIC SMALL LETTER EL) + 44 0x043C м (CYRILLIC SMALL LETTER EM) + 45 0x043D н (CYRILLIC SMALL LETTER EN) + 46 0x043E о (CYRILLIC SMALL LETTER O) + 47 0x043F п (CYRILLIC SMALL LETTER PE) + 48 0x2591 ░ (LIGHT SHADE) + 49 0x2592 ▒ (MEDIUM SHADE) + 50 0x2593 ▓ (DARK SHADE) + 51 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL) + 52 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL 
AND LEFT) + 53 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE) + 54 0x2562 ╢ (BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE) + 55 0x2556 ╖ (BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE) + 56 0x2555 ╕ (BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE) + 57 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT) + 58 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL) + 59 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT) + 60 0x255D ╝ (BOX DRAWINGS DOUBLE UP AND LEFT) + 61 0x255C ╜ (BOX DRAWINGS UP DOUBLE AND LEFT SINGLE) + 62 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE) + 63 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT) + 64 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT) + 65 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL) + 66 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL) + 67 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT) + 68 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL) + 69 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL) + 70 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE) + 71 0x255F ╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE) + 72 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT) + 73 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT) + 74 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL) + 75 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL) + 76 0x2560 ╠ (BOX DRAWINGS DOUBLE VERTICAL AND RIGHT) + 77 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL) + 78 0x256C ╬ (BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL) + 79 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE) + 80 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE) + 81 0x2564 ╤ (BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE) + 82 0x2565 ╥ (BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE) + 83 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE) + 84 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE) + 85 0x2552 ╒ (BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE) + 86 0x2553 ╓ (BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE) + 87 0x256B ╫ (BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE) + 88 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE 
AND HORIZONTAL DOUBLE) + 89 0x2518 ┘ (BOX DRAWINGS LIGHT UP AND LEFT) + 90 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT) + 91 0x2588 █ (FULL BLOCK) + 92 0x2584 ▄ (LOWER HALF BLOCK) + 93 0x258C ▌ (LEFT HALF BLOCK) + 94 0x2590 ▐ (RIGHT HALF BLOCK) + 95 0x2580 ▀ (UPPER HALF BLOCK) + 96 0x0440 р (CYRILLIC SMALL LETTER ER) + 97 0x0441 с (CYRILLIC SMALL LETTER ES) + 98 0x0442 т (CYRILLIC SMALL LETTER TE) + 99 0x0443 у (CYRILLIC SMALL LETTER U) +100 0x0444 ф (CYRILLIC SMALL LETTER EF) +101 0x0445 х (CYRILLIC SMALL LETTER HA) +102 0x0446 ц (CYRILLIC SMALL LETTER TSE) +103 0x0447 ч (CYRILLIC SMALL LETTER CHE) +104 0x0448 ш (CYRILLIC SMALL LETTER SHA) +105 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) +106 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) +107 0x044B ы (CYRILLIC SMALL LETTER YERU) +108 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) +109 0x044D э (CYRILLIC SMALL LETTER E) +110 0x044E ю (CYRILLIC SMALL LETTER YU) +111 0x044F я (CYRILLIC SMALL LETTER YA) +112 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) +113 0x0451 ё (CYRILLIC SMALL LETTER IO) +114 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) +115 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) +116 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) +117 0x0457 ї (CYRILLIC SMALL LETTER YI) +118 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) +119 0x045E ў (CYRILLIC SMALL LETTER SHORT U) +120 0x00B0 ° (DEGREE SIGN) +121 0x2219 ∙ (BULLET OPERATOR) +122 0x00B7 · (MIDDLE DOT) +123 0x221A √ (SQUARE ROOT) +124 0x2116 № (NUMERO SIGN) +125 0x00A4 ¤ (CURRENCY SIGN) +126 0x25A0 ■ (BLACK SQUARE) +127 0x00A0   (NO-BREAK SPACE) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-10.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-10.txt new file mode 100644 index 0000000..8386ba1 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-10.txt @@ -0,0 +1,134 @@ +# For details on index index-iso-8859-10.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 
02c2b5590d8ccda9931008c471f6ee2c590b2c8fe5e6ccb3b08638115d778507 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) + 34 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON) + 35 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA) + 36 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON) + 37 0x0128 Ĩ (LATIN CAPITAL LETTER I WITH TILDE) + 38 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA) + 39 0x00A7 § (SECTION SIGN) + 40 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA) + 41 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) + 42 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 43 0x0166 Ŧ (LATIN CAPITAL LETTER T WITH STROKE) + 44 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON) + 47 0x014A Ŋ (LATIN CAPITAL LETTER ENG) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) + 50 0x0113 ē (LATIN SMALL LETTER E WITH MACRON) + 51 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA) + 52 0x012B ī (LATIN SMALL LETTER I WITH MACRON) + 53 0x0129 ĩ (LATIN SMALL LETTER I WITH TILDE) + 54 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA) + 55 0x00B7 · (MIDDLE DOT) + 56 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA) + 57 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) + 58 0x0161 š (LATIN SMALL LETTER S WITH CARON) + 59 0x0167 ŧ (LATIN SMALL LETTER T WITH STROKE) + 60 0x017E ž (LATIN SMALL LETTER Z WITH CARON) + 61 0x2015 ― (HORIZONTAL BAR) + 62 0x016B 
ū (LATIN SMALL LETTER U WITH MACRON) + 63 0x014B ŋ (LATIN SMALL LETTER ENG) + 64 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 71 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK) + 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) + 80 0x00D0 Ð (LATIN CAPITAL LETTER ETH) + 81 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA) + 82 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x0168 Ũ (LATIN CAPITAL LETTER U WITH TILDE) + 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 89 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) + 94 0x00DE Þ (LATIN CAPITAL LETTER THORN) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x0101 ā (LATIN SMALL LETTER A WITH MACRON) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING 
ABOVE) +102 0x00E6 æ (LATIN SMALL LETTER AE) +103 0x012F į (LATIN SMALL LETTER I WITH OGONEK) +104 0x010D č (LATIN SMALL LETTER C WITH CARON) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +112 0x00F0 ð (LATIN SMALL LETTER ETH) +113 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA) +114 0x014D ō (LATIN SMALL LETTER O WITH MACRON) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x0169 ũ (LATIN SMALL LETTER U WITH TILDE) +120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) +121 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) +126 0x00FE þ (LATIN SMALL LETTER THORN) +127 0x0138 ĸ (LATIN SMALL LETTER KRA) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-13.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-13.txt new file mode 100644 index 0000000..031bb90 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-13.txt @@ -0,0 +1,134 @@ +# For details on index index-iso-8859-13.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 40736338e964ab520407cebcb01329f8d450abf6ce12bf88b74b655b60e43300 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 
0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) + 63 0x00E6 æ (LATIN SMALL LETTER AE) + 64 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) + 65 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK) + 66 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON) + 67 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) + 71 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON) + 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH 
ACUTE) + 74 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) + 75 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE) + 76 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA) + 77 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA) + 78 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON) + 79 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA) + 80 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) + 82 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON) + 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK) + 89 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) + 90 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) + 91 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) + 94 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) + 97 0x012F į (LATIN SMALL LETTER I WITH OGONEK) + 98 0x0101 ā (LATIN SMALL LETTER A WITH MACRON) + 99 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) +102 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) +103 0x0113 ē (LATIN SMALL LETTER E WITH MACRON) +104 0x010D č (LATIN SMALL LETTER C WITH CARON) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) +107 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE) +108 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA) +109 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA) +110 0x012B ī (LATIN SMALL LETTER I WITH MACRON) +111 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA) +112 0x0161 š (LATIN SMALL LETTER S WITH CARON) +113 0x0144 ń (LATIN SMALL LETTER N WITH 
ACUTE) +114 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x014D ō (LATIN SMALL LETTER O WITH MACRON) +117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK) +121 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) +122 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) +123 0x016B ū (LATIN SMALL LETTER U WITH MACRON) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) +126 0x017E ž (LATIN SMALL LETTER Z WITH CARON) +127 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-14.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-14.txt new file mode 100644 index 0000000..932fa55 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-14.txt @@ -0,0 +1,134 @@ +# For details on index index-iso-8859-14.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 2c8651cfc08b1f35b17919ee5379f2fa006af3ec809f11b3b7f470785580542b +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x1E02 Ḃ (LATIN CAPITAL LETTER B WITH DOT ABOVE) + 34 0x1E03 ḃ (LATIN SMALL LETTER B WITH DOT ABOVE) + 35 0x00A3 £ (POUND SIGN) + 36 0x010A Ċ (LATIN CAPITAL LETTER C WITH DOT ABOVE) + 37 0x010B ċ (LATIN SMALL LETTER C WITH DOT ABOVE) + 38 0x1E0A Ḋ (LATIN 
CAPITAL LETTER D WITH DOT ABOVE) + 39 0x00A7 § (SECTION SIGN) + 40 0x1E80 Ẁ (LATIN CAPITAL LETTER W WITH GRAVE) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x1E82 Ẃ (LATIN CAPITAL LETTER W WITH ACUTE) + 43 0x1E0B ḋ (LATIN SMALL LETTER D WITH DOT ABOVE) + 44 0x1EF2 Ỳ (LATIN CAPITAL LETTER Y WITH GRAVE) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) + 48 0x1E1E Ḟ (LATIN CAPITAL LETTER F WITH DOT ABOVE) + 49 0x1E1F ḟ (LATIN SMALL LETTER F WITH DOT ABOVE) + 50 0x0120 Ġ (LATIN CAPITAL LETTER G WITH DOT ABOVE) + 51 0x0121 ġ (LATIN SMALL LETTER G WITH DOT ABOVE) + 52 0x1E40 Ṁ (LATIN CAPITAL LETTER M WITH DOT ABOVE) + 53 0x1E41 ṁ (LATIN SMALL LETTER M WITH DOT ABOVE) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x1E56 Ṗ (LATIN CAPITAL LETTER P WITH DOT ABOVE) + 56 0x1E81 ẁ (LATIN SMALL LETTER W WITH GRAVE) + 57 0x1E57 ṗ (LATIN SMALL LETTER P WITH DOT ABOVE) + 58 0x1E83 ẃ (LATIN SMALL LETTER W WITH ACUTE) + 59 0x1E60 Ṡ (LATIN CAPITAL LETTER S WITH DOT ABOVE) + 60 0x1EF3 ỳ (LATIN SMALL LETTER Y WITH GRAVE) + 61 0x1E84 Ẅ (LATIN CAPITAL LETTER W WITH DIAERESIS) + 62 0x1E85 ẅ (LATIN SMALL LETTER W WITH DIAERESIS) + 63 0x1E61 ṡ (LATIN SMALL LETTER S WITH DOT ABOVE) + 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x00CF 
Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) + 80 0x0174 Ŵ (LATIN CAPITAL LETTER W WITH CIRCUMFLEX) + 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) + 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x1E6A Ṫ (LATIN CAPITAL LETTER T WITH DOT ABOVE) + 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) + 94 0x0176 Ŷ (LATIN CAPITAL LETTER Y WITH CIRCUMFLEX) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) +102 0x00E6 æ (LATIN SMALL LETTER AE) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +112 0x0175 ŵ (LATIN SMALL LETTER W WITH CIRCUMFLEX) +113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) +114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH 
DIAERESIS) +119 0x1E6B ṫ (LATIN SMALL LETTER T WITH DOT ABOVE) +120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) +121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) +126 0x0177 ŷ (LATIN SMALL LETTER Y WITH CIRCUMFLEX) +127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-15.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-15.txt new file mode 100644 index 0000000..65961d9 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-15.txt @@ -0,0 +1,134 @@ +# For details on index index-iso-8859-15.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: a560aba47bccd7510a6ac77f671fe75dca3800f05cf6d676910c311a8f8ff079 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x20AC € (EURO SIGN) + 37 0x00A5 ¥ (YEN SIGN) + 38 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 39 0x00A7 § (SECTION SIGN) + 40 0x0161 š (LATIN SMALL LETTER S WITH CARON) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x00AA ª (FEMININE ORDINAL INDICATOR) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 
0x00AE ® (REGISTERED SIGN) + 47 0x00AF ¯ (MACRON) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x017E ž (LATIN SMALL LETTER Z WITH CARON) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x00BA º (MASCULINE ORDINAL INDICATOR) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x0152 Œ (LATIN CAPITAL LIGATURE OE) + 61 0x0153 œ (LATIN SMALL LIGATURE OE) + 62 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) + 63 0x00BF ¿ (INVERTED QUESTION MARK) + 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) + 80 0x00D0 Ð (LATIN CAPITAL LETTER ETH) + 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) + 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) + 90 0x00DA Ú (LATIN 
CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) + 94 0x00DE Þ (LATIN CAPITAL LETTER THORN) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) +102 0x00E6 æ (LATIN SMALL LETTER AE) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +112 0x00F0 ð (LATIN SMALL LETTER ETH) +113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) +114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) +121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) +126 0x00FE þ (LATIN SMALL LETTER THORN) +127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-16.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-16.txt new file mode 100644 index 
0000000..16e416f --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-16.txt @@ -0,0 +1,134 @@ +# For details on index index-iso-8859-16.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 55676320d2d1b6e6909f5b3d741a7cf0cefc84e920aa4474afc091459111c2e3 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) + 34 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) + 35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) + 36 0x20AC € (EURO SIGN) + 37 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 38 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 39 0x00A7 § (SECTION SIGN) + 40 0x0161 š (LATIN SMALL LETTER S WITH CARON) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x0218 Ș (LATIN CAPITAL LETTER S WITH COMMA BELOW) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) + 47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) + 51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) + 52 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) + 53 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x017E ž (LATIN SMALL LETTER Z WITH CARON) + 57 0x010D č (LATIN SMALL LETTER C WITH CARON) + 58 0x0219 ș (LATIN 
SMALL LETTER S WITH COMMA BELOW) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x0152 Œ (LATIN CAPITAL LIGATURE OE) + 61 0x0153 œ (LATIN SMALL LIGATURE OE) + 62 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) + 63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) + 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) + 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) + 80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) + 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) + 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) + 88 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE) + 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) + 94 0x021A Ț (LATIN CAPITAL LETTER T WITH COMMA BELOW) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 97 0x00E1 á (LATIN 
SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) +102 0x00E6 æ (LATIN SMALL LETTER AE) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) +113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) +114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x0151 ő (LATIN SMALL LETTER O WITH DOUBLE ACUTE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) +120 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE) +121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) +126 0x021B ț (LATIN SMALL LETTER T WITH COMMA BELOW) +127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-2.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-2.txt new file mode 100644 index 0000000..b0e14de --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-2.txt @@ -0,0 +1,134 @@ +# For details on index index-iso-8859-2.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 
9569c67f22d0b57790e1c407c6eecf227e4562322dc296de43cdab7a0152ec73 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) + 34 0x02D8 ˘ (BREVE) + 35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x013D Ľ (LATIN CAPITAL LETTER L WITH CARON) + 38 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 42 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA) + 43 0x0164 Ť (LATIN CAPITAL LETTER T WITH CARON) + 44 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) + 47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) + 50 0x02DB ˛ (OGONEK) + 51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x013E ľ (LATIN SMALL LETTER L WITH CARON) + 54 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) + 55 0x02C7 ˇ (CARON) + 56 0x00B8 ¸ (CEDILLA) + 57 0x0161 š (LATIN SMALL LETTER S WITH CARON) + 58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA) + 59 0x0165 ť (LATIN SMALL LETTER T WITH CARON) + 60 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) + 61 0x02DD ˝ (DOUBLE ACUTE ACCENT) + 62 0x017E ž (LATIN SMALL LETTER Z WITH CARON) + 63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) + 64 0x0154 Ŕ (LATIN CAPITAL LETTER R WITH ACUTE) + 65 
0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x0139 Ĺ (LATIN CAPITAL LETTER L WITH ACUTE) + 70 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x011A Ě (LATIN CAPITAL LETTER E WITH CARON) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x010E Ď (LATIN CAPITAL LETTER D WITH CARON) + 80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) + 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) + 82 0x0147 Ň (LATIN CAPITAL LETTER N WITH CARON) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x0158 Ř (LATIN CAPITAL LETTER R WITH CARON) + 89 0x016E Ů (LATIN CAPITAL LETTER U WITH RING ABOVE) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) + 94 0x0162 Ţ (LATIN CAPITAL LETTER T WITH CEDILLA) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x0155 ŕ (LATIN SMALL LETTER R WITH ACUTE) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x013A ĺ (LATIN SMALL LETTER L WITH ACUTE) +102 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x010D č (LATIN 
SMALL LETTER C WITH CARON) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x011B ě (LATIN SMALL LETTER E WITH CARON) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x010F ď (LATIN SMALL LETTER D WITH CARON) +112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) +113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) +114 0x0148 ň (LATIN SMALL LETTER N WITH CARON) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x0151 ő (LATIN SMALL LETTER O WITH DOUBLE ACUTE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x0159 ř (LATIN SMALL LETTER R WITH CARON) +121 0x016F ů (LATIN SMALL LETTER U WITH RING ABOVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) +126 0x0163 ţ (LATIN SMALL LETTER T WITH CEDILLA) +127 0x02D9 ˙ (DOT ABOVE) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-3.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-3.txt new file mode 100644 index 0000000..018861a --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-3.txt @@ -0,0 +1,127 @@ +# For details on index index-iso-8859-3.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: af8f1e12df79b768322b5e83613698cdc619438270a2fc359554331c805054a3 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 
22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x0126 Ħ (LATIN CAPITAL LETTER H WITH STROKE) + 34 0x02D8 ˘ (BREVE) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 38 0x0124 Ĥ (LATIN CAPITAL LETTER H WITH CIRCUMFLEX) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x0130 İ (LATIN CAPITAL LETTER I WITH DOT ABOVE) + 42 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA) + 43 0x011E Ğ (LATIN CAPITAL LETTER G WITH BREVE) + 44 0x0134 Ĵ (LATIN CAPITAL LETTER J WITH CIRCUMFLEX) + 45 0x00AD ­ (SOFT HYPHEN) + 47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x0127 ħ (LATIN SMALL LETTER H WITH STROKE) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x0125 ĥ (LATIN SMALL LETTER H WITH CIRCUMFLEX) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00B8 ¸ (CEDILLA) + 57 0x0131 ı (LATIN SMALL LETTER DOTLESS I) + 58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA) + 59 0x011F ğ (LATIN SMALL LETTER G WITH BREVE) + 60 0x0135 ĵ (LATIN SMALL LETTER J WITH CIRCUMFLEX) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) + 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x010A Ċ (LATIN CAPITAL LETTER C WITH DOT ABOVE) + 70 0x0108 Ĉ (LATIN CAPITAL LETTER C WITH CIRCUMFLEX) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH 
ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) + 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) + 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x0120 Ġ (LATIN CAPITAL LETTER G WITH DOT ABOVE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x011C Ĝ (LATIN CAPITAL LETTER G WITH CIRCUMFLEX) + 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x016C Ŭ (LATIN CAPITAL LETTER U WITH BREVE) + 94 0x015C Ŝ (LATIN CAPITAL LETTER S WITH CIRCUMFLEX) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x010B ċ (LATIN SMALL LETTER C WITH DOT ABOVE) +102 0x0109 ĉ (LATIN SMALL LETTER C WITH CIRCUMFLEX) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) +114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x0121 ġ (LATIN SMALL LETTER G WITH DOT ABOVE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x011D ĝ (LATIN SMALL LETTER 
G WITH CIRCUMFLEX) +121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x016D ŭ (LATIN SMALL LETTER U WITH BREVE) +126 0x015D ŝ (LATIN SMALL LETTER S WITH CIRCUMFLEX) +127 0x02D9 ˙ (DOT ABOVE) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-4.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-4.txt new file mode 100644 index 0000000..a268878 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-4.txt @@ -0,0 +1,134 @@ +# For details on index index-iso-8859-4.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 72f29c92344d351fe9e74a946e7e0468d76d542c6894ff82982cb652ebe0feb7 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) + 34 0x0138 ĸ (LATIN SMALL LETTER KRA) + 35 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x0128 Ĩ (LATIN CAPITAL LETTER I WITH TILDE) + 38 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 42 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON) + 43 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA) + 44 0x0166 Ŧ (LATIN CAPITAL LETTER T WITH STROKE) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x017D Ž (LATIN CAPITAL 
LETTER Z WITH CARON) + 47 0x00AF ¯ (MACRON) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) + 50 0x02DB ˛ (OGONEK) + 51 0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x0129 ĩ (LATIN SMALL LETTER I WITH TILDE) + 54 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA) + 55 0x02C7 ˇ (CARON) + 56 0x00B8 ¸ (CEDILLA) + 57 0x0161 š (LATIN SMALL LETTER S WITH CARON) + 58 0x0113 ē (LATIN SMALL LETTER E WITH MACRON) + 59 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA) + 60 0x0167 ŧ (LATIN SMALL LETTER T WITH STROKE) + 61 0x014A Ŋ (LATIN CAPITAL LETTER ENG) + 62 0x017E ž (LATIN SMALL LETTER Z WITH CARON) + 63 0x014B ŋ (LATIN SMALL LETTER ENG) + 64 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 71 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK) + 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON) + 80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) + 81 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA) + 82 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON) + 83 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 89 0x0172 Ų (LATIN CAPITAL LETTER 
U WITH OGONEK) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x0168 Ũ (LATIN CAPITAL LETTER U WITH TILDE) + 94 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x0101 ā (LATIN SMALL LETTER A WITH MACRON) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) +102 0x00E6 æ (LATIN SMALL LETTER AE) +103 0x012F į (LATIN SMALL LETTER I WITH OGONEK) +104 0x010D č (LATIN SMALL LETTER C WITH CARON) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x012B ī (LATIN SMALL LETTER I WITH MACRON) +112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) +113 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA) +114 0x014D ō (LATIN SMALL LETTER O WITH MACRON) +115 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) +121 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x0169 ũ (LATIN SMALL LETTER U WITH TILDE) +126 0x016B ū (LATIN SMALL LETTER U WITH MACRON) +127 0x02D9 ˙ (DOT ABOVE) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-5.txt 
b/sub_crates/text_encoding/encoding_tables/index-iso-8859-5.txt new file mode 100644 index 0000000..f5e2962 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-5.txt @@ -0,0 +1,134 @@ +# For details on index index-iso-8859-5.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: fa9b1f3f5242df43e2e7bca80e9b6997c67944f20a4af91ee06bacc4e132d9c9 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) + 34 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE) + 35 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE) + 36 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) + 37 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE) + 38 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I) + 39 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) + 40 0x0408 Ј (CYRILLIC CAPITAL LETTER JE) + 41 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE) + 42 0x040A Њ (CYRILLIC CAPITAL LETTER NJE) + 43 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE) + 44 0x040C Ќ (CYRILLIC CAPITAL LETTER KJE) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) + 47 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE) + 48 0x0410 А (CYRILLIC CAPITAL LETTER A) + 49 0x0411 Б (CYRILLIC CAPITAL LETTER BE) + 50 0x0412 В (CYRILLIC CAPITAL LETTER VE) + 51 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) + 52 0x0414 Д (CYRILLIC CAPITAL LETTER DE) + 53 0x0415 Е (CYRILLIC CAPITAL LETTER IE) + 54 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) + 55 0x0417 З (CYRILLIC CAPITAL 
LETTER ZE) + 56 0x0418 И (CYRILLIC CAPITAL LETTER I) + 57 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) + 58 0x041A К (CYRILLIC CAPITAL LETTER KA) + 59 0x041B Л (CYRILLIC CAPITAL LETTER EL) + 60 0x041C М (CYRILLIC CAPITAL LETTER EM) + 61 0x041D Н (CYRILLIC CAPITAL LETTER EN) + 62 0x041E О (CYRILLIC CAPITAL LETTER O) + 63 0x041F П (CYRILLIC CAPITAL LETTER PE) + 64 0x0420 Р (CYRILLIC CAPITAL LETTER ER) + 65 0x0421 С (CYRILLIC CAPITAL LETTER ES) + 66 0x0422 Т (CYRILLIC CAPITAL LETTER TE) + 67 0x0423 У (CYRILLIC CAPITAL LETTER U) + 68 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) + 69 0x0425 Х (CYRILLIC CAPITAL LETTER HA) + 70 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) + 71 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) + 72 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) + 73 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) + 74 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) + 75 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) + 76 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) + 77 0x042D Э (CYRILLIC CAPITAL LETTER E) + 78 0x042E Ю (CYRILLIC CAPITAL LETTER YU) + 79 0x042F Я (CYRILLIC CAPITAL LETTER YA) + 80 0x0430 а (CYRILLIC SMALL LETTER A) + 81 0x0431 б (CYRILLIC SMALL LETTER BE) + 82 0x0432 в (CYRILLIC SMALL LETTER VE) + 83 0x0433 г (CYRILLIC SMALL LETTER GHE) + 84 0x0434 д (CYRILLIC SMALL LETTER DE) + 85 0x0435 е (CYRILLIC SMALL LETTER IE) + 86 0x0436 ж (CYRILLIC SMALL LETTER ZHE) + 87 0x0437 з (CYRILLIC SMALL LETTER ZE) + 88 0x0438 и (CYRILLIC SMALL LETTER I) + 89 0x0439 й (CYRILLIC SMALL LETTER SHORT I) + 90 0x043A к (CYRILLIC SMALL LETTER KA) + 91 0x043B л (CYRILLIC SMALL LETTER EL) + 92 0x043C м (CYRILLIC SMALL LETTER EM) + 93 0x043D н (CYRILLIC SMALL LETTER EN) + 94 0x043E о (CYRILLIC SMALL LETTER O) + 95 0x043F п (CYRILLIC SMALL LETTER PE) + 96 0x0440 р (CYRILLIC SMALL LETTER ER) + 97 0x0441 с (CYRILLIC SMALL LETTER ES) + 98 0x0442 т (CYRILLIC SMALL LETTER TE) + 99 0x0443 у (CYRILLIC SMALL LETTER U) +100 0x0444 ф (CYRILLIC SMALL LETTER EF) +101 0x0445 х (CYRILLIC SMALL LETTER HA) +102 0x0446 ц (CYRILLIC 
SMALL LETTER TSE) +103 0x0447 ч (CYRILLIC SMALL LETTER CHE) +104 0x0448 ш (CYRILLIC SMALL LETTER SHA) +105 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) +106 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) +107 0x044B ы (CYRILLIC SMALL LETTER YERU) +108 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) +109 0x044D э (CYRILLIC SMALL LETTER E) +110 0x044E ю (CYRILLIC SMALL LETTER YU) +111 0x044F я (CYRILLIC SMALL LETTER YA) +112 0x2116 № (NUMERO SIGN) +113 0x0451 ё (CYRILLIC SMALL LETTER IO) +114 0x0452 ђ (CYRILLIC SMALL LETTER DJE) +115 0x0453 ѓ (CYRILLIC SMALL LETTER GJE) +116 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) +117 0x0455 ѕ (CYRILLIC SMALL LETTER DZE) +118 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I) +119 0x0457 ї (CYRILLIC SMALL LETTER YI) +120 0x0458 ј (CYRILLIC SMALL LETTER JE) +121 0x0459 љ (CYRILLIC SMALL LETTER LJE) +122 0x045A њ (CYRILLIC SMALL LETTER NJE) +123 0x045B ћ (CYRILLIC SMALL LETTER TSHE) +124 0x045C ќ (CYRILLIC SMALL LETTER KJE) +125 0x00A7 § (SECTION SIGN) +126 0x045E ў (CYRILLIC SMALL LETTER SHORT U) +127 0x045F џ (CYRILLIC SMALL LETTER DZHE) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-6.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-6.txt new file mode 100644 index 0000000..a0691ff --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-6.txt @@ -0,0 +1,89 @@ +# For details on index index-iso-8859-6.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 85bb7b5c2dc75975afebe5743935ba4ed5a09c1e9e34e9bfb2ff80293f5d8bbc +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 
0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 36 0x00A4 ¤ (CURRENCY SIGN) + 44 0x060C ، (ARABIC COMMA) + 45 0x00AD ­ (SOFT HYPHEN) + 59 0x061B ؛ (ARABIC SEMICOLON) + 63 0x061F ؟ (ARABIC QUESTION MARK) + 65 0x0621 ء (ARABIC LETTER HAMZA) + 66 0x0622 آ (ARABIC LETTER ALEF WITH MADDA ABOVE) + 67 0x0623 أ (ARABIC LETTER ALEF WITH HAMZA ABOVE) + 68 0x0624 ؤ (ARABIC LETTER WAW WITH HAMZA ABOVE) + 69 0x0625 إ (ARABIC LETTER ALEF WITH HAMZA BELOW) + 70 0x0626 ئ (ARABIC LETTER YEH WITH HAMZA ABOVE) + 71 0x0627 ا (ARABIC LETTER ALEF) + 72 0x0628 ب (ARABIC LETTER BEH) + 73 0x0629 ة (ARABIC LETTER TEH MARBUTA) + 74 0x062A ت (ARABIC LETTER TEH) + 75 0x062B ث (ARABIC LETTER THEH) + 76 0x062C ج (ARABIC LETTER JEEM) + 77 0x062D ح (ARABIC LETTER HAH) + 78 0x062E خ (ARABIC LETTER KHAH) + 79 0x062F د (ARABIC LETTER DAL) + 80 0x0630 ذ (ARABIC LETTER THAL) + 81 0x0631 ر (ARABIC LETTER REH) + 82 0x0632 ز (ARABIC LETTER ZAIN) + 83 0x0633 س (ARABIC LETTER SEEN) + 84 0x0634 ش (ARABIC LETTER SHEEN) + 85 0x0635 ص (ARABIC LETTER SAD) + 86 0x0636 ض (ARABIC LETTER DAD) + 87 0x0637 ط (ARABIC LETTER TAH) + 88 0x0638 ظ (ARABIC LETTER ZAH) + 89 0x0639 ع (ARABIC LETTER AIN) + 90 0x063A غ (ARABIC LETTER GHAIN) + 96 0x0640 ـ (ARABIC TATWEEL) + 97 0x0641 ف (ARABIC LETTER FEH) + 98 0x0642 ق (ARABIC LETTER QAF) + 99 0x0643 ك (ARABIC LETTER KAF) +100 0x0644 ل (ARABIC LETTER LAM) +101 0x0645 م (ARABIC LETTER MEEM) +102 0x0646 ن (ARABIC LETTER NOON) +103 0x0647 ه (ARABIC LETTER HEH) +104 0x0648 و (ARABIC LETTER WAW) +105 0x0649 ى (ARABIC LETTER ALEF MAKSURA) +106 0x064A ي (ARABIC LETTER YEH) +107 0x064B ً (ARABIC FATHATAN) +108 0x064C ٌ (ARABIC DAMMATAN) +109 0x064D ٍ (ARABIC KASRATAN) +110 0x064E َ (ARABIC FATHA) +111 0x064F ُ (ARABIC DAMMA) +112 0x0650 ِ (ARABIC KASRA) +113 0x0651 ّ (ARABIC SHADDA) +114 0x0652 ْ (ARABIC SUKUN) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-7.txt 
b/sub_crates/text_encoding/encoding_tables/index-iso-8859-7.txt new file mode 100644 index 0000000..5fd6737 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-7.txt @@ -0,0 +1,131 @@ +# For details on index index-iso-8859-7.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: f53d8aeba36314ef950eef02ffcf11dff540638ce27dfe7a86b6ccc6875afb24 +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 34 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 35 0x00A3 £ (POUND SIGN) + 36 0x20AC € (EURO SIGN) + 37 0x20AF ₯ (DRACHMA SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x037A ͺ (GREEK YPOGEGRAMMENI) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 47 0x2015 ― (HORIZONTAL BAR) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x0384 ΄ (GREEK TONOS) + 53 0x0385 ΅ (GREEK DIALYTIKA TONOS) + 54 0x0386 Ά (GREEK CAPITAL LETTER ALPHA WITH TONOS) + 55 0x00B7 · (MIDDLE DOT) + 56 0x0388 Έ (GREEK CAPITAL LETTER EPSILON WITH TONOS) + 57 0x0389 Ή (GREEK CAPITAL LETTER ETA WITH TONOS) + 58 0x038A Ί (GREEK CAPITAL LETTER IOTA WITH TONOS) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x038C Ό (GREEK CAPITAL LETTER OMICRON WITH TONOS) + 61 
0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x038E Ύ (GREEK CAPITAL LETTER UPSILON WITH TONOS) + 63 0x038F Ώ (GREEK CAPITAL LETTER OMEGA WITH TONOS) + 64 0x0390 ΐ (GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS) + 65 0x0391 Α (GREEK CAPITAL LETTER ALPHA) + 66 0x0392 Β (GREEK CAPITAL LETTER BETA) + 67 0x0393 Γ (GREEK CAPITAL LETTER GAMMA) + 68 0x0394 Δ (GREEK CAPITAL LETTER DELTA) + 69 0x0395 Ε (GREEK CAPITAL LETTER EPSILON) + 70 0x0396 Ζ (GREEK CAPITAL LETTER ZETA) + 71 0x0397 Η (GREEK CAPITAL LETTER ETA) + 72 0x0398 Θ (GREEK CAPITAL LETTER THETA) + 73 0x0399 Ι (GREEK CAPITAL LETTER IOTA) + 74 0x039A Κ (GREEK CAPITAL LETTER KAPPA) + 75 0x039B Λ (GREEK CAPITAL LETTER LAMDA) + 76 0x039C Μ (GREEK CAPITAL LETTER MU) + 77 0x039D Ν (GREEK CAPITAL LETTER NU) + 78 0x039E Ξ (GREEK CAPITAL LETTER XI) + 79 0x039F Ο (GREEK CAPITAL LETTER OMICRON) + 80 0x03A0 Π (GREEK CAPITAL LETTER PI) + 81 0x03A1 Ρ (GREEK CAPITAL LETTER RHO) + 83 0x03A3 Σ (GREEK CAPITAL LETTER SIGMA) + 84 0x03A4 Τ (GREEK CAPITAL LETTER TAU) + 85 0x03A5 Υ (GREEK CAPITAL LETTER UPSILON) + 86 0x03A6 Φ (GREEK CAPITAL LETTER PHI) + 87 0x03A7 Χ (GREEK CAPITAL LETTER CHI) + 88 0x03A8 Ψ (GREEK CAPITAL LETTER PSI) + 89 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA) + 90 0x03AA Ϊ (GREEK CAPITAL LETTER IOTA WITH DIALYTIKA) + 91 0x03AB Ϋ (GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA) + 92 0x03AC ά (GREEK SMALL LETTER ALPHA WITH TONOS) + 93 0x03AD έ (GREEK SMALL LETTER EPSILON WITH TONOS) + 94 0x03AE ή (GREEK SMALL LETTER ETA WITH TONOS) + 95 0x03AF ί (GREEK SMALL LETTER IOTA WITH TONOS) + 96 0x03B0 ΰ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS) + 97 0x03B1 α (GREEK SMALL LETTER ALPHA) + 98 0x03B2 β (GREEK SMALL LETTER BETA) + 99 0x03B3 γ (GREEK SMALL LETTER GAMMA) +100 0x03B4 δ (GREEK SMALL LETTER DELTA) +101 0x03B5 ε (GREEK SMALL LETTER EPSILON) +102 0x03B6 ζ (GREEK SMALL LETTER ZETA) +103 0x03B7 η (GREEK SMALL LETTER ETA) +104 0x03B8 θ (GREEK SMALL LETTER THETA) +105 0x03B9 ι (GREEK SMALL LETTER IOTA) +106 0x03BA κ 
(GREEK SMALL LETTER KAPPA) +107 0x03BB λ (GREEK SMALL LETTER LAMDA) +108 0x03BC μ (GREEK SMALL LETTER MU) +109 0x03BD ν (GREEK SMALL LETTER NU) +110 0x03BE ξ (GREEK SMALL LETTER XI) +111 0x03BF ο (GREEK SMALL LETTER OMICRON) +112 0x03C0 π (GREEK SMALL LETTER PI) +113 0x03C1 ρ (GREEK SMALL LETTER RHO) +114 0x03C2 ς (GREEK SMALL LETTER FINAL SIGMA) +115 0x03C3 σ (GREEK SMALL LETTER SIGMA) +116 0x03C4 τ (GREEK SMALL LETTER TAU) +117 0x03C5 υ (GREEK SMALL LETTER UPSILON) +118 0x03C6 φ (GREEK SMALL LETTER PHI) +119 0x03C7 χ (GREEK SMALL LETTER CHI) +120 0x03C8 ψ (GREEK SMALL LETTER PSI) +121 0x03C9 ω (GREEK SMALL LETTER OMEGA) +122 0x03CA ϊ (GREEK SMALL LETTER IOTA WITH DIALYTIKA) +123 0x03CB ϋ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA) +124 0x03CC ό (GREEK SMALL LETTER OMICRON WITH TONOS) +125 0x03CD ύ (GREEK SMALL LETTER UPSILON WITH TONOS) +126 0x03CE ώ (GREEK SMALL LETTER OMEGA WITH TONOS) diff --git a/sub_crates/text_encoding/encoding_tables/index-iso-8859-8.txt b/sub_crates/text_encoding/encoding_tables/index-iso-8859-8.txt new file mode 100644 index 0000000..5aedc57 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-iso-8859-8.txt @@ -0,0 +1,98 @@ +# For details on index index-iso-8859-8.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 7657a9ca3fa875990da960d3f812eea28dcd0ae6ed55a18d5394303c86f5484b +# Date: 2018-01-06 + + 0 0x0080 € () + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x0085 … () + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x0091 ‘ () + 18 0x0092 ’ () + 19 0x0093 “ () + 20 0x0094 ” () + 21 0x0095 • () + 22 0x0096 – () + 23 0x0097 — () + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 34 0x00A2 ¢ (CENT SIGN) + 35 
0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x00A5 ¥ (YEN SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x00D7 × (MULTIPLICATION SIGN) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x00AF ¯ (MACRON) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00B8 ¸ (CEDILLA) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x00F7 ÷ (DIVISION SIGN) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) + 95 0x2017 ‗ (DOUBLE LOW LINE) + 96 0x05D0 א (HEBREW LETTER ALEF) + 97 0x05D1 ב (HEBREW LETTER BET) + 98 0x05D2 ג (HEBREW LETTER GIMEL) + 99 0x05D3 ד (HEBREW LETTER DALET) +100 0x05D4 ה (HEBREW LETTER HE) +101 0x05D5 ו (HEBREW LETTER VAV) +102 0x05D6 ז (HEBREW LETTER ZAYIN) +103 0x05D7 ח (HEBREW LETTER HET) +104 0x05D8 ט (HEBREW LETTER TET) +105 0x05D9 י (HEBREW LETTER YOD) +106 0x05DA ך (HEBREW LETTER FINAL KAF) +107 0x05DB כ (HEBREW LETTER KAF) +108 0x05DC ל (HEBREW LETTER LAMED) +109 0x05DD ם (HEBREW LETTER FINAL MEM) +110 0x05DE מ (HEBREW LETTER MEM) +111 0x05DF ן (HEBREW LETTER FINAL NUN) +112 0x05E0 נ (HEBREW LETTER NUN) +113 0x05E1 ס (HEBREW LETTER SAMEKH) +114 0x05E2 ע (HEBREW LETTER AYIN) +115 0x05E3 ף (HEBREW LETTER FINAL PE) +116 0x05E4 פ (HEBREW LETTER PE) +117 0x05E5 ץ (HEBREW LETTER FINAL TSADI) +118 0x05E6 צ (HEBREW LETTER TSADI) +119 0x05E7 ק (HEBREW LETTER QOF) +120 0x05E8 ר (HEBREW LETTER RESH) +121 0x05E9 ש (HEBREW LETTER SHIN) +122 0x05EA ת (HEBREW LETTER TAV) +125 0x200E ‎ (LEFT-TO-RIGHT MARK) +126 0x200F ‏ (RIGHT-TO-LEFT MARK) diff --git 
a/sub_crates/text_encoding/encoding_tables/index-koi8-r.txt b/sub_crates/text_encoding/encoding_tables/index-koi8-r.txt new file mode 100644 index 0000000..639e9c4 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-koi8-r.txt @@ -0,0 +1,134 @@ +# For details on index index-koi8-r.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: c5497cd9071cb352c0e56b219154e539badf63de40b71578f09e2e11fe7d50ae +# Date: 2018-01-06 + + 0 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL) + 1 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL) + 2 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT) + 3 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT) + 4 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT) + 5 0x2518 ┘ (BOX DRAWINGS LIGHT UP AND LEFT) + 6 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT) + 7 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL AND LEFT) + 8 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL) + 9 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL) + 10 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL) + 11 0x2580 ▀ (UPPER HALF BLOCK) + 12 0x2584 ▄ (LOWER HALF BLOCK) + 13 0x2588 █ (FULL BLOCK) + 14 0x258C ▌ (LEFT HALF BLOCK) + 15 0x2590 ▐ (RIGHT HALF BLOCK) + 16 0x2591 ░ (LIGHT SHADE) + 17 0x2592 ▒ (MEDIUM SHADE) + 18 0x2593 ▓ (DARK SHADE) + 19 0x2320 ⌠ (TOP HALF INTEGRAL) + 20 0x25A0 ■ (BLACK SQUARE) + 21 0x2219 ∙ (BULLET OPERATOR) + 22 0x221A √ (SQUARE ROOT) + 23 0x2248 ≈ (ALMOST EQUAL TO) + 24 0x2264 ≤ (LESS-THAN OR EQUAL TO) + 25 0x2265 ≥ (GREATER-THAN OR EQUAL TO) + 26 0x00A0   (NO-BREAK SPACE) + 27 0x2321 ⌡ (BOTTOM HALF INTEGRAL) + 28 0x00B0 ° (DEGREE SIGN) + 29 0x00B2 ² (SUPERSCRIPT TWO) + 30 0x00B7 · (MIDDLE DOT) + 31 0x00F7 ÷ (DIVISION SIGN) + 32 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL) + 33 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL) + 34 0x2552 ╒ (BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE) + 35 0x0451 ё (CYRILLIC SMALL LETTER IO) + 36 0x2553 ╓ (BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE) + 37 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT) + 38 0x2555 ╕ (BOX 
DRAWINGS DOWN SINGLE AND LEFT DOUBLE) + 39 0x2556 ╖ (BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE) + 40 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT) + 41 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE) + 42 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE) + 43 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT) + 44 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE) + 45 0x255C ╜ (BOX DRAWINGS UP DOUBLE AND LEFT SINGLE) + 46 0x255D ╝ (BOX DRAWINGS DOUBLE UP AND LEFT) + 47 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE) + 48 0x255F ╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE) + 49 0x2560 ╠ (BOX DRAWINGS DOUBLE VERTICAL AND RIGHT) + 50 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE) + 51 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) + 52 0x2562 ╢ (BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE) + 53 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT) + 54 0x2564 ╤ (BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE) + 55 0x2565 ╥ (BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE) + 56 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL) + 57 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE) + 58 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE) + 59 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL) + 60 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE) + 61 0x256B ╫ (BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE) + 62 0x256C ╬ (BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL) + 63 0x00A9 © (COPYRIGHT SIGN) + 64 0x044E ю (CYRILLIC SMALL LETTER YU) + 65 0x0430 а (CYRILLIC SMALL LETTER A) + 66 0x0431 б (CYRILLIC SMALL LETTER BE) + 67 0x0446 ц (CYRILLIC SMALL LETTER TSE) + 68 0x0434 д (CYRILLIC SMALL LETTER DE) + 69 0x0435 е (CYRILLIC SMALL LETTER IE) + 70 0x0444 ф (CYRILLIC SMALL LETTER EF) + 71 0x0433 г (CYRILLIC SMALL LETTER GHE) + 72 0x0445 х (CYRILLIC SMALL LETTER HA) + 73 0x0438 и (CYRILLIC SMALL LETTER I) + 74 0x0439 й (CYRILLIC SMALL LETTER SHORT I) + 75 0x043A к (CYRILLIC SMALL LETTER KA) + 76 0x043B л (CYRILLIC SMALL LETTER EL) + 77 0x043C м 
(CYRILLIC SMALL LETTER EM) + 78 0x043D н (CYRILLIC SMALL LETTER EN) + 79 0x043E о (CYRILLIC SMALL LETTER O) + 80 0x043F п (CYRILLIC SMALL LETTER PE) + 81 0x044F я (CYRILLIC SMALL LETTER YA) + 82 0x0440 р (CYRILLIC SMALL LETTER ER) + 83 0x0441 с (CYRILLIC SMALL LETTER ES) + 84 0x0442 т (CYRILLIC SMALL LETTER TE) + 85 0x0443 у (CYRILLIC SMALL LETTER U) + 86 0x0436 ж (CYRILLIC SMALL LETTER ZHE) + 87 0x0432 в (CYRILLIC SMALL LETTER VE) + 88 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) + 89 0x044B ы (CYRILLIC SMALL LETTER YERU) + 90 0x0437 з (CYRILLIC SMALL LETTER ZE) + 91 0x0448 ш (CYRILLIC SMALL LETTER SHA) + 92 0x044D э (CYRILLIC SMALL LETTER E) + 93 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) + 94 0x0447 ч (CYRILLIC SMALL LETTER CHE) + 95 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) + 96 0x042E Ю (CYRILLIC CAPITAL LETTER YU) + 97 0x0410 А (CYRILLIC CAPITAL LETTER A) + 98 0x0411 Б (CYRILLIC CAPITAL LETTER BE) + 99 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) +100 0x0414 Д (CYRILLIC CAPITAL LETTER DE) +101 0x0415 Е (CYRILLIC CAPITAL LETTER IE) +102 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) +103 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) +104 0x0425 Х (CYRILLIC CAPITAL LETTER HA) +105 0x0418 И (CYRILLIC CAPITAL LETTER I) +106 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) +107 0x041A К (CYRILLIC CAPITAL LETTER KA) +108 0x041B Л (CYRILLIC CAPITAL LETTER EL) +109 0x041C М (CYRILLIC CAPITAL LETTER EM) +110 0x041D Н (CYRILLIC CAPITAL LETTER EN) +111 0x041E О (CYRILLIC CAPITAL LETTER O) +112 0x041F П (CYRILLIC CAPITAL LETTER PE) +113 0x042F Я (CYRILLIC CAPITAL LETTER YA) +114 0x0420 Р (CYRILLIC CAPITAL LETTER ER) +115 0x0421 С (CYRILLIC CAPITAL LETTER ES) +116 0x0422 Т (CYRILLIC CAPITAL LETTER TE) +117 0x0423 У (CYRILLIC CAPITAL LETTER U) +118 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) +119 0x0412 В (CYRILLIC CAPITAL LETTER VE) +120 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) +121 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) +122 0x0417 З (CYRILLIC CAPITAL LETTER ZE) +123 0x0428 Ш (CYRILLIC CAPITAL LETTER 
SHA) +124 0x042D Э (CYRILLIC CAPITAL LETTER E) +125 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) +126 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) +127 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) diff --git a/sub_crates/text_encoding/encoding_tables/index-koi8-u.txt b/sub_crates/text_encoding/encoding_tables/index-koi8-u.txt new file mode 100644 index 0000000..6654e43 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-koi8-u.txt @@ -0,0 +1,134 @@ +# For details on index index-koi8-u.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 19a4da2c3f245118bbc8019326f45a07832949938ff903f03d62ac4da1f61f40 +# Date: 2018-01-06 + + 0 0x2500 ─ (BOX DRAWINGS LIGHT HORIZONTAL) + 1 0x2502 │ (BOX DRAWINGS LIGHT VERTICAL) + 2 0x250C ┌ (BOX DRAWINGS LIGHT DOWN AND RIGHT) + 3 0x2510 ┐ (BOX DRAWINGS LIGHT DOWN AND LEFT) + 4 0x2514 └ (BOX DRAWINGS LIGHT UP AND RIGHT) + 5 0x2518 ┘ (BOX DRAWINGS LIGHT UP AND LEFT) + 6 0x251C ├ (BOX DRAWINGS LIGHT VERTICAL AND RIGHT) + 7 0x2524 ┤ (BOX DRAWINGS LIGHT VERTICAL AND LEFT) + 8 0x252C ┬ (BOX DRAWINGS LIGHT DOWN AND HORIZONTAL) + 9 0x2534 ┴ (BOX DRAWINGS LIGHT UP AND HORIZONTAL) + 10 0x253C ┼ (BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL) + 11 0x2580 ▀ (UPPER HALF BLOCK) + 12 0x2584 ▄ (LOWER HALF BLOCK) + 13 0x2588 █ (FULL BLOCK) + 14 0x258C ▌ (LEFT HALF BLOCK) + 15 0x2590 ▐ (RIGHT HALF BLOCK) + 16 0x2591 ░ (LIGHT SHADE) + 17 0x2592 ▒ (MEDIUM SHADE) + 18 0x2593 ▓ (DARK SHADE) + 19 0x2320 ⌠ (TOP HALF INTEGRAL) + 20 0x25A0 ■ (BLACK SQUARE) + 21 0x2219 ∙ (BULLET OPERATOR) + 22 0x221A √ (SQUARE ROOT) + 23 0x2248 ≈ (ALMOST EQUAL TO) + 24 0x2264 ≤ (LESS-THAN OR EQUAL TO) + 25 0x2265 ≥ (GREATER-THAN OR EQUAL TO) + 26 0x00A0   (NO-BREAK SPACE) + 27 0x2321 ⌡ (BOTTOM HALF INTEGRAL) + 28 0x00B0 ° (DEGREE SIGN) + 29 0x00B2 ² (SUPERSCRIPT TWO) + 30 0x00B7 · (MIDDLE DOT) + 31 0x00F7 ÷ (DIVISION SIGN) + 32 0x2550 ═ (BOX DRAWINGS DOUBLE HORIZONTAL) + 33 0x2551 ║ (BOX DRAWINGS DOUBLE VERTICAL) + 34 0x2552 ╒ (BOX DRAWINGS 
DOWN SINGLE AND RIGHT DOUBLE) + 35 0x0451 ё (CYRILLIC SMALL LETTER IO) + 36 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) + 37 0x2554 ╔ (BOX DRAWINGS DOUBLE DOWN AND RIGHT) + 38 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I) + 39 0x0457 ї (CYRILLIC SMALL LETTER YI) + 40 0x2557 ╗ (BOX DRAWINGS DOUBLE DOWN AND LEFT) + 41 0x2558 ╘ (BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE) + 42 0x2559 ╙ (BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE) + 43 0x255A ╚ (BOX DRAWINGS DOUBLE UP AND RIGHT) + 44 0x255B ╛ (BOX DRAWINGS UP SINGLE AND LEFT DOUBLE) + 45 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN) + 46 0x045E ў (CYRILLIC SMALL LETTER SHORT U) + 47 0x255E ╞ (BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE) + 48 0x255F ╟ (BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE) + 49 0x2560 ╠ (BOX DRAWINGS DOUBLE VERTICAL AND RIGHT) + 50 0x2561 ╡ (BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE) + 51 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) + 52 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) + 53 0x2563 ╣ (BOX DRAWINGS DOUBLE VERTICAL AND LEFT) + 54 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I) + 55 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) + 56 0x2566 ╦ (BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL) + 57 0x2567 ╧ (BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE) + 58 0x2568 ╨ (BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE) + 59 0x2569 ╩ (BOX DRAWINGS DOUBLE UP AND HORIZONTAL) + 60 0x256A ╪ (BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE) + 61 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN) + 62 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) + 63 0x00A9 © (COPYRIGHT SIGN) + 64 0x044E ю (CYRILLIC SMALL LETTER YU) + 65 0x0430 а (CYRILLIC SMALL LETTER A) + 66 0x0431 б (CYRILLIC SMALL LETTER BE) + 67 0x0446 ц (CYRILLIC SMALL LETTER TSE) + 68 0x0434 д (CYRILLIC SMALL LETTER DE) + 69 0x0435 е (CYRILLIC SMALL LETTER IE) + 70 0x0444 ф (CYRILLIC SMALL LETTER EF) + 71 0x0433 г (CYRILLIC SMALL LETTER GHE) + 72 0x0445 х (CYRILLIC SMALL LETTER HA) + 73 0x0438 и (CYRILLIC SMALL LETTER I) + 74 0x0439 й 
(CYRILLIC SMALL LETTER SHORT I) + 75 0x043A к (CYRILLIC SMALL LETTER KA) + 76 0x043B л (CYRILLIC SMALL LETTER EL) + 77 0x043C м (CYRILLIC SMALL LETTER EM) + 78 0x043D н (CYRILLIC SMALL LETTER EN) + 79 0x043E о (CYRILLIC SMALL LETTER O) + 80 0x043F п (CYRILLIC SMALL LETTER PE) + 81 0x044F я (CYRILLIC SMALL LETTER YA) + 82 0x0440 р (CYRILLIC SMALL LETTER ER) + 83 0x0441 с (CYRILLIC SMALL LETTER ES) + 84 0x0442 т (CYRILLIC SMALL LETTER TE) + 85 0x0443 у (CYRILLIC SMALL LETTER U) + 86 0x0436 ж (CYRILLIC SMALL LETTER ZHE) + 87 0x0432 в (CYRILLIC SMALL LETTER VE) + 88 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) + 89 0x044B ы (CYRILLIC SMALL LETTER YERU) + 90 0x0437 з (CYRILLIC SMALL LETTER ZE) + 91 0x0448 ш (CYRILLIC SMALL LETTER SHA) + 92 0x044D э (CYRILLIC SMALL LETTER E) + 93 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) + 94 0x0447 ч (CYRILLIC SMALL LETTER CHE) + 95 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) + 96 0x042E Ю (CYRILLIC CAPITAL LETTER YU) + 97 0x0410 А (CYRILLIC CAPITAL LETTER A) + 98 0x0411 Б (CYRILLIC CAPITAL LETTER BE) + 99 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) +100 0x0414 Д (CYRILLIC CAPITAL LETTER DE) +101 0x0415 Е (CYRILLIC CAPITAL LETTER IE) +102 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) +103 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) +104 0x0425 Х (CYRILLIC CAPITAL LETTER HA) +105 0x0418 И (CYRILLIC CAPITAL LETTER I) +106 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) +107 0x041A К (CYRILLIC CAPITAL LETTER KA) +108 0x041B Л (CYRILLIC CAPITAL LETTER EL) +109 0x041C М (CYRILLIC CAPITAL LETTER EM) +110 0x041D Н (CYRILLIC CAPITAL LETTER EN) +111 0x041E О (CYRILLIC CAPITAL LETTER O) +112 0x041F П (CYRILLIC CAPITAL LETTER PE) +113 0x042F Я (CYRILLIC CAPITAL LETTER YA) +114 0x0420 Р (CYRILLIC CAPITAL LETTER ER) +115 0x0421 С (CYRILLIC CAPITAL LETTER ES) +116 0x0422 Т (CYRILLIC CAPITAL LETTER TE) +117 0x0423 У (CYRILLIC CAPITAL LETTER U) +118 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) +119 0x0412 В (CYRILLIC CAPITAL LETTER VE) +120 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) 
+121 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) +122 0x0417 З (CYRILLIC CAPITAL LETTER ZE) +123 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) +124 0x042D Э (CYRILLIC CAPITAL LETTER E) +125 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) +126 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) +127 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) diff --git a/sub_crates/text_encoding/encoding_tables/index-macintosh.txt b/sub_crates/text_encoding/encoding_tables/index-macintosh.txt new file mode 100644 index 0000000..e841a89 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-macintosh.txt @@ -0,0 +1,134 @@ +# For details on index index-macintosh.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: f2c6a4f6406b3e86a50a5dba4d2b7dd48e2e33c0d82aefe764535c934ec11764 +# Date: 2018-01-06 + + 0 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 1 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 2 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 3 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 4 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) + 5 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 6 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 7 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 8 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 9 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 10 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) + 11 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) + 12 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) + 13 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) + 14 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) + 15 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) + 16 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) + 17 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) + 18 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) + 19 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) + 20 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) + 21 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) + 22 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) + 23 0x00F3 ó (LATIN SMALL 
LETTER O WITH ACUTE) + 24 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) + 25 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) + 26 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) + 27 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) + 28 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) + 29 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) + 30 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) + 31 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) + 32 0x2020 † (DAGGER) + 33 0x00B0 ° (DEGREE SIGN) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A7 § (SECTION SIGN) + 37 0x2022 • (BULLET) + 38 0x00B6 ¶ (PILCROW SIGN) + 39 0x00DF ß (LATIN SMALL LETTER SHARP S) + 40 0x00AE ® (REGISTERED SIGN) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x2122 ™ (TRADE MARK SIGN) + 43 0x00B4 ´ (ACUTE ACCENT) + 44 0x00A8 ¨ (DIAERESIS) + 45 0x2260 ≠ (NOT EQUAL TO) + 46 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 47 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 48 0x221E ∞ (INFINITY) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x2264 ≤ (LESS-THAN OR EQUAL TO) + 51 0x2265 ≥ (GREATER-THAN OR EQUAL TO) + 52 0x00A5 ¥ (YEN SIGN) + 53 0x00B5 µ (MICRO SIGN) + 54 0x2202 ∂ (PARTIAL DIFFERENTIAL) + 55 0x2211 ∑ (N-ARY SUMMATION) + 56 0x220F ∏ (N-ARY PRODUCT) + 57 0x03C0 π (GREEK SMALL LETTER PI) + 58 0x222B ∫ (INTEGRAL) + 59 0x00AA ª (FEMININE ORDINAL INDICATOR) + 60 0x00BA º (MASCULINE ORDINAL INDICATOR) + 61 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA) + 62 0x00E6 æ (LATIN SMALL LETTER AE) + 63 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) + 64 0x00BF ¿ (INVERTED QUESTION MARK) + 65 0x00A1 ¡ (INVERTED EXCLAMATION MARK) + 66 0x00AC ¬ (NOT SIGN) + 67 0x221A √ (SQUARE ROOT) + 68 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) + 69 0x2248 ≈ (ALMOST EQUAL TO) + 70 0x2206 ∆ (INCREMENT) + 71 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 72 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 73 0x2026 … (HORIZONTAL ELLIPSIS) + 74 0x00A0   (NO-BREAK SPACE) + 75 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) + 76 0x00C3 à (LATIN CAPITAL LETTER A 
WITH TILDE) + 77 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 78 0x0152 Œ (LATIN CAPITAL LIGATURE OE) + 79 0x0153 œ (LATIN SMALL LIGATURE OE) + 80 0x2013 – (EN DASH) + 81 0x2014 — (EM DASH) + 82 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 83 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 84 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 85 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 86 0x00F7 ÷ (DIVISION SIGN) + 87 0x25CA ◊ (LOZENGE) + 88 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) + 89 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) + 90 0x2044 ⁄ (FRACTION SLASH) + 91 0x20AC € (EURO SIGN) + 92 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 93 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 94 0xFB01 fi (LATIN SMALL LIGATURE FI) + 95 0xFB02 fl (LATIN SMALL LIGATURE FL) + 96 0x2021 ‡ (DOUBLE DAGGER) + 97 0x00B7 · (MIDDLE DOT) + 98 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 99 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) +100 0x2030 ‰ (PER MILLE SIGN) +101 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) +102 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) +103 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) +104 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) +105 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) +106 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) +107 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) +108 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) +109 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) +110 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) +111 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) +112 0xF8FF  () +113 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) +114 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) +115 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) +116 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) +117 0x0131 ı (LATIN SMALL LETTER DOTLESS I) +118 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) +119 0x02DC ˜ (SMALL TILDE) +120 0x00AF ¯ (MACRON) +121 0x02D8 ˘ (BREVE) +122 0x02D9 ˙ (DOT ABOVE) +123 0x02DA ˚ (RING ABOVE) +124 0x00B8 ¸ 
(CEDILLA) +125 0x02DD ˝ (DOUBLE ACUTE ACCENT) +126 0x02DB ˛ (OGONEK) +127 0x02C7 ˇ (CARON) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-1250.txt b/sub_crates/text_encoding/encoding_tables/index-windows-1250.txt new file mode 100644 index 0000000..870946a --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1250.txt @@ -0,0 +1,134 @@ +# For details on index index-windows-1250.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 0669455a7a1c70ba6003ea737991e8ee9adc455125c13cfe6705a361358de5fa +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x0081  () + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0083 ƒ () + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE DAGGER) + 8 0x0088 ˆ () + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) + 13 0x0164 Ť (LATIN CAPITAL LETTER T WITH CARON) + 14 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) + 15 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) + 16 0x0090  () + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x0098 ˜ () + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x0161 š (LATIN SMALL LETTER S WITH CARON) + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 28 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) + 29 0x0165 ť (LATIN SMALL LETTER T WITH CARON) + 30 0x017E ž (LATIN SMALL LETTER Z WITH CARON) + 31 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) + 32 0x00A0   (NO-BREAK SPACE) + 33 0x02C7 ˇ (CARON) + 34 0x02D8 ˘ (BREVE) + 35 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) + 38 
0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x02DB ˛ (OGONEK) + 51 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00B8 ¸ (CEDILLA) + 57 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) + 58 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x013D Ľ (LATIN CAPITAL LETTER L WITH CARON) + 61 0x02DD ˝ (DOUBLE ACUTE ACCENT) + 62 0x013E ľ (LATIN SMALL LETTER L WITH CARON) + 63 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) + 64 0x0154 Ŕ (LATIN CAPITAL LETTER R WITH ACUTE) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x0139 Ĺ (LATIN CAPITAL LETTER L WITH ACUTE) + 70 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x011A Ě (LATIN CAPITAL LETTER E WITH CARON) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x010E Ď (LATIN CAPITAL LETTER D WITH CARON) + 80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) + 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) + 82 0x0147 Ň (LATIN CAPITAL LETTER N WITH CARON) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 
0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x0150 Ő (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x0158 Ř (LATIN CAPITAL LETTER R WITH CARON) + 89 0x016E Ů (LATIN CAPITAL LETTER U WITH RING ABOVE) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x0170 Ű (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) + 94 0x0162 Ţ (LATIN CAPITAL LETTER T WITH CEDILLA) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x0155 ŕ (LATIN SMALL LETTER R WITH ACUTE) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x013A ĺ (LATIN SMALL LETTER L WITH ACUTE) +102 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x010D č (LATIN SMALL LETTER C WITH CARON) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x011B ě (LATIN SMALL LETTER E WITH CARON) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x010F ď (LATIN SMALL LETTER D WITH CARON) +112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) +113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) +114 0x0148 ň (LATIN SMALL LETTER N WITH CARON) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x0151 ő (LATIN SMALL LETTER O WITH DOUBLE ACUTE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x0159 ř (LATIN SMALL LETTER R WITH CARON) +121 0x016F ů (LATIN SMALL LETTER U WITH RING ABOVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x0171 ű (LATIN SMALL LETTER U WITH DOUBLE ACUTE) +124 0x00FC ü 
(LATIN SMALL LETTER U WITH DIAERESIS) +125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) +126 0x0163 ţ (LATIN SMALL LETTER T WITH CEDILLA) +127 0x02D9 ˙ (DOT ABOVE) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-1251.txt b/sub_crates/text_encoding/encoding_tables/index-windows-1251.txt new file mode 100644 index 0000000..319e813 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1251.txt @@ -0,0 +1,134 @@ +# For details on index index-windows-1251.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 7592ef921679ba168b00a9e9afa3b4eebd67bf13dc7e84c4b6e120de856826e0 +# Date: 2018-01-06 + + 0 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE) + 1 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE) + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0453 ѓ (CYRILLIC SMALL LETTER GJE) + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE DAGGER) + 8 0x20AC € (EURO SIGN) + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE) + 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x040A Њ (CYRILLIC CAPITAL LETTER NJE) + 13 0x040C Ќ (CYRILLIC CAPITAL LETTER KJE) + 14 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE) + 15 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE) + 16 0x0452 ђ (CYRILLIC SMALL LETTER DJE) + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x0098 ˜ () + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x0459 љ (CYRILLIC SMALL LETTER LJE) + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 28 0x045A њ (CYRILLIC SMALL LETTER NJE) + 29 0x045C ќ (CYRILLIC SMALL LETTER KJE) + 30 0x045B ћ (CYRILLIC SMALL LETTER TSHE) + 31 0x045F џ (CYRILLIC SMALL LETTER DZHE) + 32 0x00A0   (NO-BREAK SPACE) + 33 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) + 34 0x045E ў 
(CYRILLIC SMALL LETTER SHORT U) + 35 0x0408 Ј (CYRILLIC CAPITAL LETTER JE) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I) + 51 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I) + 52 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x0451 ё (CYRILLIC SMALL LETTER IO) + 57 0x2116 № (NUMERO SIGN) + 58 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x0458 ј (CYRILLIC SMALL LETTER JE) + 61 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE) + 62 0x0455 ѕ (CYRILLIC SMALL LETTER DZE) + 63 0x0457 ї (CYRILLIC SMALL LETTER YI) + 64 0x0410 А (CYRILLIC CAPITAL LETTER A) + 65 0x0411 Б (CYRILLIC CAPITAL LETTER BE) + 66 0x0412 В (CYRILLIC CAPITAL LETTER VE) + 67 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) + 68 0x0414 Д (CYRILLIC CAPITAL LETTER DE) + 69 0x0415 Е (CYRILLIC CAPITAL LETTER IE) + 70 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) + 71 0x0417 З (CYRILLIC CAPITAL LETTER ZE) + 72 0x0418 И (CYRILLIC CAPITAL LETTER I) + 73 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) + 74 0x041A К (CYRILLIC CAPITAL LETTER KA) + 75 0x041B Л (CYRILLIC CAPITAL LETTER EL) + 76 0x041C М (CYRILLIC CAPITAL LETTER EM) + 77 0x041D Н (CYRILLIC CAPITAL LETTER EN) + 78 0x041E О (CYRILLIC CAPITAL LETTER O) + 79 0x041F П (CYRILLIC CAPITAL LETTER PE) + 80 0x0420 Р (CYRILLIC CAPITAL LETTER ER) + 81 0x0421 С (CYRILLIC CAPITAL LETTER ES) + 82 0x0422 Т 
(CYRILLIC CAPITAL LETTER TE) + 83 0x0423 У (CYRILLIC CAPITAL LETTER U) + 84 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) + 85 0x0425 Х (CYRILLIC CAPITAL LETTER HA) + 86 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) + 87 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) + 88 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) + 89 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) + 90 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) + 91 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) + 92 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) + 93 0x042D Э (CYRILLIC CAPITAL LETTER E) + 94 0x042E Ю (CYRILLIC CAPITAL LETTER YU) + 95 0x042F Я (CYRILLIC CAPITAL LETTER YA) + 96 0x0430 а (CYRILLIC SMALL LETTER A) + 97 0x0431 б (CYRILLIC SMALL LETTER BE) + 98 0x0432 в (CYRILLIC SMALL LETTER VE) + 99 0x0433 г (CYRILLIC SMALL LETTER GHE) +100 0x0434 д (CYRILLIC SMALL LETTER DE) +101 0x0435 е (CYRILLIC SMALL LETTER IE) +102 0x0436 ж (CYRILLIC SMALL LETTER ZHE) +103 0x0437 з (CYRILLIC SMALL LETTER ZE) +104 0x0438 и (CYRILLIC SMALL LETTER I) +105 0x0439 й (CYRILLIC SMALL LETTER SHORT I) +106 0x043A к (CYRILLIC SMALL LETTER KA) +107 0x043B л (CYRILLIC SMALL LETTER EL) +108 0x043C м (CYRILLIC SMALL LETTER EM) +109 0x043D н (CYRILLIC SMALL LETTER EN) +110 0x043E о (CYRILLIC SMALL LETTER O) +111 0x043F п (CYRILLIC SMALL LETTER PE) +112 0x0440 р (CYRILLIC SMALL LETTER ER) +113 0x0441 с (CYRILLIC SMALL LETTER ES) +114 0x0442 т (CYRILLIC SMALL LETTER TE) +115 0x0443 у (CYRILLIC SMALL LETTER U) +116 0x0444 ф (CYRILLIC SMALL LETTER EF) +117 0x0445 х (CYRILLIC SMALL LETTER HA) +118 0x0446 ц (CYRILLIC SMALL LETTER TSE) +119 0x0447 ч (CYRILLIC SMALL LETTER CHE) +120 0x0448 ш (CYRILLIC SMALL LETTER SHA) +121 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) +122 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) +123 0x044B ы (CYRILLIC SMALL LETTER YERU) +124 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) +125 0x044D э (CYRILLIC SMALL LETTER E) +126 0x044E ю (CYRILLIC SMALL LETTER YU) +127 0x044F я (CYRILLIC SMALL LETTER YA) diff --git 
a/sub_crates/text_encoding/encoding_tables/index-windows-1252.txt b/sub_crates/text_encoding/encoding_tables/index-windows-1252.txt new file mode 100644 index 0000000..56c5a0d --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1252.txt @@ -0,0 +1,134 @@ +# For details on index index-windows-1252.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: e56d49d9176e9a412283cf29ac9bd613f5620462f2a080a84eceaf974cfa18b7 +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x0081  () + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE DAGGER) + 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x0152 Œ (LATIN CAPITAL LIGATURE OE) + 13 0x008D  () + 14 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) + 15 0x008F  () + 16 0x0090  () + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x02DC ˜ (SMALL TILDE) + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x0161 š (LATIN SMALL LETTER S WITH CARON) + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 28 0x0153 œ (LATIN SMALL LIGATURE OE) + 29 0x009D  () + 30 0x017E ž (LATIN SMALL LETTER Z WITH CARON) + 31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) + 32 0x00A0   (NO-BREAK SPACE) + 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x00A5 ¥ (YEN SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x00AA ª (FEMININE ORDINAL INDICATOR) + 43 
0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x00AF ¯ (MACRON) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00B8 ¸ (CEDILLA) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x00BA º (MASCULINE ORDINAL INDICATOR) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) + 63 0x00BF ¿ (INVERTED QUESTION MARK) + 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) + 80 0x00D0 Ð (LATIN CAPITAL LETTER ETH) + 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) + 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 89 
0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x00DD Ý (LATIN CAPITAL LETTER Y WITH ACUTE) + 94 0x00DE Þ (LATIN CAPITAL LETTER THORN) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) +102 0x00E6 æ (LATIN SMALL LETTER AE) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +112 0x00F0 ð (LATIN SMALL LETTER ETH) +113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) +114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) +121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x00FD ý (LATIN SMALL LETTER Y WITH ACUTE) +126 0x00FE þ (LATIN SMALL LETTER THORN) +127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-1253.txt 
b/sub_crates/text_encoding/encoding_tables/index-windows-1253.txt new file mode 100644 index 0000000..9092f57 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1253.txt @@ -0,0 +1,131 @@ +# For details on index index-windows-1253.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 49fdc881a3488904dd1e8dfba9aef3258454249958b611bcded1d4c981ab5561 +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x0081  () + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE DAGGER) + 8 0x0088 ˆ () + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x008A Š () + 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x0098 ˜ () + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x009A š () + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x0385 ΅ (GREEK DIALYTIKA TONOS) + 34 0x0386 Ά (GREEK CAPITAL LETTER ALPHA WITH TONOS) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x00A5 ¥ (YEN SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x2015 ― (HORIZONTAL BAR) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x0384 ΄ (GREEK TONOS) + 53 0x00B5 µ (MICRO 
SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x0388 Έ (GREEK CAPITAL LETTER EPSILON WITH TONOS) + 57 0x0389 Ή (GREEK CAPITAL LETTER ETA WITH TONOS) + 58 0x038A Ί (GREEK CAPITAL LETTER IOTA WITH TONOS) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x038C Ό (GREEK CAPITAL LETTER OMICRON WITH TONOS) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x038E Ύ (GREEK CAPITAL LETTER UPSILON WITH TONOS) + 63 0x038F Ώ (GREEK CAPITAL LETTER OMEGA WITH TONOS) + 64 0x0390 ΐ (GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS) + 65 0x0391 Α (GREEK CAPITAL LETTER ALPHA) + 66 0x0392 Β (GREEK CAPITAL LETTER BETA) + 67 0x0393 Γ (GREEK CAPITAL LETTER GAMMA) + 68 0x0394 Δ (GREEK CAPITAL LETTER DELTA) + 69 0x0395 Ε (GREEK CAPITAL LETTER EPSILON) + 70 0x0396 Ζ (GREEK CAPITAL LETTER ZETA) + 71 0x0397 Η (GREEK CAPITAL LETTER ETA) + 72 0x0398 Θ (GREEK CAPITAL LETTER THETA) + 73 0x0399 Ι (GREEK CAPITAL LETTER IOTA) + 74 0x039A Κ (GREEK CAPITAL LETTER KAPPA) + 75 0x039B Λ (GREEK CAPITAL LETTER LAMDA) + 76 0x039C Μ (GREEK CAPITAL LETTER MU) + 77 0x039D Ν (GREEK CAPITAL LETTER NU) + 78 0x039E Ξ (GREEK CAPITAL LETTER XI) + 79 0x039F Ο (GREEK CAPITAL LETTER OMICRON) + 80 0x03A0 Π (GREEK CAPITAL LETTER PI) + 81 0x03A1 Ρ (GREEK CAPITAL LETTER RHO) + 83 0x03A3 Σ (GREEK CAPITAL LETTER SIGMA) + 84 0x03A4 Τ (GREEK CAPITAL LETTER TAU) + 85 0x03A5 Υ (GREEK CAPITAL LETTER UPSILON) + 86 0x03A6 Φ (GREEK CAPITAL LETTER PHI) + 87 0x03A7 Χ (GREEK CAPITAL LETTER CHI) + 88 0x03A8 Ψ (GREEK CAPITAL LETTER PSI) + 89 0x03A9 Ω (GREEK CAPITAL LETTER OMEGA) + 90 0x03AA Ϊ (GREEK CAPITAL LETTER IOTA WITH DIALYTIKA) + 91 0x03AB Ϋ (GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA) + 92 0x03AC ά (GREEK SMALL LETTER ALPHA WITH TONOS) + 93 0x03AD έ (GREEK SMALL LETTER EPSILON WITH TONOS) + 94 0x03AE ή (GREEK SMALL LETTER ETA WITH TONOS) + 95 0x03AF ί (GREEK SMALL LETTER IOTA WITH TONOS) + 96 0x03B0 ΰ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS) + 97 0x03B1 α (GREEK SMALL LETTER ALPHA) + 
98 0x03B2 β (GREEK SMALL LETTER BETA) + 99 0x03B3 γ (GREEK SMALL LETTER GAMMA) +100 0x03B4 δ (GREEK SMALL LETTER DELTA) +101 0x03B5 ε (GREEK SMALL LETTER EPSILON) +102 0x03B6 ζ (GREEK SMALL LETTER ZETA) +103 0x03B7 η (GREEK SMALL LETTER ETA) +104 0x03B8 θ (GREEK SMALL LETTER THETA) +105 0x03B9 ι (GREEK SMALL LETTER IOTA) +106 0x03BA κ (GREEK SMALL LETTER KAPPA) +107 0x03BB λ (GREEK SMALL LETTER LAMDA) +108 0x03BC μ (GREEK SMALL LETTER MU) +109 0x03BD ν (GREEK SMALL LETTER NU) +110 0x03BE ξ (GREEK SMALL LETTER XI) +111 0x03BF ο (GREEK SMALL LETTER OMICRON) +112 0x03C0 π (GREEK SMALL LETTER PI) +113 0x03C1 ρ (GREEK SMALL LETTER RHO) +114 0x03C2 ς (GREEK SMALL LETTER FINAL SIGMA) +115 0x03C3 σ (GREEK SMALL LETTER SIGMA) +116 0x03C4 τ (GREEK SMALL LETTER TAU) +117 0x03C5 υ (GREEK SMALL LETTER UPSILON) +118 0x03C6 φ (GREEK SMALL LETTER PHI) +119 0x03C7 χ (GREEK SMALL LETTER CHI) +120 0x03C8 ψ (GREEK SMALL LETTER PSI) +121 0x03C9 ω (GREEK SMALL LETTER OMEGA) +122 0x03CA ϊ (GREEK SMALL LETTER IOTA WITH DIALYTIKA) +123 0x03CB ϋ (GREEK SMALL LETTER UPSILON WITH DIALYTIKA) +124 0x03CC ό (GREEK SMALL LETTER OMICRON WITH TONOS) +125 0x03CD ύ (GREEK SMALL LETTER UPSILON WITH TONOS) +126 0x03CE ώ (GREEK SMALL LETTER OMEGA WITH TONOS) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-1254.txt b/sub_crates/text_encoding/encoding_tables/index-windows-1254.txt new file mode 100644 index 0000000..e8694a7 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1254.txt @@ -0,0 +1,134 @@ +# For details on index index-windows-1254.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: e80a27adf377438be8ba5bd223875ea56d6a4d47f958cce1c957a2c446825caa +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x0081  () + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE 
DAGGER) + 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x0152 Œ (LATIN CAPITAL LIGATURE OE) + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x02DC ˜ (SMALL TILDE) + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x0161 š (LATIN SMALL LETTER S WITH CARON) + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 28 0x0153 œ (LATIN SMALL LIGATURE OE) + 29 0x009D  () + 30 0x009E ž () + 31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) + 32 0x00A0   (NO-BREAK SPACE) + 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x00A5 ¥ (YEN SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x00AA ª (FEMININE ORDINAL INDICATOR) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x00AF ¯ (MACRON) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00B8 ¸ (CEDILLA) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x00BA º (MASCULINE ORDINAL INDICATOR) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) + 63 0x00BF ¿ (INVERTED QUESTION MARK) + 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) + 65 0x00C1 Á (LATIN CAPITAL 
LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x00C3 à (LATIN CAPITAL LETTER A WITH TILDE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x00CC Ì (LATIN CAPITAL LETTER I WITH GRAVE) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x00CF Ï (LATIN CAPITAL LETTER I WITH DIAERESIS) + 80 0x011E Ğ (LATIN CAPITAL LETTER G WITH BREVE) + 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) + 82 0x00D2 Ò (LATIN CAPITAL LETTER O WITH GRAVE) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x0130 İ (LATIN CAPITAL LETTER I WITH DOT ABOVE) + 94 0x015E Ş (LATIN CAPITAL LETTER S WITH CEDILLA) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x00E3 ã (LATIN SMALL LETTER A WITH TILDE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) +102 0x00E6 æ (LATIN SMALL LETTER AE) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) +105 0x00E9 é 
(LATIN SMALL LETTER E WITH ACUTE) +106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x00EC ì (LATIN SMALL LETTER I WITH GRAVE) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +112 0x011F ğ (LATIN SMALL LETTER G WITH BREVE) +113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) +114 0x00F2 ò (LATIN SMALL LETTER O WITH GRAVE) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) +121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x0131 ı (LATIN SMALL LETTER DOTLESS I) +126 0x015F ş (LATIN SMALL LETTER S WITH CEDILLA) +127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-1255.txt b/sub_crates/text_encoding/encoding_tables/index-windows-1255.txt new file mode 100644 index 0000000..2c9deee --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1255.txt @@ -0,0 +1,124 @@ +# For details on index index-windows-1255.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: cd7fb43c97eefa1651084d92d02af53ad668bd848528c18c3b1af5c06b499651 +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x0081  () + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE DAGGER) + 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x008A Š () + 11 0x2039 ‹ (SINGLE 
LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x02DC ˜ (SMALL TILDE) + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x009A š () + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x20AA ₪ (NEW SHEQEL SIGN) + 37 0x00A5 ¥ (YEN SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x00D7 × (MULTIPLICATION SIGN) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x00AF ¯ (MACRON) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00B8 ¸ (CEDILLA) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x00F7 ÷ (DIVISION SIGN) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) + 63 0x00BF ¿ (INVERTED QUESTION MARK) + 64 0x05B0 ְ (HEBREW POINT SHEVA) + 65 0x05B1 ֱ (HEBREW POINT HATAF SEGOL) + 66 0x05B2 ֲ (HEBREW POINT HATAF PATAH) + 67 0x05B3 ֳ (HEBREW POINT HATAF QAMATS) + 68 0x05B4 ִ (HEBREW POINT HIRIQ) + 69 0x05B5 ֵ (HEBREW POINT TSERE) + 70 0x05B6 ֶ (HEBREW POINT SEGOL) + 71 0x05B7 ַ (HEBREW POINT PATAH) + 72 0x05B8 ָ (HEBREW POINT QAMATS) + 73 0x05B9 ֹ (HEBREW POINT HOLAM) + 74 
0x05BA ֺ (HEBREW POINT HOLAM HASER FOR VAV) + 75 0x05BB ֻ (HEBREW POINT QUBUTS) + 76 0x05BC ּ (HEBREW POINT DAGESH OR MAPIQ) + 77 0x05BD ֽ (HEBREW POINT METEG) + 78 0x05BE ־ (HEBREW PUNCTUATION MAQAF) + 79 0x05BF ֿ (HEBREW POINT RAFE) + 80 0x05C0 ׀ (HEBREW PUNCTUATION PASEQ) + 81 0x05C1 ׁ (HEBREW POINT SHIN DOT) + 82 0x05C2 ׂ (HEBREW POINT SIN DOT) + 83 0x05C3 ׃ (HEBREW PUNCTUATION SOF PASUQ) + 84 0x05F0 װ (HEBREW LIGATURE YIDDISH DOUBLE VAV) + 85 0x05F1 ױ (HEBREW LIGATURE YIDDISH VAV YOD) + 86 0x05F2 ײ (HEBREW LIGATURE YIDDISH DOUBLE YOD) + 87 0x05F3 ׳ (HEBREW PUNCTUATION GERESH) + 88 0x05F4 ״ (HEBREW PUNCTUATION GERSHAYIM) + 96 0x05D0 א (HEBREW LETTER ALEF) + 97 0x05D1 ב (HEBREW LETTER BET) + 98 0x05D2 ג (HEBREW LETTER GIMEL) + 99 0x05D3 ד (HEBREW LETTER DALET) +100 0x05D4 ה (HEBREW LETTER HE) +101 0x05D5 ו (HEBREW LETTER VAV) +102 0x05D6 ז (HEBREW LETTER ZAYIN) +103 0x05D7 ח (HEBREW LETTER HET) +104 0x05D8 ט (HEBREW LETTER TET) +105 0x05D9 י (HEBREW LETTER YOD) +106 0x05DA ך (HEBREW LETTER FINAL KAF) +107 0x05DB כ (HEBREW LETTER KAF) +108 0x05DC ל (HEBREW LETTER LAMED) +109 0x05DD ם (HEBREW LETTER FINAL MEM) +110 0x05DE מ (HEBREW LETTER MEM) +111 0x05DF ן (HEBREW LETTER FINAL NUN) +112 0x05E0 נ (HEBREW LETTER NUN) +113 0x05E1 ס (HEBREW LETTER SAMEKH) +114 0x05E2 ע (HEBREW LETTER AYIN) +115 0x05E3 ף (HEBREW LETTER FINAL PE) +116 0x05E4 פ (HEBREW LETTER PE) +117 0x05E5 ץ (HEBREW LETTER FINAL TSADI) +118 0x05E6 צ (HEBREW LETTER TSADI) +119 0x05E7 ק (HEBREW LETTER QOF) +120 0x05E8 ר (HEBREW LETTER RESH) +121 0x05E9 ש (HEBREW LETTER SHIN) +122 0x05EA ת (HEBREW LETTER TAV) +125 0x200E ‎ (LEFT-TO-RIGHT MARK) +126 0x200F ‏ (RIGHT-TO-LEFT MARK) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-1256.txt b/sub_crates/text_encoding/encoding_tables/index-windows-1256.txt new file mode 100644 index 0000000..0ab9736 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1256.txt @@ -0,0 +1,134 @@ +# For details on index 
index-windows-1256.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 161bdb381f16408e8bebcc8f5310c4190af0e359de8d9bbaa3628ce2f0875509 +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x067E پ (ARABIC LETTER PEH) + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE DAGGER) + 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x0679 ٹ (ARABIC LETTER TTEH) + 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x0152 Œ (LATIN CAPITAL LIGATURE OE) + 13 0x0686 چ (ARABIC LETTER TCHEH) + 14 0x0698 ژ (ARABIC LETTER JEH) + 15 0x0688 ڈ (ARABIC LETTER DDAL) + 16 0x06AF گ (ARABIC LETTER GAF) + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x06A9 ک (ARABIC LETTER KEHEH) + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x0691 ڑ (ARABIC LETTER RREH) + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 28 0x0153 œ (LATIN SMALL LIGATURE OE) + 29 0x200C ‌ (ZERO WIDTH NON-JOINER) + 30 0x200D ‍ (ZERO WIDTH JOINER) + 31 0x06BA ں (ARABIC LETTER NOON GHUNNA) + 32 0x00A0   (NO-BREAK SPACE) + 33 0x060C ، (ARABIC COMMA) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x00A5 ¥ (YEN SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x06BE ھ (ARABIC LETTER HEH DOACHASHMEE) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x00AF ¯ (MACRON) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) 
+ 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00B8 ¸ (CEDILLA) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x061B ؛ (ARABIC SEMICOLON) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) + 63 0x061F ؟ (ARABIC QUESTION MARK) + 64 0x06C1 ہ (ARABIC LETTER HEH GOAL) + 65 0x0621 ء (ARABIC LETTER HAMZA) + 66 0x0622 آ (ARABIC LETTER ALEF WITH MADDA ABOVE) + 67 0x0623 أ (ARABIC LETTER ALEF WITH HAMZA ABOVE) + 68 0x0624 ؤ (ARABIC LETTER WAW WITH HAMZA ABOVE) + 69 0x0625 إ (ARABIC LETTER ALEF WITH HAMZA BELOW) + 70 0x0626 ئ (ARABIC LETTER YEH WITH HAMZA ABOVE) + 71 0x0627 ا (ARABIC LETTER ALEF) + 72 0x0628 ب (ARABIC LETTER BEH) + 73 0x0629 ة (ARABIC LETTER TEH MARBUTA) + 74 0x062A ت (ARABIC LETTER TEH) + 75 0x062B ث (ARABIC LETTER THEH) + 76 0x062C ج (ARABIC LETTER JEEM) + 77 0x062D ح (ARABIC LETTER HAH) + 78 0x062E خ (ARABIC LETTER KHAH) + 79 0x062F د (ARABIC LETTER DAL) + 80 0x0630 ذ (ARABIC LETTER THAL) + 81 0x0631 ر (ARABIC LETTER REH) + 82 0x0632 ز (ARABIC LETTER ZAIN) + 83 0x0633 س (ARABIC LETTER SEEN) + 84 0x0634 ش (ARABIC LETTER SHEEN) + 85 0x0635 ص (ARABIC LETTER SAD) + 86 0x0636 ض (ARABIC LETTER DAD) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x0637 ط (ARABIC LETTER TAH) + 89 0x0638 ظ (ARABIC LETTER ZAH) + 90 0x0639 ع (ARABIC LETTER AIN) + 91 0x063A غ (ARABIC LETTER GHAIN) + 92 0x0640 ـ (ARABIC TATWEEL) + 93 0x0641 ف (ARABIC LETTER FEH) + 94 0x0642 ق (ARABIC LETTER QAF) + 95 0x0643 ك (ARABIC LETTER KAF) + 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 97 0x0644 ل (ARABIC LETTER LAM) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x0645 م (ARABIC LETTER MEEM) +100 0x0646 ن (ARABIC LETTER NOON) +101 0x0647 ه (ARABIC LETTER HEH) +102 0x0648 و (ARABIC LETTER WAW) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x00E8 è (LATIN SMALL LETTER E WITH 
GRAVE) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x0649 ى (ARABIC LETTER ALEF MAKSURA) +109 0x064A ي (ARABIC LETTER YEH) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +112 0x064B ً (ARABIC FATHATAN) +113 0x064C ٌ (ARABIC DAMMATAN) +114 0x064D ٍ (ARABIC KASRATAN) +115 0x064E َ (ARABIC FATHA) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x064F ُ (ARABIC DAMMA) +118 0x0650 ِ (ARABIC KASRA) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x0651 ّ (ARABIC SHADDA) +121 0x00F9 ù (LATIN SMALL LETTER U WITH GRAVE) +122 0x0652 ْ (ARABIC SUKUN) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x200E ‎ (LEFT-TO-RIGHT MARK) +126 0x200F ‏ (RIGHT-TO-LEFT MARK) +127 0x06D2 ے (ARABIC LETTER YEH BARREE) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-1257.txt b/sub_crates/text_encoding/encoding_tables/index-windows-1257.txt new file mode 100644 index 0000000..da72914 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1257.txt @@ -0,0 +1,132 @@ +# For details on index index-windows-1257.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: cc7256bdd10a5b8dc7fb6f994659f307dfcae60def9aa6c29d811f85e2842c47 +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x0081  () + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0083 ƒ () + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE DAGGER) + 8 0x0088 ˆ () + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x008A Š () + 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x008C Œ () + 13 0x00A8 ¨ (DIAERESIS) + 14 0x02C7 ˇ (CARON) + 15 0x00B8 ¸ (CEDILLA) + 16 0x0090  () + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT 
DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x0098 ˜ () + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x009A š () + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK) + 28 0x009C œ () + 29 0x00AF ¯ (MACRON) + 30 0x02DB ˛ (OGONEK) + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x0156 Ŗ (LATIN CAPITAL LETTER R WITH CEDILLA) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x0157 ŗ (LATIN SMALL LETTER R WITH CEDILLA) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) + 63 0x00E6 æ (LATIN SMALL LETTER AE) + 64 0x0104 Ą (LATIN CAPITAL LETTER A WITH OGONEK) + 65 0x012E Į (LATIN CAPITAL LETTER I WITH OGONEK) + 66 0x0100 Ā (LATIN CAPITAL LETTER A WITH MACRON) + 67 0x0106 Ć (LATIN CAPITAL LETTER C WITH ACUTE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x0118 Ę (LATIN CAPITAL LETTER E WITH OGONEK) + 71 0x0112 Ē (LATIN CAPITAL LETTER E WITH MACRON) + 72 0x010C Č (LATIN CAPITAL LETTER C WITH CARON) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x0179 Ź (LATIN CAPITAL LETTER Z WITH ACUTE) + 75 
0x0116 Ė (LATIN CAPITAL LETTER E WITH DOT ABOVE) + 76 0x0122 Ģ (LATIN CAPITAL LETTER G WITH CEDILLA) + 77 0x0136 Ķ (LATIN CAPITAL LETTER K WITH CEDILLA) + 78 0x012A Ī (LATIN CAPITAL LETTER I WITH MACRON) + 79 0x013B Ļ (LATIN CAPITAL LETTER L WITH CEDILLA) + 80 0x0160 Š (LATIN CAPITAL LETTER S WITH CARON) + 81 0x0143 Ń (LATIN CAPITAL LETTER N WITH ACUTE) + 82 0x0145 Ņ (LATIN CAPITAL LETTER N WITH CEDILLA) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x014C Ō (LATIN CAPITAL LETTER O WITH MACRON) + 85 0x00D5 Õ (LATIN CAPITAL LETTER O WITH TILDE) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x0172 Ų (LATIN CAPITAL LETTER U WITH OGONEK) + 89 0x0141 Ł (LATIN CAPITAL LETTER L WITH STROKE) + 90 0x015A Ś (LATIN CAPITAL LETTER S WITH ACUTE) + 91 0x016A Ū (LATIN CAPITAL LETTER U WITH MACRON) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x017B Ż (LATIN CAPITAL LETTER Z WITH DOT ABOVE) + 94 0x017D Ž (LATIN CAPITAL LETTER Z WITH CARON) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x0105 ą (LATIN SMALL LETTER A WITH OGONEK) + 97 0x012F į (LATIN SMALL LETTER I WITH OGONEK) + 98 0x0101 ā (LATIN SMALL LETTER A WITH MACRON) + 99 0x0107 ć (LATIN SMALL LETTER C WITH ACUTE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) +102 0x0119 ę (LATIN SMALL LETTER E WITH OGONEK) +103 0x0113 ē (LATIN SMALL LETTER E WITH MACRON) +104 0x010D č (LATIN SMALL LETTER C WITH CARON) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x017A ź (LATIN SMALL LETTER Z WITH ACUTE) +107 0x0117 ė (LATIN SMALL LETTER E WITH DOT ABOVE) +108 0x0123 ģ (LATIN SMALL LETTER G WITH CEDILLA) +109 0x0137 ķ (LATIN SMALL LETTER K WITH CEDILLA) +110 0x012B ī (LATIN SMALL LETTER I WITH MACRON) +111 0x013C ļ (LATIN SMALL LETTER L WITH CEDILLA) +112 0x0161 š (LATIN SMALL LETTER S WITH CARON) +113 0x0144 ń (LATIN SMALL LETTER N WITH ACUTE) +114 0x0146 ņ (LATIN SMALL LETTER N WITH CEDILLA) +115 
0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x014D ō (LATIN SMALL LETTER O WITH MACRON) +117 0x00F5 õ (LATIN SMALL LETTER O WITH TILDE) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x0173 ų (LATIN SMALL LETTER U WITH OGONEK) +121 0x0142 ł (LATIN SMALL LETTER L WITH STROKE) +122 0x015B ś (LATIN SMALL LETTER S WITH ACUTE) +123 0x016B ū (LATIN SMALL LETTER U WITH MACRON) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x017C ż (LATIN SMALL LETTER Z WITH DOT ABOVE) +126 0x017E ž (LATIN SMALL LETTER Z WITH CARON) +127 0x02D9 ˙ (DOT ABOVE) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-1258.txt b/sub_crates/text_encoding/encoding_tables/index-windows-1258.txt new file mode 100644 index 0000000..141a066 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-1258.txt @@ -0,0 +1,134 @@ +# For details on index index-windows-1258.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 198bacedfcf24390e219240a7b776b6cec34cff070330b08a601a69c67f7eb24 +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x0081  () + 2 0x201A ‚ (SINGLE LOW-9 QUOTATION MARK) + 3 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) + 4 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x2020 † (DAGGER) + 7 0x2021 ‡ (DOUBLE DAGGER) + 8 0x02C6 ˆ (MODIFIER LETTER CIRCUMFLEX ACCENT) + 9 0x2030 ‰ (PER MILLE SIGN) + 10 0x008A Š () + 11 0x2039 ‹ (SINGLE LEFT-POINTING ANGLE QUOTATION MARK) + 12 0x0152 Œ (LATIN CAPITAL LIGATURE OE) + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x02DC ˜ (SMALL TILDE) + 25 0x2122 ™ (TRADE MARK SIGN) + 26 0x009A š () + 27 0x203A › (SINGLE RIGHT-POINTING ANGLE QUOTATION 
MARK) + 28 0x0153 œ (LATIN SMALL LIGATURE OE) + 29 0x009D  () + 30 0x009E ž () + 31 0x0178 Ÿ (LATIN CAPITAL LETTER Y WITH DIAERESIS) + 32 0x00A0   (NO-BREAK SPACE) + 33 0x00A1 ¡ (INVERTED EXCLAMATION MARK) + 34 0x00A2 ¢ (CENT SIGN) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A4 ¤ (CURRENCY SIGN) + 37 0x00A5 ¥ (YEN SIGN) + 38 0x00A6 ¦ (BROKEN BAR) + 39 0x00A7 § (SECTION SIGN) + 40 0x00A8 ¨ (DIAERESIS) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x00AA ª (FEMININE ORDINAL INDICATOR) + 43 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 44 0x00AC ¬ (NOT SIGN) + 45 0x00AD ­ (SOFT HYPHEN) + 46 0x00AE ® (REGISTERED SIGN) + 47 0x00AF ¯ (MACRON) + 48 0x00B0 ° (DEGREE SIGN) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x00B2 ² (SUPERSCRIPT TWO) + 51 0x00B3 ³ (SUPERSCRIPT THREE) + 52 0x00B4 ´ (ACUTE ACCENT) + 53 0x00B5 µ (MICRO SIGN) + 54 0x00B6 ¶ (PILCROW SIGN) + 55 0x00B7 · (MIDDLE DOT) + 56 0x00B8 ¸ (CEDILLA) + 57 0x00B9 ¹ (SUPERSCRIPT ONE) + 58 0x00BA º (MASCULINE ORDINAL INDICATOR) + 59 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 60 0x00BC ¼ (VULGAR FRACTION ONE QUARTER) + 61 0x00BD ½ (VULGAR FRACTION ONE HALF) + 62 0x00BE ¾ (VULGAR FRACTION THREE QUARTERS) + 63 0x00BF ¿ (INVERTED QUESTION MARK) + 64 0x00C0 À (LATIN CAPITAL LETTER A WITH GRAVE) + 65 0x00C1 Á (LATIN CAPITAL LETTER A WITH ACUTE) + 66 0x00C2  (LATIN CAPITAL LETTER A WITH CIRCUMFLEX) + 67 0x0102 Ă (LATIN CAPITAL LETTER A WITH BREVE) + 68 0x00C4 Ä (LATIN CAPITAL LETTER A WITH DIAERESIS) + 69 0x00C5 Å (LATIN CAPITAL LETTER A WITH RING ABOVE) + 70 0x00C6 Æ (LATIN CAPITAL LETTER AE) + 71 0x00C7 Ç (LATIN CAPITAL LETTER C WITH CEDILLA) + 72 0x00C8 È (LATIN CAPITAL LETTER E WITH GRAVE) + 73 0x00C9 É (LATIN CAPITAL LETTER E WITH ACUTE) + 74 0x00CA Ê (LATIN CAPITAL LETTER E WITH CIRCUMFLEX) + 75 0x00CB Ë (LATIN CAPITAL LETTER E WITH DIAERESIS) + 76 0x0300 ̀ (COMBINING GRAVE ACCENT) + 77 0x00CD Í (LATIN CAPITAL LETTER I WITH ACUTE) + 78 0x00CE Î (LATIN CAPITAL LETTER I WITH CIRCUMFLEX) + 79 0x00CF Ï (LATIN CAPITAL LETTER 
I WITH DIAERESIS) + 80 0x0110 Đ (LATIN CAPITAL LETTER D WITH STROKE) + 81 0x00D1 Ñ (LATIN CAPITAL LETTER N WITH TILDE) + 82 0x0309 ̉ (COMBINING HOOK ABOVE) + 83 0x00D3 Ó (LATIN CAPITAL LETTER O WITH ACUTE) + 84 0x00D4 Ô (LATIN CAPITAL LETTER O WITH CIRCUMFLEX) + 85 0x01A0 Ơ (LATIN CAPITAL LETTER O WITH HORN) + 86 0x00D6 Ö (LATIN CAPITAL LETTER O WITH DIAERESIS) + 87 0x00D7 × (MULTIPLICATION SIGN) + 88 0x00D8 Ø (LATIN CAPITAL LETTER O WITH STROKE) + 89 0x00D9 Ù (LATIN CAPITAL LETTER U WITH GRAVE) + 90 0x00DA Ú (LATIN CAPITAL LETTER U WITH ACUTE) + 91 0x00DB Û (LATIN CAPITAL LETTER U WITH CIRCUMFLEX) + 92 0x00DC Ü (LATIN CAPITAL LETTER U WITH DIAERESIS) + 93 0x01AF Ư (LATIN CAPITAL LETTER U WITH HORN) + 94 0x0303 ̃ (COMBINING TILDE) + 95 0x00DF ß (LATIN SMALL LETTER SHARP S) + 96 0x00E0 à (LATIN SMALL LETTER A WITH GRAVE) + 97 0x00E1 á (LATIN SMALL LETTER A WITH ACUTE) + 98 0x00E2 â (LATIN SMALL LETTER A WITH CIRCUMFLEX) + 99 0x0103 ă (LATIN SMALL LETTER A WITH BREVE) +100 0x00E4 ä (LATIN SMALL LETTER A WITH DIAERESIS) +101 0x00E5 å (LATIN SMALL LETTER A WITH RING ABOVE) +102 0x00E6 æ (LATIN SMALL LETTER AE) +103 0x00E7 ç (LATIN SMALL LETTER C WITH CEDILLA) +104 0x00E8 è (LATIN SMALL LETTER E WITH GRAVE) +105 0x00E9 é (LATIN SMALL LETTER E WITH ACUTE) +106 0x00EA ê (LATIN SMALL LETTER E WITH CIRCUMFLEX) +107 0x00EB ë (LATIN SMALL LETTER E WITH DIAERESIS) +108 0x0301 ́ (COMBINING ACUTE ACCENT) +109 0x00ED í (LATIN SMALL LETTER I WITH ACUTE) +110 0x00EE î (LATIN SMALL LETTER I WITH CIRCUMFLEX) +111 0x00EF ï (LATIN SMALL LETTER I WITH DIAERESIS) +112 0x0111 đ (LATIN SMALL LETTER D WITH STROKE) +113 0x00F1 ñ (LATIN SMALL LETTER N WITH TILDE) +114 0x0323 ̣ (COMBINING DOT BELOW) +115 0x00F3 ó (LATIN SMALL LETTER O WITH ACUTE) +116 0x00F4 ô (LATIN SMALL LETTER O WITH CIRCUMFLEX) +117 0x01A1 ơ (LATIN SMALL LETTER O WITH HORN) +118 0x00F6 ö (LATIN SMALL LETTER O WITH DIAERESIS) +119 0x00F7 ÷ (DIVISION SIGN) +120 0x00F8 ø (LATIN SMALL LETTER O WITH STROKE) +121 0x00F9 ù (LATIN 
SMALL LETTER U WITH GRAVE) +122 0x00FA ú (LATIN SMALL LETTER U WITH ACUTE) +123 0x00FB û (LATIN SMALL LETTER U WITH CIRCUMFLEX) +124 0x00FC ü (LATIN SMALL LETTER U WITH DIAERESIS) +125 0x01B0 ư (LATIN SMALL LETTER U WITH HORN) +126 0x20AB ₫ (DONG SIGN) +127 0x00FF ÿ (LATIN SMALL LETTER Y WITH DIAERESIS) diff --git a/sub_crates/text_encoding/encoding_tables/index-windows-874.txt b/sub_crates/text_encoding/encoding_tables/index-windows-874.txt new file mode 100644 index 0000000..21db6df --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-windows-874.txt @@ -0,0 +1,126 @@ +# For details on index index-windows-874.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: b416583ce125e38474381b31b401a98b19ecf2e57e0998e78a1e18b14894905d +# Date: 2018-01-06 + + 0 0x20AC € (EURO SIGN) + 1 0x0081  () + 2 0x0082 ‚ () + 3 0x0083 ƒ () + 4 0x0084 „ () + 5 0x2026 … (HORIZONTAL ELLIPSIS) + 6 0x0086 † () + 7 0x0087 ‡ () + 8 0x0088 ˆ () + 9 0x0089 ‰ () + 10 0x008A Š () + 11 0x008B ‹ () + 12 0x008C Œ () + 13 0x008D  () + 14 0x008E Ž () + 15 0x008F  () + 16 0x0090  () + 17 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 18 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 19 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 20 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 21 0x2022 • (BULLET) + 22 0x2013 – (EN DASH) + 23 0x2014 — (EM DASH) + 24 0x0098 ˜ () + 25 0x0099 ™ () + 26 0x009A š () + 27 0x009B › () + 28 0x009C œ () + 29 0x009D  () + 30 0x009E ž () + 31 0x009F Ÿ () + 32 0x00A0   (NO-BREAK SPACE) + 33 0x0E01 ก (THAI CHARACTER KO KAI) + 34 0x0E02 ข (THAI CHARACTER KHO KHAI) + 35 0x0E03 ฃ (THAI CHARACTER KHO KHUAT) + 36 0x0E04 ค (THAI CHARACTER KHO KHWAI) + 37 0x0E05 ฅ (THAI CHARACTER KHO KHON) + 38 0x0E06 ฆ (THAI CHARACTER KHO RAKHANG) + 39 0x0E07 ง (THAI CHARACTER NGO NGU) + 40 0x0E08 จ (THAI CHARACTER CHO CHAN) + 41 0x0E09 ฉ (THAI CHARACTER CHO CHING) + 42 0x0E0A ช (THAI CHARACTER CHO CHANG) + 43 0x0E0B ซ (THAI CHARACTER SO SO) + 44 0x0E0C ฌ (THAI CHARACTER CHO 
CHOE) + 45 0x0E0D ญ (THAI CHARACTER YO YING) + 46 0x0E0E ฎ (THAI CHARACTER DO CHADA) + 47 0x0E0F ฏ (THAI CHARACTER TO PATAK) + 48 0x0E10 ฐ (THAI CHARACTER THO THAN) + 49 0x0E11 ฑ (THAI CHARACTER THO NANGMONTHO) + 50 0x0E12 ฒ (THAI CHARACTER THO PHUTHAO) + 51 0x0E13 ณ (THAI CHARACTER NO NEN) + 52 0x0E14 ด (THAI CHARACTER DO DEK) + 53 0x0E15 ต (THAI CHARACTER TO TAO) + 54 0x0E16 ถ (THAI CHARACTER THO THUNG) + 55 0x0E17 ท (THAI CHARACTER THO THAHAN) + 56 0x0E18 ธ (THAI CHARACTER THO THONG) + 57 0x0E19 น (THAI CHARACTER NO NU) + 58 0x0E1A บ (THAI CHARACTER BO BAIMAI) + 59 0x0E1B ป (THAI CHARACTER PO PLA) + 60 0x0E1C ผ (THAI CHARACTER PHO PHUNG) + 61 0x0E1D ฝ (THAI CHARACTER FO FA) + 62 0x0E1E พ (THAI CHARACTER PHO PHAN) + 63 0x0E1F ฟ (THAI CHARACTER FO FAN) + 64 0x0E20 ภ (THAI CHARACTER PHO SAMPHAO) + 65 0x0E21 ม (THAI CHARACTER MO MA) + 66 0x0E22 ย (THAI CHARACTER YO YAK) + 67 0x0E23 ร (THAI CHARACTER RO RUA) + 68 0x0E24 ฤ (THAI CHARACTER RU) + 69 0x0E25 ล (THAI CHARACTER LO LING) + 70 0x0E26 ฦ (THAI CHARACTER LU) + 71 0x0E27 ว (THAI CHARACTER WO WAEN) + 72 0x0E28 ศ (THAI CHARACTER SO SALA) + 73 0x0E29 ษ (THAI CHARACTER SO RUSI) + 74 0x0E2A ส (THAI CHARACTER SO SUA) + 75 0x0E2B ห (THAI CHARACTER HO HIP) + 76 0x0E2C ฬ (THAI CHARACTER LO CHULA) + 77 0x0E2D อ (THAI CHARACTER O ANG) + 78 0x0E2E ฮ (THAI CHARACTER HO NOKHUK) + 79 0x0E2F ฯ (THAI CHARACTER PAIYANNOI) + 80 0x0E30 ะ (THAI CHARACTER SARA A) + 81 0x0E31 ั (THAI CHARACTER MAI HAN-AKAT) + 82 0x0E32 า (THAI CHARACTER SARA AA) + 83 0x0E33 ำ (THAI CHARACTER SARA AM) + 84 0x0E34 ิ (THAI CHARACTER SARA I) + 85 0x0E35 ี (THAI CHARACTER SARA II) + 86 0x0E36 ึ (THAI CHARACTER SARA UE) + 87 0x0E37 ื (THAI CHARACTER SARA UEE) + 88 0x0E38 ุ (THAI CHARACTER SARA U) + 89 0x0E39 ู (THAI CHARACTER SARA UU) + 90 0x0E3A ฺ (THAI CHARACTER PHINTHU) + 95 0x0E3F ฿ (THAI CURRENCY SYMBOL BAHT) + 96 0x0E40 เ (THAI CHARACTER SARA E) + 97 0x0E41 แ (THAI CHARACTER SARA AE) + 98 0x0E42 โ (THAI CHARACTER SARA O) + 99 0x0E43 ใ (THAI CHARACTER 
SARA AI MAIMUAN) +100 0x0E44 ไ (THAI CHARACTER SARA AI MAIMALAI) +101 0x0E45 ๅ (THAI CHARACTER LAKKHANGYAO) +102 0x0E46 ๆ (THAI CHARACTER MAIYAMOK) +103 0x0E47 ็ (THAI CHARACTER MAITAIKHU) +104 0x0E48 ่ (THAI CHARACTER MAI EK) +105 0x0E49 ้ (THAI CHARACTER MAI THO) +106 0x0E4A ๊ (THAI CHARACTER MAI TRI) +107 0x0E4B ๋ (THAI CHARACTER MAI CHATTAWA) +108 0x0E4C ์ (THAI CHARACTER THANTHAKHAT) +109 0x0E4D ํ (THAI CHARACTER NIKHAHIT) +110 0x0E4E ๎ (THAI CHARACTER YAMAKKAN) +111 0x0E4F ๏ (THAI CHARACTER FONGMAN) +112 0x0E50 ๐ (THAI DIGIT ZERO) +113 0x0E51 ๑ (THAI DIGIT ONE) +114 0x0E52 ๒ (THAI DIGIT TWO) +115 0x0E53 ๓ (THAI DIGIT THREE) +116 0x0E54 ๔ (THAI DIGIT FOUR) +117 0x0E55 ๕ (THAI DIGIT FIVE) +118 0x0E56 ๖ (THAI DIGIT SIX) +119 0x0E57 ๗ (THAI DIGIT SEVEN) +120 0x0E58 ๘ (THAI DIGIT EIGHT) +121 0x0E59 ๙ (THAI DIGIT NINE) +122 0x0E5A ๚ (THAI CHARACTER ANGKHANKHU) +123 0x0E5B ๛ (THAI CHARACTER KHOMUT) diff --git a/sub_crates/text_encoding/encoding_tables/index-x-mac-cyrillic.txt b/sub_crates/text_encoding/encoding_tables/index-x-mac-cyrillic.txt new file mode 100644 index 0000000..de05e25 --- /dev/null +++ b/sub_crates/text_encoding/encoding_tables/index-x-mac-cyrillic.txt @@ -0,0 +1,134 @@ +# For details on index index-x-mac-cyrillic.txt see the Encoding Standard +# https://encoding.spec.whatwg.org/ +# +# Identifier: 73e8e7642c6fa9de29d42819b47fba55b58666fb1e339faeb4a89a0bd7c24d43 +# Date: 2018-01-06 + + 0 0x0410 А (CYRILLIC CAPITAL LETTER A) + 1 0x0411 Б (CYRILLIC CAPITAL LETTER BE) + 2 0x0412 В (CYRILLIC CAPITAL LETTER VE) + 3 0x0413 Г (CYRILLIC CAPITAL LETTER GHE) + 4 0x0414 Д (CYRILLIC CAPITAL LETTER DE) + 5 0x0415 Е (CYRILLIC CAPITAL LETTER IE) + 6 0x0416 Ж (CYRILLIC CAPITAL LETTER ZHE) + 7 0x0417 З (CYRILLIC CAPITAL LETTER ZE) + 8 0x0418 И (CYRILLIC CAPITAL LETTER I) + 9 0x0419 Й (CYRILLIC CAPITAL LETTER SHORT I) + 10 0x041A К (CYRILLIC CAPITAL LETTER KA) + 11 0x041B Л (CYRILLIC CAPITAL LETTER EL) + 12 0x041C М (CYRILLIC CAPITAL LETTER EM) + 13 0x041D Н 
(CYRILLIC CAPITAL LETTER EN) + 14 0x041E О (CYRILLIC CAPITAL LETTER O) + 15 0x041F П (CYRILLIC CAPITAL LETTER PE) + 16 0x0420 Р (CYRILLIC CAPITAL LETTER ER) + 17 0x0421 С (CYRILLIC CAPITAL LETTER ES) + 18 0x0422 Т (CYRILLIC CAPITAL LETTER TE) + 19 0x0423 У (CYRILLIC CAPITAL LETTER U) + 20 0x0424 Ф (CYRILLIC CAPITAL LETTER EF) + 21 0x0425 Х (CYRILLIC CAPITAL LETTER HA) + 22 0x0426 Ц (CYRILLIC CAPITAL LETTER TSE) + 23 0x0427 Ч (CYRILLIC CAPITAL LETTER CHE) + 24 0x0428 Ш (CYRILLIC CAPITAL LETTER SHA) + 25 0x0429 Щ (CYRILLIC CAPITAL LETTER SHCHA) + 26 0x042A Ъ (CYRILLIC CAPITAL LETTER HARD SIGN) + 27 0x042B Ы (CYRILLIC CAPITAL LETTER YERU) + 28 0x042C Ь (CYRILLIC CAPITAL LETTER SOFT SIGN) + 29 0x042D Э (CYRILLIC CAPITAL LETTER E) + 30 0x042E Ю (CYRILLIC CAPITAL LETTER YU) + 31 0x042F Я (CYRILLIC CAPITAL LETTER YA) + 32 0x2020 † (DAGGER) + 33 0x00B0 ° (DEGREE SIGN) + 34 0x0490 Ґ (CYRILLIC CAPITAL LETTER GHE WITH UPTURN) + 35 0x00A3 £ (POUND SIGN) + 36 0x00A7 § (SECTION SIGN) + 37 0x2022 • (BULLET) + 38 0x00B6 ¶ (PILCROW SIGN) + 39 0x0406 І (CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I) + 40 0x00AE ® (REGISTERED SIGN) + 41 0x00A9 © (COPYRIGHT SIGN) + 42 0x2122 ™ (TRADE MARK SIGN) + 43 0x0402 Ђ (CYRILLIC CAPITAL LETTER DJE) + 44 0x0452 ђ (CYRILLIC SMALL LETTER DJE) + 45 0x2260 ≠ (NOT EQUAL TO) + 46 0x0403 Ѓ (CYRILLIC CAPITAL LETTER GJE) + 47 0x0453 ѓ (CYRILLIC SMALL LETTER GJE) + 48 0x221E ∞ (INFINITY) + 49 0x00B1 ± (PLUS-MINUS SIGN) + 50 0x2264 ≤ (LESS-THAN OR EQUAL TO) + 51 0x2265 ≥ (GREATER-THAN OR EQUAL TO) + 52 0x0456 і (CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I) + 53 0x00B5 µ (MICRO SIGN) + 54 0x0491 ґ (CYRILLIC SMALL LETTER GHE WITH UPTURN) + 55 0x0408 Ј (CYRILLIC CAPITAL LETTER JE) + 56 0x0404 Є (CYRILLIC CAPITAL LETTER UKRAINIAN IE) + 57 0x0454 є (CYRILLIC SMALL LETTER UKRAINIAN IE) + 58 0x0407 Ї (CYRILLIC CAPITAL LETTER YI) + 59 0x0457 ї (CYRILLIC SMALL LETTER YI) + 60 0x0409 Љ (CYRILLIC CAPITAL LETTER LJE) + 61 0x0459 љ (CYRILLIC SMALL LETTER LJE) + 62 
0x040A Њ (CYRILLIC CAPITAL LETTER NJE) + 63 0x045A њ (CYRILLIC SMALL LETTER NJE) + 64 0x0458 ј (CYRILLIC SMALL LETTER JE) + 65 0x0405 Ѕ (CYRILLIC CAPITAL LETTER DZE) + 66 0x00AC ¬ (NOT SIGN) + 67 0x221A √ (SQUARE ROOT) + 68 0x0192 ƒ (LATIN SMALL LETTER F WITH HOOK) + 69 0x2248 ≈ (ALMOST EQUAL TO) + 70 0x2206 ∆ (INCREMENT) + 71 0x00AB « (LEFT-POINTING DOUBLE ANGLE QUOTATION MARK) + 72 0x00BB » (RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK) + 73 0x2026 … (HORIZONTAL ELLIPSIS) + 74 0x00A0   (NO-BREAK SPACE) + 75 0x040B Ћ (CYRILLIC CAPITAL LETTER TSHE) + 76 0x045B ћ (CYRILLIC SMALL LETTER TSHE) + 77 0x040C Ќ (CYRILLIC CAPITAL LETTER KJE) + 78 0x045C ќ (CYRILLIC SMALL LETTER KJE) + 79 0x0455 ѕ (CYRILLIC SMALL LETTER DZE) + 80 0x2013 – (EN DASH) + 81 0x2014 — (EM DASH) + 82 0x201C “ (LEFT DOUBLE QUOTATION MARK) + 83 0x201D ” (RIGHT DOUBLE QUOTATION MARK) + 84 0x2018 ‘ (LEFT SINGLE QUOTATION MARK) + 85 0x2019 ’ (RIGHT SINGLE QUOTATION MARK) + 86 0x00F7 ÷ (DIVISION SIGN) + 87 0x201E „ (DOUBLE LOW-9 QUOTATION MARK) + 88 0x040E Ў (CYRILLIC CAPITAL LETTER SHORT U) + 89 0x045E ў (CYRILLIC SMALL LETTER SHORT U) + 90 0x040F Џ (CYRILLIC CAPITAL LETTER DZHE) + 91 0x045F џ (CYRILLIC SMALL LETTER DZHE) + 92 0x2116 № (NUMERO SIGN) + 93 0x0401 Ё (CYRILLIC CAPITAL LETTER IO) + 94 0x0451 ё (CYRILLIC SMALL LETTER IO) + 95 0x044F я (CYRILLIC SMALL LETTER YA) + 96 0x0430 а (CYRILLIC SMALL LETTER A) + 97 0x0431 б (CYRILLIC SMALL LETTER BE) + 98 0x0432 в (CYRILLIC SMALL LETTER VE) + 99 0x0433 г (CYRILLIC SMALL LETTER GHE) +100 0x0434 д (CYRILLIC SMALL LETTER DE) +101 0x0435 е (CYRILLIC SMALL LETTER IE) +102 0x0436 ж (CYRILLIC SMALL LETTER ZHE) +103 0x0437 з (CYRILLIC SMALL LETTER ZE) +104 0x0438 и (CYRILLIC SMALL LETTER I) +105 0x0439 й (CYRILLIC SMALL LETTER SHORT I) +106 0x043A к (CYRILLIC SMALL LETTER KA) +107 0x043B л (CYRILLIC SMALL LETTER EL) +108 0x043C м (CYRILLIC SMALL LETTER EM) +109 0x043D н (CYRILLIC SMALL LETTER EN) +110 0x043E о (CYRILLIC SMALL LETTER O) +111 0x043F п (CYRILLIC 
SMALL LETTER PE) +112 0x0440 р (CYRILLIC SMALL LETTER ER) +113 0x0441 с (CYRILLIC SMALL LETTER ES) +114 0x0442 т (CYRILLIC SMALL LETTER TE) +115 0x0443 у (CYRILLIC SMALL LETTER U) +116 0x0444 ф (CYRILLIC SMALL LETTER EF) +117 0x0445 х (CYRILLIC SMALL LETTER HA) +118 0x0446 ц (CYRILLIC SMALL LETTER TSE) +119 0x0447 ч (CYRILLIC SMALL LETTER CHE) +120 0x0448 ш (CYRILLIC SMALL LETTER SHA) +121 0x0449 щ (CYRILLIC SMALL LETTER SHCHA) +122 0x044A ъ (CYRILLIC SMALL LETTER HARD SIGN) +123 0x044B ы (CYRILLIC SMALL LETTER YERU) +124 0x044C ь (CYRILLIC SMALL LETTER SOFT SIGN) +125 0x044D э (CYRILLIC SMALL LETTER E) +126 0x044E ю (CYRILLIC SMALL LETTER YU) +127 0x20AC € (EURO SIGN) diff --git a/sub_crates/text_encoding/src/lib.rs b/sub_crates/text_encoding/src/lib.rs index 0259916..a27df83 100644 --- a/sub_crates/text_encoding/src/lib.rs +++ b/sub_crates/text_encoding/src/lib.rs @@ -4,6 +4,7 @@ //! text encodings. mod latin1; +mod single_byte; mod utf16_be; mod utf16_le; mod utf32_be; @@ -12,6 +13,8 @@ mod utf8; mod utils; mod windows1252; +use single_byte::{ibm866, iso_8859_2}; + /// Encodes text from utf8 to a destination encoding. 
pub fn encode_from_str<'a>( output_encoding: Encoding, @@ -24,7 +27,9 @@ pub fn encode_from_str<'a>( Encoding::Utf16LE => utf16_le::encode_from_str(input, output), Encoding::Utf32BE => utf32_be::encode_from_str(input, output), Encoding::Utf32LE => utf32_le::encode_from_str(input, output), + Encoding::IBM866 => ibm866::encode_from_str(input, output), Encoding::Latin1 => latin1::encode_from_str(input, output), + Encoding::ISO8859_2 => iso_8859_2::encode_from_str(input, output), Encoding::Windows1252 => windows1252::encode_from_str(input, output), } } @@ -41,7 +46,9 @@ pub fn decode_to_str<'a>( Encoding::Utf16LE => utf16_le::decode_to_str(input, output), Encoding::Utf32BE => utf32_be::decode_to_str(input, output), Encoding::Utf32LE => utf32_le::decode_to_str(input, output), + Encoding::IBM866 => ibm866::decode_to_str(input, output), Encoding::Latin1 => latin1::decode_to_str(input, output), + Encoding::ISO8859_2 => iso_8859_2::decode_to_str(input, output), Encoding::Windows1252 => windows1252::decode_to_str(input, output), } } @@ -57,7 +64,9 @@ pub enum Encoding { // ShiftJIS, // EUC_JP, // Big5, + IBM866, // IBM 866 Latin1, // ISO/IEC 8859-1 + ISO8859_2, // ISO/IEC 8859-2 Windows1252, // Windows code page 1252 } diff --git a/sub_crates/text_encoding/src/single_byte.rs b/sub_crates/text_encoding/src/single_byte.rs new file mode 100644 index 0000000..85973ba --- /dev/null +++ b/sub_crates/text_encoding/src/single_byte.rs @@ -0,0 +1,226 @@ +//! Single byte encodings that extend ascii. Their code is auto-generated +//! 
by build.rs + +use core; +use {DecodeError, DecodeResult, EncodeError, EncodeResult}; + +pub mod ibm866 { + // Generated by build.rs + include!(concat!(env!("OUT_DIR"), "/ibm866.rs")); +} + +pub mod iso_8859_2 { + // Generated by build.rs + include!(concat!(env!("OUT_DIR"), "/iso-8859-2.rs")); +} + +// pub mod iso_8859_3 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-3.rs")); +// } + +// pub mod iso_8859_4 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-4.rs")); +// } + +// pub mod iso_8859_5 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-5.rs")); +// } + +// pub mod iso_8859_6 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-6.rs")); +// } + +// pub mod iso_8859_7 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-7.rs")); +// } + +// pub mod iso_8859_8 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-8.rs")); +// } + +// pub mod iso_8859_10 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-10.rs")); +// } + +// pub mod iso_8859_13 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-13.rs")); +// } + +// pub mod iso_8859_14 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-14.rs")); +// } + +// pub mod iso_8859_15 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-15.rs")); +// } + +// pub mod iso_8859_16 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/iso-8859-16.rs")); +// } + +// pub mod koi8_r { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/koi8-r.rs")); +// } + +// pub mod koi8_u { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/koi8-u.rs")); +// } + +// pub mod macintosh { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/macintosh.rs")); +// } + +// pub mod windows874 { 
+// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-874.rs")); +// } + +// pub mod windows1250 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1250.rs")); +// } + +// pub mod windows1251 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1251.rs")); +// } + +// pub mod windows1252 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1252.rs")); +// } + +// pub mod windows1253 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1253.rs")); +// } + +// pub mod windows1254 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1254.rs")); +// } + +// pub mod windows1255 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1255.rs")); +// } + +// pub mod windows1256 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1256.rs")); +// } + +// pub mod windows1257 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1257.rs")); +// } + +// pub mod windows1258 { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/windows-1258.rs")); +// } + +// pub mod x_mac_cyrillic { +// // Generated by build.rs +// include!(concat!(env!("OUT_DIR"), "/x-mac-cyrillic.rs")); +// } + +/// This is shared among the single byte encoders, and is shallowly +/// wrapped in each of their modules. +#[inline] +fn single_byte_encode_from_str<'a>( + table: &[(char, u8)], + input: &str, + output: &'a mut [u8], +) -> EncodeResult<'a> { + // Do the encode. 
+ let mut input_i = 0; + let mut output_i = 0; + for (offset, c) in input.char_indices() { + if output_i >= output.len() { + break; + } + if let Ok(i) = table.binary_search_by_key(&c, |x| x.0) { + output[output_i] = table[i].1; + output_i += 1; + input_i = offset + 1; + } else { + return Err(EncodeError { + character: c, + error_range: (offset, offset + c.len_utf8()), + output_bytes_written: output_i, + }); + } + } + + // Calculate how much of the input was consumed. + if input_i > input.len() { + input_i = input.len(); + } else { + while !input.is_char_boundary(input_i) { + input_i += 1; + } + } + + Ok((input_i, &output[..output_i])) +} + +/// This is shared among the single byte decoders, and is shallowly +/// wrapped in each of their modules. +#[inline] +fn single_byte_decode_to_str<'a>( + table: &[char; 128], + input: &[u8], + output: &'a mut [u8], +) -> DecodeResult<'a> { + let mut input_i = 0; + let mut output_i = 0; + for &byte in input.iter() { + if byte < 0x80 { + // 1-byte case + if output_i >= output.len() { + break; + } + output[output_i] = byte; + input_i += 1; + output_i += 1; + } else { + // Use lookup table. + let code = table[byte as usize - 0x80]; + if code == '�' { + // Error: undefined byte. + return Err(DecodeError { + error_range: (input_i, input_i + 1), + output_bytes_written: output_i, + }); + } + // Encode to utf8 + let mut buf = [0u8; 4]; + let s = code.encode_utf8(&mut buf); + if (output_i + s.len()) > output.len() { + break; + } + output[output_i..(output_i + s.len())].copy_from_slice(s.as_bytes()); + input_i += 1; + output_i += s.len(); + } + } + + Ok((input_i, unsafe { + core::str::from_utf8_unchecked(&output[..output_i]) + })) +}