Use lz4 for the in-executable dictionary data.
It's way faster to decompress, and is still small enough.
This commit is contained in:
parent
1d64afe430
commit
8df226190b
21
Cargo.lock
generated
21
Cargo.lock
generated
|
@ -182,9 +182,9 @@ dependencies = [
|
|||
name = "furigana_gen"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"lz4_flex",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"ruzstd",
|
||||
"vibrato",
|
||||
]
|
||||
|
||||
|
@ -256,6 +256,15 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4_flex"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5"
|
||||
dependencies = [
|
||||
"twox-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.4"
|
||||
|
@ -441,16 +450,6 @@ version = "1.0.17"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
|
||||
|
||||
[[package]]
|
||||
name = "ruzstd"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c8b8f3d26bd9f945e5cbae77f7cdfbf37af9a66956f1115eb4516e45df519f4"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"twox-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.18"
|
||||
|
|
|
@ -7,6 +7,6 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
vibrato = "0.5"
|
||||
ruzstd = "0.7"
|
||||
lz4_flex = "0.11"
|
||||
regex = "1.10"
|
||||
once_cell = "1.19"
|
||||
|
|
BIN
dictionary/system.dic
Normal file
BIN
dictionary/system.dic
Normal file
Binary file not shown.
BIN
dictionary/system.dic.lz4
Normal file
BIN
dictionary/system.dic.lz4
Normal file
Binary file not shown.
BIN
dictionary/system.dic.xz
Normal file
BIN
dictionary/system.dic.xz
Normal file
Binary file not shown.
Binary file not shown.
14
src/main.rs
14
src/main.rs
|
@ -3,17 +3,23 @@ use std::{
|
|||
io::{Cursor, Read},
|
||||
};
|
||||
|
||||
use lz4_flex::frame::FrameDecoder;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use ruzstd::StreamingDecoder;
|
||||
use vibrato::{Dictionary, Tokenizer};
|
||||
|
||||
const DICT: &[u8] = include_bytes!("../dictionary/system.dic.zst");
|
||||
const DICT: &[u8] = include_bytes!("../dictionary/system.dic.lz4");
|
||||
|
||||
fn main() {
|
||||
let dict = {
|
||||
let decoder = StreamingDecoder::new(Cursor::new(DICT)).unwrap();
|
||||
Dictionary::read(decoder).unwrap()
|
||||
// Note: we could just pass the decoder straight to `Dictionary::read()`
|
||||
// below, and it would work. However, that ends up being slower than
|
||||
// first decompressing the whole thing ahead of time.
|
||||
let mut decoder = FrameDecoder::new(Cursor::new(DICT));
|
||||
let mut data = Vec::new();
|
||||
decoder.read_to_end(&mut data).unwrap();
|
||||
|
||||
Dictionary::read(Cursor::new(&data)).unwrap()
|
||||
};
|
||||
|
||||
let text = {
|
||||
|
|
Loading…
Reference in New Issue
Block a user