Add option to include pitch accent information with the furigana

This commit is contained in:
Nathan Vegdahl 2024-09-18 12:10:22 +02:00
parent 7361240e49
commit adb58983a7
11 changed files with 124435 additions and 83 deletions

View File

@ -29,11 +29,11 @@ fn main() {
f.write_all("\n];".as_bytes()).unwrap(); f.write_all("\n];".as_bytes()).unwrap();
} }
// Write compressed dictionary to .lz4 file. // Write compressed parsing dictionary to .lz4 file.
{ {
// Read and decompress file from .xz. // Read and decompress file from .xz.
let dict_data = { let dict_data = {
let f = File::open("data/dictionary/system.dic.xz").unwrap(); let f = File::open("data/ipadic-mecab-2_7_0/system.dic.xz").unwrap();
let mut data = Vec::new(); let mut data = Vec::new();
lzma_rs::xz_decompress(&mut BufReader::new(f), &mut data).unwrap(); lzma_rs::xz_decompress(&mut BufReader::new(f), &mut data).unwrap();
@ -47,4 +47,23 @@ fn main() {
encoder.write(&dict_data).unwrap(); encoder.write(&dict_data).unwrap();
encoder.finish().unwrap(); encoder.finish().unwrap();
} }
// Write compressed pitch accent dictionary to .lz4 file.
{
// Read and decompress file from .xz.
let dict_data = {
let f = File::open("data/accents.tsv.xz").unwrap();
let mut data = Vec::new();
lzma_rs::xz_decompress(&mut BufReader::new(f), &mut data).unwrap();
data
};
// Recompress to .lz4.
let dest_path = Path::new(&out_dir).join("accents.tsv.lz4");
let f = File::create(dest_path).unwrap();
let mut encoder = lz4_flex::frame::FrameEncoder::new(f);
encoder.write(&dict_data).unwrap();
encoder.finish().unwrap();
}
} }

124138
data/accents.tsv Normal file

File diff suppressed because it is too large Load Diff

BIN
data/accents.tsv.xz Normal file

Binary file not shown.

View File

@ -1,31 +0,0 @@
Copyright (c) 2011-2021, The UniDic Consortium
Copyright (c) 2023, LegalOn Technologies, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
* Neither the name of the UniDic Consortium nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,7 +0,0 @@
This software includes a binary version of data from
https://clrd.ninjal.ac.jp/unidic_archive/cwj/3.1.1/unidic-cwj-3.1.1-full.zip
where the costs and connection ids are retrained using CORE data in BCCWJ (except the PN category)
https://clrd.ninjal.ac.jp/bccwj/.

Binary file not shown.

View File

@ -0,0 +1,73 @@
Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
and Technology.
Copyright 2023, LegalOn Technologies, Inc.
All Rights Reserved.
Use, reproduction, and distribution of this software is permitted.
Any copy of this software, whether in its original form or modified,
must include both the above copyright notice and the following
paragraphs.
Nara Institute of Science and Technology (NAIST),
the copyright holders, disclaims all warranties with regard to this
software, including all implied warranties of merchantability and
fitness, in no event shall NAIST be liable for
any special, indirect or consequential damages or any damages
whatsoever resulting from loss of use, data or profits, whether in an
action of contract, negligence or other tortuous action, arising out
of or in connection with the use or performance of this software.
A large portion of the dictionary entries
originate from ICOT Free Software. The following conditions for ICOT
Free Software applies to the current dictionary as well.
Each User may also freely distribute the Program, whether in its
original form or modified, to any third party or parties, PROVIDED
that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
on, or be attached to, the Program, which is distributed substantially
in the same form as set out herein and that such intended
distribution, if actually made, will neither violate or otherwise
contravene any of the laws and regulations of the countries having
jurisdiction over the User or the intended distribution itself.
NO WARRANTY
The program was produced on an experimental basis in the course of the
research and development conducted during the project and is provided
to users as so produced on an experimental basis. Accordingly, the
program is provided without any warranty whatsoever, whether express,
implied, statutory or otherwise. The term "warranty" used herein
includes, but is not limited to, any warranty of the quality,
performance, merchantability and fitness for a particular purpose of
the program and the nonexistence of any infringement or violation of
any right of any third party.
Each user of the program will agree and understand, and be deemed to
have agreed and understood, that there is no warranty whatsoever for
the program and, accordingly, the entire risk arising from or
otherwise connected with the program is assumed by the user.
Therefore, neither ICOT, the copyright holder, or any other
organization that participated in or was otherwise related to the
development of the program and their respective officials, directors,
officers and other employees shall be held liable for any and all
damages, including, without limitation, general, special, incidental
and consequential damages, arising out of or otherwise in connection
with the use or inability to use the program or any product, material
or result produced or otherwise obtained by using the program,
regardless of whether they have been advised of, or otherwise had
knowledge of, the possibility of such damages at any time during the
project or thereafter. Each user will be deemed to have agreed to the
foregoing by his or her commencement of use of the program. The term
"use" as used herein includes, but is not limited to, the use,
modification, copying and distribution of the program and the
production of secondary products from the program.
In the case where the program, whether in its original form or
modified, was distributed or delivered to or received by a user from
any person, organization or entity other than ICOT, unless it makes or
grants independently of ICOT any specific warranty to the user in
writing, such person, organization or entity, will also be exempted
from and not be held liable to the user for any such damages as noted
above as far as the program is concerned.
÷÷

View File

@ -0,0 +1,7 @@
This software includes a binary version of data from
http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz,
where the connection ids are remapped using CORE data in BCCWJ (except the PN category)
https://clrd.ninjal.ac.jp/bccwj/.

Binary file not shown.

61
src/accent.rs Normal file
View File

@ -0,0 +1,61 @@
use std::{
borrow::Cow,
io::{Cursor, Read},
};
use fnv::FnvHashMap;
use lz4_flex::frame::FrameDecoder;
// Pitch accent dictionary.
const ACCENT: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/accents.tsv.lz4"));
#[derive(Debug)]
/// Pitch accent dictionary, loaded from the embedded accent TSV data.
pub struct AccentDict {
    // Maps (word, kana reading) -> pitch accent positions for that word.
    // The kana key is katakana (the builder normalizes hiragana to katakana
    // via `crate::hiragana_to_katakana_string`).  `Cow<'static, str>` keys
    // let `get()` look up with borrowed `Cow::from(&str)` keys without
    // allocating — NOTE(review): relies on Cow's Borrow/Hash agreeing for
    // owned and borrowed variants.
    table: FnvHashMap<(Cow<'static, str>, Cow<'static, str>), Vec<u8>>,
}
/// Builds the pitch accent dictionary from the embedded, lz4-compressed
/// accent TSV data (`ACCENT`).
///
/// Each TSV line is expected to be `word \t kana \t pitches`, where
/// `pitches` is a comma-separated list of accent positions.  An empty kana
/// field means the word itself is the reading (e.g. the word is already
/// kana).  Kana is normalized to katakana for the table key.
pub fn build_accent_dictionary() -> AccentDict {
    // Decompress the embedded lz4 frame into the raw TSV text.
    let text = {
        let mut decoder = FrameDecoder::new(Cursor::new(ACCENT));
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        text
    };

    let mut table = FnvHashMap::default();
    for line in text.lines() {
        let items: Vec<_> = line.split('\t').map(|t| t.trim()).collect();

        // Skip malformed/short lines instead of panicking on an out-of-bounds
        // index — a single bad dictionary entry shouldn't abort startup.
        if items.len() < 3 {
            continue;
        }

        let word = items[0];
        // An empty kana field means the word itself is the reading.
        let kana = if items[1].is_empty() {
            items[0]
        } else {
            items[1]
        };
        // Ignore any pitch entries that fail to parse rather than erroring.
        let pitches: Vec<u8> = items[2]
            .split(',')
            .filter_map(|p| p.parse::<u8>().ok())
            .collect();

        table.insert(
            (
                Cow::Owned(word.into()),
                Cow::Owned(crate::hiragana_to_katakana_string(kana)),
            ),
            pitches,
        );
    }

    AccentDict { table }
}
impl AccentDict {
    /// Returns the pitch accent positions for the given word/kana pair,
    /// or an empty slice if the pair is not in the dictionary.
    pub fn get<'a>(&'a self, word: &'a str, kana: &'a str) -> &'a [u8] {
        let key = (Cow::from(word), Cow::from(kana));
        self.table
            .get(&key)
            .map(|pitches| pitches.as_slice())
            .unwrap_or(&[])
    }
}

View File

@ -1,3 +1,4 @@
mod accent;
mod learner; mod learner;
use std::{ use std::{
@ -10,32 +11,32 @@ use lz4_flex::frame::FrameDecoder;
use quick_xml::events::Event; use quick_xml::events::Event;
use vibrato::{Dictionary, Tokenizer}; use vibrato::{Dictionary, Tokenizer};
use accent::AccentDict;
use learner::Learner; use learner::Learner;
// Include KANJI_FREQ, a frequency-ordered array of kanji characters. // Include KANJI_FREQ, a frequency-ordered array of kanji characters.
include!(concat!(env!("OUT_DIR"), "/kanji_freq_inc.rs")); include!(concat!(env!("OUT_DIR"), "/kanji_freq_inc.rs"));
// Parsing dictionary.
const DICT: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/system.dic.lz4")); const DICT: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/system.dic.lz4"));
/// A list of words that the tokenizer insists on using the less common reading /// A list of words that the tokenizer insists on using the less common reading
/// for, with the more common reading that should be substituted. /// for, with the more common reading that should be substituted.
/// ///
/// (surface, feature, substitute_feature) /// (surface, kana, substitute_kana)
const COMMON_SUBS: &[(&str, &str, &str)] = &[ const COMMON_SUBS: &[(&str, &str, &str)] = &[
("", "名詞-普通名詞-一般,ガク", "名詞-普通名詞-一般,ヒタイ"), ("", "ガク", "ヒタイ"),
( ("", "", "ホカ"),
"", ("", "ワタクシ", "ワタシ"),
"名詞-普通名詞-副詞可能,タ",
"名詞-普通名詞-副詞可能,ホカ",
),
("", "代名詞,ワタクシ", "代名詞,ワタシ"),
]; ];
pub struct FuriganaGenerator { pub struct FuriganaGenerator {
tokenizer: Tokenizer, tokenizer: Tokenizer,
accent_dict: AccentDict,
exclude_kanji: FnvHashSet<char>, exclude_kanji: FnvHashSet<char>,
subs: FnvHashMap<(Cow<'static, str>, Cow<'static, str>), String>, subs: FnvHashMap<(Cow<'static, str>, Cow<'static, str>), String>,
use_hiragana: bool, use_hiragana: bool,
mark_accent: bool,
} }
impl FuriganaGenerator { impl FuriganaGenerator {
@ -43,7 +44,7 @@ impl FuriganaGenerator {
// Specifically, words made up *entirely* of those kanji will be excluded. // Specifically, words made up *entirely* of those kanji will be excluded.
// If a word has some kanji that aren't in that set, even if it also has // If a word has some kanji that aren't in that set, even if it also has
// some that are, it will still get furigana. // some that are, it will still get furigana.
pub fn new(exclude_count: usize, use_hiragana: bool) -> Self { pub fn new(exclude_count: usize, use_hiragana: bool, mark_accent: bool) -> Self {
let dict = { let dict = {
// Note: we could just pass the decoder straight to `Dictionary::read()` // Note: we could just pass the decoder straight to `Dictionary::read()`
// below, and it would work. However, that ends up being slower than // below, and it would work. However, that ends up being slower than
@ -73,29 +74,35 @@ impl FuriganaGenerator {
Self { Self {
tokenizer: Tokenizer::new(dict), tokenizer: Tokenizer::new(dict),
accent_dict: accent::build_accent_dictionary(),
exclude_kanji: exclude_kanji, exclude_kanji: exclude_kanji,
subs: subs, subs: subs,
use_hiragana: use_hiragana, use_hiragana: use_hiragana,
mark_accent: mark_accent,
} }
} }
pub fn new_session(&self, learn_mode: bool) -> Session<'_> { pub fn new_session(&self, learn_mode: bool) -> Session<'_> {
Session { Session {
tokenizer: &self.tokenizer, tokenizer: &self.tokenizer,
accent_dict: &self.accent_dict,
exclude_kanji: &self.exclude_kanji, exclude_kanji: &self.exclude_kanji,
subs: &self.subs, subs: &self.subs,
learner: Learner::new(if learn_mode { 3 } else { usize::MAX }), learner: Learner::new(if learn_mode { 3 } else { usize::MAX }),
use_hiragana: self.use_hiragana, use_hiragana: self.use_hiragana,
mark_accent: self.mark_accent,
} }
} }
} }
pub struct Session<'a> { pub struct Session<'a> {
tokenizer: &'a Tokenizer, tokenizer: &'a Tokenizer,
accent_dict: &'a AccentDict,
exclude_kanji: &'a FnvHashSet<char>, exclude_kanji: &'a FnvHashSet<char>,
subs: &'a FnvHashMap<(Cow<'a, str>, Cow<'a, str>), String>, subs: &'a FnvHashMap<(Cow<'a, str>, Cow<'a, str>), String>,
learner: Learner, learner: Learner,
use_hiragana: bool, use_hiragana: bool,
mark_accent: bool,
} }
impl<'a> Session<'a> { impl<'a> Session<'a> {
@ -116,10 +123,12 @@ impl<'a> Session<'a> {
add_html_furigana_skip_already_ruby( add_html_furigana_skip_already_ruby(
&text, &text,
&self.tokenizer, &self.tokenizer,
&self.accent_dict,
&self.exclude_kanji, &self.exclude_kanji,
&self.subs, &self.subs,
&mut self.learner, &mut self.learner,
self.use_hiragana, self.use_hiragana,
self.mark_accent,
) )
} }
} }
@ -132,10 +141,12 @@ fn to_str<B: std::ops::Deref<Target = [u8]>>(bytes: &B) -> &str {
fn add_html_furigana_skip_already_ruby( fn add_html_furigana_skip_already_ruby(
text: &str, text: &str,
tokenizer: &Tokenizer, tokenizer: &Tokenizer,
accent_dict: &AccentDict,
exclude_kanji: &FnvHashSet<char>, exclude_kanji: &FnvHashSet<char>,
subs: &FnvHashMap<(Cow<str>, Cow<str>), String>, subs: &FnvHashMap<(Cow<str>, Cow<str>), String>,
learner: &mut Learner, learner: &mut Learner,
use_hiragana: bool, use_hiragana: bool,
mark_accent: bool,
) -> String { ) -> String {
let mut reader = quick_xml::Reader::from_str(text); let mut reader = quick_xml::Reader::from_str(text);
@ -171,10 +182,12 @@ fn add_html_furigana_skip_already_ruby(
new_text.push_str(&add_html_furigana( new_text.push_str(&add_html_furigana(
to_str(&e), to_str(&e),
tokenizer, tokenizer,
accent_dict,
exclude_kanji, exclude_kanji,
subs, subs,
learner, learner,
use_hiragana, use_hiragana,
mark_accent,
)); ));
} else { } else {
write_xml(&mut new_text, &Event::Text(e)); write_xml(&mut new_text, &Event::Text(e));
@ -255,10 +268,12 @@ fn write_xml(text: &mut String, event: &quick_xml::events::Event) {
fn add_html_furigana( fn add_html_furigana(
text: &str, text: &str,
tokenizer: &Tokenizer, tokenizer: &Tokenizer,
accent_dict: &AccentDict,
exclude_kanji: &FnvHashSet<char>, exclude_kanji: &FnvHashSet<char>,
subs: &FnvHashMap<(Cow<str>, Cow<str>), String>, subs: &FnvHashMap<(Cow<str>, Cow<str>), String>,
learner: &mut Learner, learner: &mut Learner,
use_hiragana: bool, use_hiragana: bool,
mark_accent: bool,
) -> String { ) -> String {
let mut worker = tokenizer.new_worker(); let mut worker = tokenizer.new_worker();
@ -268,15 +283,28 @@ fn add_html_furigana(
let mut new_text = String::new(); let mut new_text = String::new();
for i in 0..worker.num_tokens() { for i in 0..worker.num_tokens() {
let t = worker.token(i); let t = worker.token(i);
let (surface, feature) = { let (surface, kana, pitches) = {
let surface = t.surface(); let surface = t.surface();
let feature = t.feature(); let feature = t.feature();
if let Some(sub_feature) = subs.get(&(Cow::from(surface), Cow::from(feature))) { let kana_1 = feature.rsplit(",").nth(0).unwrap();
(surface, sub_feature.as_str()) let kana_2 = feature.rsplit(",").nth(1).unwrap();
let word = feature.rsplit(",").nth(2).unwrap();
let (kana, pkana) =
if let Some(sub_kana) = subs.get(&(Cow::from(surface), Cow::from(kana_1))) {
(sub_kana.as_str(), sub_kana.as_str())
} else { } else {
(surface, feature) (kana_1, kana_2)
} };
let pitches = if mark_accent {
accent_dict.get(word, pkana)
} else {
&[]
};
(surface, kana, pitches)
}; };
let needs_help = learner.needs_help(surface); let needs_help = learner.needs_help(surface);
@ -287,17 +315,21 @@ fn add_html_furigana(
continue; continue;
} }
let kana = { let kana = if use_hiragana {
let kana = feature.split(",").nth(1).unwrap();
if use_hiragana {
katakana_to_hiragana_string(kana) katakana_to_hiragana_string(kana)
} else { } else {
kana.into() kana.into()
}
}; };
let furigana_text = apply_furigana(surface, &kana, exclude_kanji); let furigana_text = apply_furigana(surface, &kana, exclude_kanji);
if furigana_text.is_empty() {
new_text.push_str(surface);
} else {
for pitch in pitches {
new_text.push_str(&format!("<sup>{}</sup>", pitch));
}
for (surf, furi) in furigana_text.iter() { for (surf, furi) in furigana_text.iter() {
if furi.is_empty() { if furi.is_empty() {
new_text.push_str(surf); new_text.push_str(surf);
@ -311,6 +343,7 @@ fn add_html_furigana(
new_text.push_str("</rt></ruby>"); new_text.push_str("</rt></ruby>");
} }
} }
}
new_text new_text
} }
@ -326,9 +359,8 @@ fn apply_furigana<'a>(
) -> Vec<(&'a str, &'a str)> { ) -> Vec<(&'a str, &'a str)> {
let mut out = Vec::new(); let mut out = Vec::new();
if furigana_unneeded(surface, exclude_kanji) { if furigana_unneeded(surface, exclude_kanji) || !is_kana_str(kana) {
out.push((surface, "")); return Vec::new();
return out;
} }
let mut surface = surface; let mut surface = surface;
@ -454,6 +486,10 @@ pub fn is_kana(c: char) -> bool {
return false; return false;
} }
pub fn is_kana_str(text: &str) -> bool {
text.chars().all(|c| is_kana(c))
}
pub fn normalize_kana(c: char) -> Option<char> { pub fn normalize_kana(c: char) -> Option<char> {
if !is_kana(c) { if !is_kana(c) {
return None; return None;
@ -497,6 +533,16 @@ pub fn katakana_to_hiragana_string(text: &str) -> String {
new_text new_text
} }
pub fn hiragana_to_katakana_string(text: &str) -> String {
let mut new_text = String::new();
for c in text.chars() {
new_text.push(hiragana_to_katakana(c).unwrap_or(c));
}
new_text
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -505,7 +551,12 @@ mod tests {
pub fn get_furigana_gen() -> &'static FuriganaGenerator { pub fn get_furigana_gen() -> &'static FuriganaGenerator {
use std::sync::OnceLock; use std::sync::OnceLock;
static FURIGEN: OnceLock<FuriganaGenerator> = OnceLock::new(); static FURIGEN: OnceLock<FuriganaGenerator> = OnceLock::new();
FURIGEN.get_or_init(|| FuriganaGenerator::new(0, false)) FURIGEN.get_or_init(|| FuriganaGenerator::new(0, false, false))
}
pub fn get_furigana_gen_with_accent() -> &'static FuriganaGenerator {
use std::sync::OnceLock;
static FURIGEN: OnceLock<FuriganaGenerator> = OnceLock::new();
FURIGEN.get_or_init(|| FuriganaGenerator::new(0, false, true))
} }
#[test] #[test]
@ -514,7 +565,7 @@ mod tests {
let kana = "ヘー"; let kana = "ヘー";
let pairs = apply_furigana(surface, kana, &FnvHashSet::default()); let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
assert_eq!(&[("へぇ", "")], &pairs[..]); assert!(pairs.is_empty());
} }
#[test] #[test]
@ -523,7 +574,7 @@ mod tests {
let kana = "ヘー"; let kana = "ヘー";
let pairs = apply_furigana(surface, kana, &FnvHashSet::default()); let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
assert_eq!(&[("へぇー", "")], &pairs[..]); assert!(pairs.is_empty());
} }
#[test] #[test]
@ -532,7 +583,7 @@ mod tests {
let kana = ""; let kana = "";
let pairs = apply_furigana(surface, kana, &FnvHashSet::default()); let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
assert_eq!(&[("", "")], &pairs[..]); assert!(pairs.is_empty());
} }
#[test] #[test]
@ -606,39 +657,80 @@ mod tests {
assert_eq!(3, worker.num_tokens()); assert_eq!(3, worker.num_tokens());
assert_eq!("食べ", worker.token(0).surface()); assert_eq!("食べ", worker.token(0).surface());
assert_eq!("動詞-一般,タベ", worker.token(0).feature()); assert_eq!(
"動詞,自立,*,*,一段,連用形,食べる,タベ,タベ",
worker.token(0).feature()
);
assert_eq!("", worker.token(1).surface()); assert_eq!("", worker.token(1).surface());
assert_eq!("助詞-接続助詞,テ", worker.token(1).feature()); assert_eq!("助詞,接続助詞,*,*,*,*,て,テ,テ", worker.token(1).feature());
assert_eq!("いる", worker.token(2).surface()); assert_eq!("いる", worker.token(2).surface());
assert_eq!("動詞-非自立可能,イル", worker.token(2).feature()); assert_eq!(
"動詞,非自立,*,*,一段,基本形,いる,イル,イル",
worker.token(2).feature()
);
}
#[test]
fn tokenize_02() {
let mut worker = get_furigana_gen().tokenizer.new_worker();
worker.reset_sentence("そう");
worker.tokenize();
assert_eq!(1, worker.num_tokens());
assert_eq!(
"副詞,助詞類接続,*,*,*,*,そう,ソウ,ソー",
worker.token(0).feature()
);
} }
#[test] #[test]
fn add_html_furigana_01() { fn add_html_furigana_01() {
let mut gen = get_furigana_gen().new_session(false); let mut gen = get_furigana_gen().new_session(false);
let mut gen_accent = get_furigana_gen_with_accent().new_session(false);
let text = gen let text = r#"<sup class="食う">食べる</sup>のは<ruby>良</ruby>いね!<hi />"#;
.add_html_furigana(r#"<sup class="食う">食べる</sup>のは<ruby>良</ruby>いね!<hi />"#); let furi_1 = gen.add_html_furigana(text);
let furi_2 = gen_accent.add_html_furigana(text);
assert_eq!( assert_eq!(
text, furi_1,
r#"<sup class="食う"><ruby>食<rt>タ</rt></ruby>べる</sup>のは<ruby>良</ruby>いね!<hi />"# r#"<sup class="食う"><ruby>食<rt>タ</rt></ruby>べる</sup>のは<ruby>良</ruby>いね!<hi />"#
); );
assert_eq!(
furi_2,
r#"<sup class="食う"><sup>2</sup><ruby>食<rt>タ</rt></ruby>べる</sup>のは<ruby>良</ruby>いね!<hi />"#
);
} }
// Testing custom substitutions. // Testing custom substitutions.
#[test] #[test]
fn add_html_furigana_02() { fn add_html_furigana_02() {
let mut gen = get_furigana_gen().new_session(false); let mut gen = get_furigana_gen().new_session(false);
let mut gen_accent = get_furigana_gen_with_accent().new_session(false);
assert_eq!( assert_eq!(
gen.add_html_furigana(""), gen.add_html_furigana(""),
"<ruby>額<rt>ヒタイ</rt></ruby>" "<ruby>額<rt>ヒタイ</rt></ruby>"
); );
assert_eq!(
gen_accent.add_html_furigana(""),
"<sup>0</sup><ruby>額<rt>ヒタイ</rt></ruby>"
);
assert_eq!(gen.add_html_furigana(""), "<ruby>他<rt>ホカ</rt></ruby>"); assert_eq!(gen.add_html_furigana(""), "<ruby>他<rt>ホカ</rt></ruby>");
assert_eq!(
gen_accent.add_html_furigana(""),
"<sup>0</sup><ruby>他<rt>ホカ</rt></ruby>"
);
assert_eq!( assert_eq!(
gen.add_html_furigana(""), gen.add_html_furigana(""),
"<ruby>私<rt>ワタシ</rt></ruby>" "<ruby>私<rt>ワタシ</rt></ruby>"
); );
assert_eq!(
gen_accent.add_html_furigana(""),
"<sup>0</sup><ruby>私<rt>ワタシ</rt></ruby>"
);
} }
} }