Attach pitch accent indicators in a more reasonable way.
This commit is contained in:
parent
adb58983a7
commit
5c7c167d94
77
src/lib.rs
77
src/lib.rs
|
@@ -321,15 +321,11 @@ fn add_html_furigana(
|
|||
kana.into()
|
||||
};
|
||||
|
||||
let furigana_text = apply_furigana(surface, &kana, exclude_kanji);
|
||||
let furigana_text = apply_furigana(surface, &kana, pitches, exclude_kanji);
|
||||
|
||||
if furigana_text.is_empty() {
|
||||
new_text.push_str(surface);
|
||||
} else {
|
||||
for pitch in pitches {
|
||||
new_text.push_str(&format!("<sup>{}</sup>", pitch));
|
||||
}
|
||||
|
||||
for (surf, furi) in furigana_text.iter() {
|
||||
if furi.is_empty() {
|
||||
new_text.push_str(surf);
|
||||
|
@@ -355,9 +351,10 @@ fn add_html_furigana(
|
|||
fn apply_furigana<'a>(
|
||||
surface: &'a str,
|
||||
kana: &'a str,
|
||||
pitches: &[u8],
|
||||
exclude_kanji: &FnvHashSet<char>,
|
||||
) -> Vec<(&'a str, &'a str)> {
|
||||
let mut out = Vec::new();
|
||||
) -> Vec<(&'a str, String)> {
|
||||
let mut out: Vec<(&str, String)> = Vec::new();
|
||||
|
||||
if furigana_unneeded(surface, exclude_kanji) || !is_kana_str(kana) {
|
||||
return Vec::new();
|
||||
|
@@ -378,7 +375,7 @@ fn apply_furigana<'a>(
|
|||
break;
|
||||
}
|
||||
}
|
||||
out.push((&surface[..start_s], ""));
|
||||
out.push((&surface[..start_s], "".into()));
|
||||
surface = &surface[start_s..];
|
||||
kana = &kana[start_k..];
|
||||
}
|
||||
|
@@ -395,7 +392,7 @@ fn apply_furigana<'a>(
|
|||
break;
|
||||
}
|
||||
}
|
||||
out.push((&surface[end_s..], ""));
|
||||
out.push((&surface[end_s..], "".into()));
|
||||
surface = &surface[..end_s];
|
||||
kana = &kana[..end_k];
|
||||
}
|
||||
|
@@ -420,16 +417,35 @@ fn apply_furigana<'a>(
|
|||
.unwrap();
|
||||
|
||||
// Insert the segments.
|
||||
out.insert(out.len() - 2, (&surface[..si], &kana[..ki]));
|
||||
out.insert(out.len() - 2, (&surface[si..(si + sc.len_utf8())], ""));
|
||||
out.insert(out.len() - 2, (&surface[..si], kana[..ki].into()));
|
||||
out.insert(
|
||||
out.len() - 2,
|
||||
(&surface[si..(si + sc.len_utf8())], "".into()),
|
||||
);
|
||||
surface = &surface[(si + sc.len_utf8())..];
|
||||
kana = &kana[(ki + kc.len_utf8())..];
|
||||
}
|
||||
|
||||
// Left over.
|
||||
out.insert(out.len() - 1, (surface, kana));
|
||||
out.insert(out.len() - 1, (surface, kana.into()));
|
||||
|
||||
out.iter().filter(|(s, _)| !s.is_empty()).copied().collect()
|
||||
out.retain(|(s, _)| !s.is_empty());
|
||||
|
||||
// Attach pitch accent indicator(s) if we have any.
|
||||
if !pitches.is_empty() {
|
||||
let last = out.last_mut().unwrap();
|
||||
last.1.push_str(" <sup>");
|
||||
for (i, pitch) in pitches.iter().enumerate() {
|
||||
last.1.push_str(&format!("{}", pitch));
|
||||
// If it's not the last one.
|
||||
if (i + 1) < pitches.len() {
|
||||
last.1.push_str(",");
|
||||
}
|
||||
}
|
||||
last.1.push_str("</sup> ");
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Due to the way this is used, this isn't meant to be exact, but instead
|
||||
|
@@ -563,7 +579,7 @@ mod tests {
|
|||
fn apply_furigana_01() {
|
||||
let surface = "へぇ";
|
||||
let kana = "ヘー";
|
||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
||||
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||
|
||||
assert!(pairs.is_empty());
|
||||
}
|
||||
|
@@ -572,7 +588,7 @@ mod tests {
|
|||
fn apply_furigana_02() {
|
||||
let surface = "へぇー";
|
||||
let kana = "ヘー";
|
||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
||||
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||
|
||||
assert!(pairs.is_empty());
|
||||
}
|
||||
|
@@ -581,7 +597,7 @@ mod tests {
|
|||
fn apply_furigana_03() {
|
||||
let surface = "へ";
|
||||
let kana = "え";
|
||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
||||
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||
|
||||
assert!(pairs.is_empty());
|
||||
}
|
||||
|
@@ -590,19 +606,24 @@ mod tests {
|
|||
fn apply_furigana_04() {
|
||||
let surface = "食べる";
|
||||
let kana = "タベル";
|
||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
||||
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||
|
||||
assert_eq!(&[("食", "タ"), ("べる", "")], &pairs[..]);
|
||||
assert_eq!(&[("食", "タ".into()), ("べる", "".into())], &pairs[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn apply_furigana_05() {
|
||||
let surface = "流れ出す";
|
||||
let kana = "ながれだす";
|
||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
||||
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||
|
||||
assert_eq!(
|
||||
&[("流", "なが"), ("れ", ""), ("出", "だ"), ("す", "")],
|
||||
&[
|
||||
("流", "なが".into()),
|
||||
("れ", "".into()),
|
||||
("出", "だ".into()),
|
||||
("す", "".into())
|
||||
],
|
||||
&pairs[..]
|
||||
);
|
||||
}
|
||||
|
@@ -611,18 +632,18 @@ mod tests {
|
|||
fn apply_furigana_06() {
|
||||
let surface = "物の怪";
|
||||
let kana = "もののけ";
|
||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
||||
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||
|
||||
assert_eq!(&[("物の怪", "もののけ")], &pairs[..]);
|
||||
assert_eq!(&[("物の怪", "もののけ".into())], &pairs[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn apply_furigana_07() {
|
||||
let surface = "ご飯";
|
||||
let kana = "ゴハン";
|
||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
||||
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||
|
||||
assert_eq!(&[("ご", ""), ("飯", "ハン")], &pairs[..]);
|
||||
assert_eq!(&[("ご", "".into()), ("飯", "ハン".into())], &pairs[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@@ -699,7 +720,7 @@ mod tests {
|
|||
);
|
||||
assert_eq!(
|
||||
furi_2,
|
||||
r#"<sup class="食う"><sup>2</sup><ruby>食<rt>タ</rt></ruby>べる</sup>のは<ruby>良</ruby>いね!<hi />"#
|
||||
r#"<sup class="食う"><ruby>食<rt>タ</rt></ruby><ruby>べる<rt> <sup>2</sup> </rt></ruby></sup>のは<ruby>良</ruby>いね!<hi />"#
|
||||
);
|
||||
}
|
||||
|
||||
|
@@ -715,13 +736,13 @@ mod tests {
|
|||
);
|
||||
assert_eq!(
|
||||
gen_accent.add_html_furigana("額"),
|
||||
"<sup>0</sup><ruby>額<rt>ヒタイ</rt></ruby>"
|
||||
"<ruby>額<rt>ヒタイ <sup>0</sup> </rt></ruby>"
|
||||
);
|
||||
|
||||
assert_eq!(gen.add_html_furigana("他"), "<ruby>他<rt>ホカ</rt></ruby>");
|
||||
assert_eq!(
|
||||
gen_accent.add_html_furigana("他"),
|
||||
"<sup>0</sup><ruby>他<rt>ホカ</rt></ruby>"
|
||||
"<ruby>他<rt>ホカ <sup>0</sup> </rt></ruby>"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
|
@@ -730,7 +751,7 @@ mod tests {
|
|||
);
|
||||
assert_eq!(
|
||||
gen_accent.add_html_furigana("私"),
|
||||
"<sup>0</sup><ruby>私<rt>ワタシ</rt></ruby>"
|
||||
"<ruby>私<rt>ワタシ <sup>0</sup> </rt></ruby>"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user