Attach pitch accent indicators in a more reasonable way.
This commit is contained in:
parent
adb58983a7
commit
5c7c167d94
77
src/lib.rs
77
src/lib.rs
|
@ -321,15 +321,11 @@ fn add_html_furigana(
|
||||||
kana.into()
|
kana.into()
|
||||||
};
|
};
|
||||||
|
|
||||||
let furigana_text = apply_furigana(surface, &kana, exclude_kanji);
|
let furigana_text = apply_furigana(surface, &kana, pitches, exclude_kanji);
|
||||||
|
|
||||||
if furigana_text.is_empty() {
|
if furigana_text.is_empty() {
|
||||||
new_text.push_str(surface);
|
new_text.push_str(surface);
|
||||||
} else {
|
} else {
|
||||||
for pitch in pitches {
|
|
||||||
new_text.push_str(&format!("<sup>{}</sup>", pitch));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (surf, furi) in furigana_text.iter() {
|
for (surf, furi) in furigana_text.iter() {
|
||||||
if furi.is_empty() {
|
if furi.is_empty() {
|
||||||
new_text.push_str(surf);
|
new_text.push_str(surf);
|
||||||
|
@ -355,9 +351,10 @@ fn add_html_furigana(
|
||||||
fn apply_furigana<'a>(
|
fn apply_furigana<'a>(
|
||||||
surface: &'a str,
|
surface: &'a str,
|
||||||
kana: &'a str,
|
kana: &'a str,
|
||||||
|
pitches: &[u8],
|
||||||
exclude_kanji: &FnvHashSet<char>,
|
exclude_kanji: &FnvHashSet<char>,
|
||||||
) -> Vec<(&'a str, &'a str)> {
|
) -> Vec<(&'a str, String)> {
|
||||||
let mut out = Vec::new();
|
let mut out: Vec<(&str, String)> = Vec::new();
|
||||||
|
|
||||||
if furigana_unneeded(surface, exclude_kanji) || !is_kana_str(kana) {
|
if furigana_unneeded(surface, exclude_kanji) || !is_kana_str(kana) {
|
||||||
return Vec::new();
|
return Vec::new();
|
||||||
|
@ -378,7 +375,7 @@ fn apply_furigana<'a>(
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out.push((&surface[..start_s], ""));
|
out.push((&surface[..start_s], "".into()));
|
||||||
surface = &surface[start_s..];
|
surface = &surface[start_s..];
|
||||||
kana = &kana[start_k..];
|
kana = &kana[start_k..];
|
||||||
}
|
}
|
||||||
|
@ -395,7 +392,7 @@ fn apply_furigana<'a>(
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out.push((&surface[end_s..], ""));
|
out.push((&surface[end_s..], "".into()));
|
||||||
surface = &surface[..end_s];
|
surface = &surface[..end_s];
|
||||||
kana = &kana[..end_k];
|
kana = &kana[..end_k];
|
||||||
}
|
}
|
||||||
|
@ -420,16 +417,35 @@ fn apply_furigana<'a>(
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// Insert the segments.
|
// Insert the segments.
|
||||||
out.insert(out.len() - 2, (&surface[..si], &kana[..ki]));
|
out.insert(out.len() - 2, (&surface[..si], kana[..ki].into()));
|
||||||
out.insert(out.len() - 2, (&surface[si..(si + sc.len_utf8())], ""));
|
out.insert(
|
||||||
|
out.len() - 2,
|
||||||
|
(&surface[si..(si + sc.len_utf8())], "".into()),
|
||||||
|
);
|
||||||
surface = &surface[(si + sc.len_utf8())..];
|
surface = &surface[(si + sc.len_utf8())..];
|
||||||
kana = &kana[(ki + kc.len_utf8())..];
|
kana = &kana[(ki + kc.len_utf8())..];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Left over.
|
// Left over.
|
||||||
out.insert(out.len() - 1, (surface, kana));
|
out.insert(out.len() - 1, (surface, kana.into()));
|
||||||
|
|
||||||
out.iter().filter(|(s, _)| !s.is_empty()).copied().collect()
|
out.retain(|(s, _)| !s.is_empty());
|
||||||
|
|
||||||
|
// Attach pitch accent indicator(s) if we have any.
|
||||||
|
if !pitches.is_empty() {
|
||||||
|
let last = out.last_mut().unwrap();
|
||||||
|
last.1.push_str(" <sup>");
|
||||||
|
for (i, pitch) in pitches.iter().enumerate() {
|
||||||
|
last.1.push_str(&format!("{}", pitch));
|
||||||
|
// If it's not the last one.
|
||||||
|
if (i + 1) < pitches.len() {
|
||||||
|
last.1.push_str(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
last.1.push_str("</sup> ");
|
||||||
|
}
|
||||||
|
|
||||||
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Due to the way this is used, this isn't meant to be exact, but instead
|
/// Due to the way this is used, this isn't meant to be exact, but instead
|
||||||
|
@ -563,7 +579,7 @@ mod tests {
|
||||||
fn apply_furigana_01() {
|
fn apply_furigana_01() {
|
||||||
let surface = "へぇ";
|
let surface = "へぇ";
|
||||||
let kana = "ヘー";
|
let kana = "ヘー";
|
||||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||||
|
|
||||||
assert!(pairs.is_empty());
|
assert!(pairs.is_empty());
|
||||||
}
|
}
|
||||||
|
@ -572,7 +588,7 @@ mod tests {
|
||||||
fn apply_furigana_02() {
|
fn apply_furigana_02() {
|
||||||
let surface = "へぇー";
|
let surface = "へぇー";
|
||||||
let kana = "ヘー";
|
let kana = "ヘー";
|
||||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||||
|
|
||||||
assert!(pairs.is_empty());
|
assert!(pairs.is_empty());
|
||||||
}
|
}
|
||||||
|
@ -581,7 +597,7 @@ mod tests {
|
||||||
fn apply_furigana_03() {
|
fn apply_furigana_03() {
|
||||||
let surface = "へ";
|
let surface = "へ";
|
||||||
let kana = "え";
|
let kana = "え";
|
||||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||||
|
|
||||||
assert!(pairs.is_empty());
|
assert!(pairs.is_empty());
|
||||||
}
|
}
|
||||||
|
@ -590,19 +606,24 @@ mod tests {
|
||||||
fn apply_furigana_04() {
|
fn apply_furigana_04() {
|
||||||
let surface = "食べる";
|
let surface = "食べる";
|
||||||
let kana = "タベル";
|
let kana = "タベル";
|
||||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||||
|
|
||||||
assert_eq!(&[("食", "タ"), ("べる", "")], &pairs[..]);
|
assert_eq!(&[("食", "タ".into()), ("べる", "".into())], &pairs[..]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn apply_furigana_05() {
|
fn apply_furigana_05() {
|
||||||
let surface = "流れ出す";
|
let surface = "流れ出す";
|
||||||
let kana = "ながれだす";
|
let kana = "ながれだす";
|
||||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&[("流", "なが"), ("れ", ""), ("出", "だ"), ("す", "")],
|
&[
|
||||||
|
("流", "なが".into()),
|
||||||
|
("れ", "".into()),
|
||||||
|
("出", "だ".into()),
|
||||||
|
("す", "".into())
|
||||||
|
],
|
||||||
&pairs[..]
|
&pairs[..]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -611,18 +632,18 @@ mod tests {
|
||||||
fn apply_furigana_06() {
|
fn apply_furigana_06() {
|
||||||
let surface = "物の怪";
|
let surface = "物の怪";
|
||||||
let kana = "もののけ";
|
let kana = "もののけ";
|
||||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||||
|
|
||||||
assert_eq!(&[("物の怪", "もののけ")], &pairs[..]);
|
assert_eq!(&[("物の怪", "もののけ".into())], &pairs[..]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn apply_furigana_07() {
|
fn apply_furigana_07() {
|
||||||
let surface = "ご飯";
|
let surface = "ご飯";
|
||||||
let kana = "ゴハン";
|
let kana = "ゴハン";
|
||||||
let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
|
let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
|
||||||
|
|
||||||
assert_eq!(&[("ご", ""), ("飯", "ハン")], &pairs[..]);
|
assert_eq!(&[("ご", "".into()), ("飯", "ハン".into())], &pairs[..]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -699,7 +720,7 @@ mod tests {
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
furi_2,
|
furi_2,
|
||||||
r#"<sup class="食う"><sup>2</sup><ruby>食<rt>タ</rt></ruby>べる</sup>のは<ruby>良</ruby>いね!<hi />"#
|
r#"<sup class="食う"><ruby>食<rt>タ</rt></ruby><ruby>べる<rt> <sup>2</sup> </rt></ruby></sup>のは<ruby>良</ruby>いね!<hi />"#
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -715,13 +736,13 @@ mod tests {
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
gen_accent.add_html_furigana("額"),
|
gen_accent.add_html_furigana("額"),
|
||||||
"<sup>0</sup><ruby>額<rt>ヒタイ</rt></ruby>"
|
"<ruby>額<rt>ヒタイ <sup>0</sup> </rt></ruby>"
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(gen.add_html_furigana("他"), "<ruby>他<rt>ホカ</rt></ruby>");
|
assert_eq!(gen.add_html_furigana("他"), "<ruby>他<rt>ホカ</rt></ruby>");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
gen_accent.add_html_furigana("他"),
|
gen_accent.add_html_furigana("他"),
|
||||||
"<sup>0</sup><ruby>他<rt>ホカ</rt></ruby>"
|
"<ruby>他<rt>ホカ <sup>0</sup> </rt></ruby>"
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -730,7 +751,7 @@ mod tests {
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
gen_accent.add_html_furigana("私"),
|
gen_accent.add_html_furigana("私"),
|
||||||
"<sup>0</sup><ruby>私<rt>ワタシ</rt></ruby>"
|
"<ruby>私<rt>ワタシ <sup>0</sup> </rt></ruby>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user