diff --git a/src/lib.rs b/src/lib.rs
index e60c508..cd28cd8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -321,15 +321,11 @@ fn add_html_furigana(
kana.into()
};
- let furigana_text = apply_furigana(surface, &kana, exclude_kanji);
+ let furigana_text = apply_furigana(surface, &kana, pitches, exclude_kanji);
if furigana_text.is_empty() {
new_text.push_str(surface);
} else {
- for pitch in pitches {
- new_text.push_str(&format!("{}", pitch));
- }
-
for (surf, furi) in furigana_text.iter() {
if furi.is_empty() {
new_text.push_str(surf);
@@ -355,9 +351,10 @@ fn add_html_furigana(
fn apply_furigana<'a>(
surface: &'a str,
kana: &'a str,
+ pitches: &[u8],
exclude_kanji: &FnvHashSet,
-) -> Vec<(&'a str, &'a str)> {
- let mut out = Vec::new();
+) -> Vec<(&'a str, String)> {
+ let mut out: Vec<(&str, String)> = Vec::new();
if furigana_unneeded(surface, exclude_kanji) || !is_kana_str(kana) {
return Vec::new();
@@ -378,7 +375,7 @@ fn apply_furigana<'a>(
break;
}
}
- out.push((&surface[..start_s], ""));
+ out.push((&surface[..start_s], "".into()));
surface = &surface[start_s..];
kana = &kana[start_k..];
}
@@ -395,7 +392,7 @@ fn apply_furigana<'a>(
break;
}
}
- out.push((&surface[end_s..], ""));
+ out.push((&surface[end_s..], "".into()));
surface = &surface[..end_s];
kana = &kana[..end_k];
}
@@ -420,16 +417,35 @@ fn apply_furigana<'a>(
.unwrap();
// Insert the segments.
- out.insert(out.len() - 2, (&surface[..si], &kana[..ki]));
- out.insert(out.len() - 2, (&surface[si..(si + sc.len_utf8())], ""));
+ out.insert(out.len() - 2, (&surface[..si], kana[..ki].into()));
+ out.insert(
+ out.len() - 2,
+ (&surface[si..(si + sc.len_utf8())], "".into()),
+ );
surface = &surface[(si + sc.len_utf8())..];
kana = &kana[(ki + kc.len_utf8())..];
}
// Left over.
- out.insert(out.len() - 1, (surface, kana));
+ out.insert(out.len() - 1, (surface, kana.into()));
- out.iter().filter(|(s, _)| !s.is_empty()).copied().collect()
+ out.retain(|(s, _)| !s.is_empty());
+
+ // Attach pitch accent indicator(s) if we have any.
+ if !pitches.is_empty() {
+ let last = out.last_mut().unwrap();
+ last.1.push_str(" ");
+ for (i, pitch) in pitches.iter().enumerate() {
+ last.1.push_str(&format!("{}", pitch));
+ // If it's not the last one.
+ if (i + 1) < pitches.len() {
+ last.1.push_str(",");
+ }
+ }
+ last.1.push_str(" ");
+ }
+
+ out
}
/// Due to the way this is used, this isn't meant to be exact, but instead
@@ -563,7 +579,7 @@ mod tests {
fn apply_furigana_01() {
let surface = "へぇ";
let kana = "ヘー";
- let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
+ let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
assert!(pairs.is_empty());
}
@@ -572,7 +588,7 @@ mod tests {
fn apply_furigana_02() {
let surface = "へぇー";
let kana = "ヘー";
- let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
+ let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
assert!(pairs.is_empty());
}
@@ -581,7 +597,7 @@ mod tests {
fn apply_furigana_03() {
let surface = "へ";
let kana = "え";
- let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
+ let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
assert!(pairs.is_empty());
}
@@ -590,19 +606,24 @@ mod tests {
fn apply_furigana_04() {
let surface = "食べる";
let kana = "タベル";
- let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
+ let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
- assert_eq!(&[("食", "タ"), ("べる", "")], &pairs[..]);
+ assert_eq!(&[("食", "タ".into()), ("べる", "".into())], &pairs[..]);
}
#[test]
fn apply_furigana_05() {
let surface = "流れ出す";
let kana = "ながれだす";
- let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
+ let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
assert_eq!(
- &[("流", "なが"), ("れ", ""), ("出", "だ"), ("す", "")],
+ &[
+ ("流", "なが".into()),
+ ("れ", "".into()),
+ ("出", "だ".into()),
+ ("す", "".into())
+ ],
&pairs[..]
);
}
@@ -611,18 +632,18 @@ mod tests {
fn apply_furigana_06() {
let surface = "物の怪";
let kana = "もののけ";
- let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
+ let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
- assert_eq!(&[("物の怪", "もののけ")], &pairs[..]);
+ assert_eq!(&[("物の怪", "もののけ".into())], &pairs[..]);
}
#[test]
fn apply_furigana_07() {
let surface = "ご飯";
let kana = "ゴハン";
- let pairs = apply_furigana(surface, kana, &FnvHashSet::default());
+ let pairs = apply_furigana(surface, kana, &[], &FnvHashSet::default());
- assert_eq!(&[("ご", ""), ("飯", "ハン")], &pairs[..]);
+ assert_eq!(&[("ご", "".into()), ("飯", "ハン".into())], &pairs[..]);
}
#[test]
@@ -699,7 +720,7 @@ mod tests {
);
assert_eq!(
furi_2,
- r#"2食べるのは良いね!"#
+ r#"食べるのは良いね!"#
);
}
@@ -715,13 +736,13 @@ mod tests {
);
assert_eq!(
gen_accent.add_html_furigana("額"),
- "0額"
+ "額"
);
assert_eq!(gen.add_html_furigana("他"), "他");
assert_eq!(
gen_accent.add_html_furigana("他"),
- "0他"
+ "他"
);
assert_eq!(
@@ -730,7 +751,7 @@ mod tests {
);
assert_eq!(
gen_accent.add_html_furigana("私"),
- "0私"
+ "私"
);
}
}