diff --git a/src/accent.rs b/src/accent.rs index b7e7f24..b234dbd 100644 --- a/src/accent.rs +++ b/src/accent.rs @@ -83,7 +83,7 @@ pub fn accent_number_to_byte_idx(kana: &str, accent_number: u8) -> Option } _ => false, }; - if next_is_mod { + if !next_is_mod { current += 1; } diff --git a/src/lib.rs b/src/lib.rs index 4738ab5..ae78db7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -437,19 +437,30 @@ fn apply_furigana<'a>( out.insert(out.len() - 1, (surface.into(), kana.into())); out.retain(|(s, _)| !s.is_empty()); - // Attach pitch accent indicator if we have any. - if !pitches.is_empty() && pitches[0] > 0 { - let mut byte_idx = accent::accent_number_to_byte_idx(kana, pitches[0]).unwrap(); - for (ref mut s, ref mut k) in out.iter_mut() { + // Attach pitch accent indicator if there is one and it's unambiguous. + if pitches.len() == 1 { + if pitches[0] == 0 { + // 平板. + let (s, k) = out.last_mut().unwrap(); let text = if k.is_empty() { s } else { k }; - if byte_idx < text.len() - && text.is_char_boundary(byte_idx) - && text.is_char_boundary(byte_idx + 3) - { - text.insert_str(byte_idx + 3, ""); - text.insert_str(byte_idx, ""); + if text.len() >= 3 && text.is_char_boundary(text.len() - 3) { + text.insert_str(text.len() - 3, ""); + text.insert_str(text.len(), ""); + } + } else { + // Everything else. + let mut byte_idx = accent::accent_number_to_byte_idx(kana, pitches[0]).unwrap(); + for (s, k) in out.iter_mut() { + let text = if k.is_empty() { s } else { k }; + if byte_idx < text.len() + && text.is_char_boundary(byte_idx) + && text.is_char_boundary(byte_idx + 3) + { + text.insert_str(byte_idx + 3, ""); + text.insert_str(byte_idx, ""); + } + byte_idx -= text.len(); } - byte_idx -= text.len(); } } @@ -742,70 +753,107 @@ mod tests { #[test] fn add_html_furigana_02() { let mut gen = get_furigana_gen().new_session(false); - let mut gen_accent = get_furigana_gen_with_accent().new_session(false); assert_eq!( gen.add_html_furigana("額"), r#"ヒタイ"# ); - assert_eq!( - gen_accent.add_html_furigana("額"), - r#"ヒタイ"# - ); assert_eq!( gen.add_html_furigana("他"), r#"ホカ"# ); - assert_eq!( - gen_accent.add_html_furigana("他"), - r#"ホカ"# - ); assert_eq!( gen.add_html_furigana("私"), r#"ワタシ"# ); - assert_eq!( - gen_accent.add_html_furigana("私"), - r#"ワタシ"# - ); // The added 卵 is to trigger the parse we're testing of 等. assert_eq!( gen.add_html_furigana("卵等"), r#"タマゴナド"# ); - assert_eq!( - gen_accent.add_html_furigana("卵等"), - r#"タマゴ"# - ); assert_eq!( gen.add_html_furigana("大分"), r#"大分ダイブ"# ); - assert_eq!( - gen_accent.add_html_furigana("大分"), - r#"大分ダイブ"# - ); assert_eq!( gen.add_html_furigana("日本"), r#"日本ニホン"# ); - assert_eq!( - gen_accent.add_html_furigana("日本"), - r#"日本ニホン"# - ); assert_eq!( gen.add_html_furigana("日本人"), r#"日本人ニホンジン"# ); + } + + // Testing accent markers. + #[test] + fn add_html_furigana_03() { + let mut gen = get_furigana_gen_with_accent().new_session(false); + + // Ichidan verb. Should only get pitch accent marking in full dictionary form. assert_eq!( - gen_accent.add_html_furigana("日本人"), - r#"日本人ニホンジン"# + gen.add_html_furigana("食べる"), + r#"る"# + ); + assert_eq!( + gen.add_html_furigana("食べます"), + r#"べます"# + ); + assert_eq!( + gen.add_html_furigana("食べ"), + r#"べ"# + ); + + // Godan verb. Should only get pitch accent marking in full dictionary form. + assert_eq!( + gen.add_html_furigana("泳ぐ"), + r#"ぐ"# + ); + assert_eq!( + gen.add_html_furigana("泳が"), + r#"オヨが"# + ); + assert_eq!( + gen.add_html_furigana("泳ぎます"), + r#"オヨぎます"# + ); + assert_eq!( + gen.add_html_furigana("泳ぎ"), + r#"オヨ"# + ); + + // I-adjective. Should only get pitch accent marking in full dictionary form. + assert_eq!( + gen.add_html_furigana("早い"), + r#"い"# + ); + assert_eq!( + gen.add_html_furigana("早く"), + r#"ハヤく"# + ); + + // Other. Should always get pitch accent markings. + assert_eq!( + gen.add_html_furigana("少し"), + r#"し"# + ); + assert_eq!( + gen.add_html_furigana("綺麗"), + r#"綺麗レイ"# + ); + assert_eq!( + gen.add_html_furigana("平板"), + r#"平板ヘイバ"# + ); + assert_eq!( + gen.add_html_furigana("他"), + r#""# ); } }