| // Copyright 2021 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/ash/input_method/text_utils.h" |
| |
| // TODO(crbug/1223597) The rules to detect sentence end is not perfect, and we |
| // may want to use regex to improve readability. |
| namespace ash { |
| namespace input_method { |
| namespace { |
| |
| const int kMaxSearchRange = 200; |
| const int kSpecialWordMaxLength = 6; |
| // The index difference between a sentence end and the next sentence start. |
| // Setting it to 1 is sufficient for grammar check model, but 2 is better since |
| // according to current rules, there is always a space or '\n' or '\r' after a |
| // sentence end. |
| const int kGapBetweenSentenceEndAndNextStart = 2; |
| |
| bool IsSentenceEndCharacter(char16_t c) { |
| return (c == u'.' || c == u'?' || c == u'!' || c == u'。' || c == u'。' || |
| c == u'.' || c == u'.' || c == u'?' || c == u'?' || c == u'!' || |
| c == u'!' || c == u'…'); |
| } |
| |
| bool EndsInSpecialPeriodWord(const std::u16string& text, uint32_t pos) { |
| uint32_t idx = pos; |
| while (idx <= pos && pos - idx <= kSpecialWordMaxLength && |
| text[idx] != u' ' && text[idx] != u'(') |
| idx--; |
| if (idx > pos || pos - idx > kSpecialWordMaxLength) |
| return false; |
| std::u16string last_word = text.substr(idx + 1, pos - idx); |
| return (last_word == u"c.f." || last_word == u"cf." || last_word == u"e.g." || |
| last_word == u"eg." || last_word == u"i.e." || last_word == u"ie." || |
| last_word == u"Mmes." || last_word == u"Mr." || |
| last_word == u"Mrs." || last_word == u"Ms." || |
| last_word == u"Mses." || last_word == u"Mssrs." || |
| last_word == u"Prof." || last_word == u"n.b." || last_word == u"nb."); |
| } |
| |
| bool IsSentenceEndSectionCharacter(char16_t c) { |
| return (c == u')' || c == u']' || c == u'}' || c == u'\'' || c == u'\"' || |
| c == u'ʺ' || c == u'˝' || c == u'ˮ' || c == u'"' || c == u'″' || |
| c == u'”' || c == u'»'); |
| } |
| |
| bool IsEmoticonEyes(char16_t c) { |
| return (c == ':' || c == ';'); |
| } |
| |
| bool IsEmoticonNose(char16_t c) { |
| return (c == u'-' || c == u'^' || c == u'{' || c == u'*'); |
| } |
| |
| bool IsEmoticonMouth(char16_t c) { |
| return (c == u')' || c == u'(' || c == u'\\' || c == u'|' || c == u'/'); |
| } |
| |
| bool EndsInEmoticon(const std::u16string& text, uint32_t pos) { |
| return ((pos >= 1 && IsEmoticonEyes(text[pos - 1]) && |
| IsEmoticonMouth(text[pos])) || |
| (pos >= 2 && IsEmoticonEyes(text[pos - 2]) && |
| IsEmoticonNose(text[pos - 1]) && IsEmoticonMouth(text[pos]))); |
| } |
| |
| bool IsSentenceEnd(const std::u16string& text, uint32_t pos) { |
| if (pos < text.size() - 1 && (text[pos + 1] == '\n' || text[pos + 1] == '\r')) |
| return true; |
| |
| // The character after the sentence end must be a space or the end of the |
| // text. |
| if (pos < 2 || (pos < text.size() - 1 && text[pos + 1] != u' ')) |
| return false; |
| |
| if (IsSentenceEndCharacter(text[pos]) && !EndsInSpecialPeriodWord(text, pos)) |
| return true; |
| |
| if (IsSentenceEndCharacter(text[pos - 1]) && |
| IsSentenceEndSectionCharacter(text[pos])) |
| return true; |
| |
| if (EndsInEmoticon(text, pos)) |
| return true; |
| |
| return false; |
| } |
| |
| } // namespace |
| |
| Sentence::Sentence() {} |
| |
| Sentence::Sentence(const gfx::Range& original_range, const std::u16string& text) |
| : original_range(original_range), text(text) {} |
| |
| Sentence::Sentence(const Sentence& other) = default; |
| |
| Sentence::~Sentence() = default; |
| |
| bool Sentence::operator==(const Sentence& other) const { |
| return original_range == other.original_range && text == other.text; |
| } |
| |
| bool Sentence::operator!=(const Sentence& other) const { |
| return !(*this == other); |
| } |
| |
| uint32_t FindLastSentenceEnd(const std::u16string& text, uint32_t pos) { |
| if (pos == 0 || pos > text.size()) |
| return kUndefined; |
| |
| for (size_t i = pos - 1; i > 0 && pos - i <= kMaxSearchRange; i--) { |
| if (IsSentenceEnd(text, i)) { |
| return i; |
| } |
| } |
| return kUndefined; |
| } |
| |
| uint32_t FindNextSentenceEnd(const std::u16string& text, uint32_t pos) { |
| if (pos >= text.size()) |
| return kUndefined; |
| |
| for (size_t i = pos; i < text.size() && i - pos <= kMaxSearchRange; i++) { |
| if (IsSentenceEnd(text, i)) { |
| return i; |
| } |
| } |
| return kUndefined; |
| } |
| |
| Sentence FindLastSentence(const std::u16string& text, uint32_t pos) { |
| if (pos > text.size()) |
| return Sentence(); |
| if (pos > 0 && |
| (pos == text.size() || text[pos] == '\n' || text[pos] == '\r')) { |
| pos--; |
| } |
| uint32_t end = FindLastSentenceEnd(text, pos); |
| if (end == kUndefined) { |
| return Sentence(); |
| } |
| uint32_t start = FindLastSentenceEnd(text, end); |
| if (start == kUndefined) { |
| start = 0; |
| } else { |
| start = start + kGapBetweenSentenceEndAndNextStart; |
| } |
| if (start >= end || end - start > kMaxSearchRange) { |
| return Sentence(); |
| } |
| return Sentence(gfx::Range(start, end + 1), |
| text.substr(start, end - start + 1)); |
| } |
| |
| Sentence FindCurrentSentence(const std::u16string& text, uint32_t pos) { |
| if (pos > text.size()) |
| return Sentence(); |
| if (pos > 0 && |
| (pos == text.size() || text[pos] == '\n' || text[pos] == '\r')) { |
| pos--; |
| } |
| uint32_t start = FindLastSentenceEnd(text, pos); |
| if (start == kUndefined) { |
| start = 0; |
| } else { |
| start = start + kGapBetweenSentenceEndAndNextStart; |
| } |
| |
| uint32_t end = FindNextSentenceEnd(text, pos); |
| if (end == kUndefined) { |
| end = text.length() - 1; |
| } |
| |
| if (start >= end || end - start > kMaxSearchRange) |
| return Sentence(); |
| |
| return Sentence(gfx::Range(start, end + 1), |
| text.substr(start, end - start + 1)); |
| } |
| |
| } // namespace input_method |
| } // namespace ash |