ui/ui-text-android/src/main/java/androidx/compose/ui/text/android/selection/WordIterator.kt - platform/frameworks/support - Git at Google

 /*
  * Copyright 2019 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package androidx.compose.ui.text.android.selection

 import androidx.compose.ui.text.android.CharSequenceCharacterIterator
 import java.text.BreakIterator
 import java.util.Locale
 import kotlin.math.max
 import kotlin.math.min

 /**
  * Walks through cursor positions at word boundaries.
  *
  * Also provides methods to determine word boundaries.
  *
  * Only a window of [charSequence] is analyzed: the requested `start`..`end` range widened by
  * [WINDOW_WIDTH] characters on each side and clamped to the bounds of the text. Offsets handed
  * to the boundary methods of this class must lie inside that window.
  *
  * Note: This file is copied from
  * [WordIterator.java](https://android.googlesource.com/platform/frameworks/base/+/master/core/java/android/text/method/WordIterator.java)
  *
  * @constructor Constructs a new WordIterator for the specified locale.
  * @param charSequence The text to be analyzed. It is cached for performance reasons.
  * @param start The start of the range of interest, must be in `0..charSequence.length`.
  * @param end The end of the range of interest, must be in `0..charSequence.length`.
  * @param locale The locale to be used for analyzing the text.
  * @throws IllegalArgumentException if `start` or `end` is outside of [charSequence].
  */
 internal class WordIterator(
     private val charSequence: CharSequence,
     start: Int,
     end: Int,
     locale: Locale?
 ) {
     /** Lower bound of the analysis window: the requested start minus [WINDOW_WIDTH], at least 0. */
     private val start: Int

     /** Upper bound of the analysis window: the requested end plus [WINDOW_WIDTH], at most length. */
     private val end: Int

     /** Word-instance break iterator whose text is the analysis window of [charSequence]. */
     private val iterator: BreakIterator

     init {
         require(start in 0..charSequence.length) {
             "input start index is outside the CharSequence"
         }
         require(end in 0..charSequence.length) {
             "input end index is outside the CharSequence"
         }
         iterator = BreakIterator.getWordInstance(locale)
         this.start = max(0, start - WINDOW_WIDTH)
         this.end = min(charSequence.length, end + WINDOW_WIDTH)
         // Hand the iterator the widened window (this.start/this.end), not the raw constructor
         // arguments, so every offset accepted by checkOffsetIsValid is inside the iterator's text.
         iterator.text = CharSequenceCharacterIterator(charSequence, this.start, this.end)
     }

     /**
      * Returns the position of the next boundary after the given offset. Returns
      * `BreakIterator.DONE` if there is no boundary after the given offset.
      *
      * @param offset the given start position to search from.
      * @return the position of the first boundary following the given offset.
      * @throws IllegalArgumentException if offset is outside the analysis window.
      */
     fun nextBoundary(offset: Int): Int {
         checkOffsetIsValid(offset)
         return iterator.following(offset)
     }

     /**
      * Returns the position of the boundary preceding the given offset or
      * `BreakIterator.DONE` if the given offset specifies the starting position.
      *
      * @param offset the given start position to search from.
      * @return the position of the last boundary preceding the given offset.
      * @throws IllegalArgumentException if offset is outside the analysis window.
      */
     fun prevBoundary(offset: Int): Int {
         checkOffsetIsValid(offset)
         return iterator.preceding(offset)
     }

     /**
      * If the `offset` is within a word or on a word boundary that can only be
      * considered the start of a word (e.g. _word where "_" is any character that would not
      * be considered part of the word) then this returns the index of the first character of
      * that word.
      *
      * If the offset is on a word boundary that can be considered the start and end of a
      * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
      * between AA and BB, this would return the start of the previous word, AA.
      *
      * Returns BreakIterator.DONE if there is no previous boundary.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     fun getPrevWordBeginningOnTwoWordsBoundary(offset: Int): Int = getBeginning(offset, true)

     /**
      * If the `offset` is within a word or on a word boundary that can only be
      * considered the end of a word (e.g. word_ where "_" is any character that would not
      * be considered part of the word) then this returns the index of the last character
      * plus one of that word.
      *
      * If the offset is on a word boundary that can be considered the start and end of a
      * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
      * between AA and BB, this would return the end of the next word, BB.
      *
      * Returns BreakIterator.DONE if there is no next boundary.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     fun getNextWordEndOnTwoWordBoundary(offset: Int): Int = getEnd(offset, true)

     /**
      * Searches backwards from `offset`, one word boundary at a time, for the beginning of a
      * group of punctuation as defined by [isPunctuation].
      *
      * @param offset the offset to search from.
      * @return the first offset found (starting with `offset` itself) that is on a punctuation
      * character but not after one, or BreakIterator.DONE if the search runs out of boundaries.
      * @throws IllegalArgumentException if offset is outside the analysis window.
      */
     fun getPunctuationBeginning(offset: Int): Int {
         checkOffsetIsValid(offset)
         var result = offset
         while (result != BreakIterator.DONE && !isPunctuationStartBoundary(result)) {
             result = prevBoundary(result)
         }
         // No need to shift offset, prevBoundary handles that.
         return result
     }

     /**
      * Searches forwards from `offset`, one word boundary at a time, for the end of a
      * group of punctuation as defined by [isPunctuation].
      *
      * @param offset the offset to search from.
      * @return the first offset found (starting with `offset` itself) that is after a punctuation
      * character but not on one, or BreakIterator.DONE if the search runs out of boundaries.
      * @throws IllegalArgumentException if offset is outside the analysis window.
      */
     fun getPunctuationEnd(offset: Int): Int {
         checkOffsetIsValid(offset)
         var result = offset
         while (result != BreakIterator.DONE && !isPunctuationEndBoundary(result)) {
             result = nextBoundary(result)
         }
         // No need to shift offset, nextBoundary handles that.
         return result
     }

     /**
      * Indicates if the provided offset is after a punctuation character
      * as defined by [isPunctuation].
      *
      * Offsets that have no preceding character inside the analysis window yield `false`
      * instead of throwing.
      *
      * @param offset the offset to check from.
      * @return Whether the offset is after a punctuation character.
      */
     fun isAfterPunctuation(offset: Int): Boolean =
         offset in (start + 1)..end &&
             isPunctuation(Character.codePointBefore(charSequence, offset))

     /**
      * Indicates if the provided offset is at a punctuation character
      * as defined by [isPunctuation].
      *
      * Offsets outside the analysis window (or at its very end) yield `false` instead of
      * throwing.
      *
      * @param offset the offset to check from.
      * @return Whether the offset is at a punctuation character.
      */
     fun isOnPunctuation(offset: Int): Boolean =
         offset in start until end &&
             isPunctuation(Character.codePointAt(charSequence, offset))

     /**
      * If the `offset` is within a word or on a word boundary that can only be
      * considered the start of a word (e.g. _word where "_" is any character that would not
      * be considered part of the word) then this returns the index of the first character of
      * that word.
      *
      * If the offset is on a word boundary that can be considered the start and end of a
      * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
      * between AA and BB, and getPrevWordBeginningOnTwoWordsBoundary is true then this would
      * return the start of the previous word, AA. Otherwise it would return the current offset,
      * the start of BB.
      *
      * Returns BreakIterator.DONE if there is no previous boundary, or if the offset is neither
      * on nor after a letter or digit.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     private fun getBeginning(offset: Int, getPrevWordBeginningOnTwoWordsBoundary: Boolean): Int {
         checkOffsetIsValid(offset)
         if (isOnLetterOrDigit(offset)) {
             // The offset is already a word start when it is a boundary and either nothing
             // word-like precedes it or the caller does not want the previous word.
             val isWordStart = iterator.isBoundary(offset) &&
                 (!isAfterLetterOrDigit(offset) || !getPrevWordBeginningOnTwoWordsBoundary)
             return if (isWordStart) offset else iterator.preceding(offset)
         }
         // Not on a word character: only a trailing position (word_) belongs to a word.
         return if (isAfterLetterOrDigit(offset)) iterator.preceding(offset) else BreakIterator.DONE
     }

     /**
      * If the `offset` is within a word or on a word boundary that can only be
      * considered the end of a word (e.g. word_ where "_" is any character that would not be
      * considered part of the word) then this returns the index of the last character plus one
      * of that word.
      *
      * If the offset is on a word boundary that can be considered the start and end of a
      * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
      * between AA and BB, and getNextWordEndOnTwoWordBoundary is true then this would return
      * the end of the next word, BB. Otherwise it would return the current offset, the end
      * of AA.
      *
      * Returns BreakIterator.DONE if there is no next boundary, or if the offset is neither
      * on nor after a letter or digit.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     private fun getEnd(offset: Int, getNextWordEndOnTwoWordBoundary: Boolean): Int {
         checkOffsetIsValid(offset)
         if (isAfterLetterOrDigit(offset)) {
             // The offset is already a word end when it is a boundary and either nothing
             // word-like follows it or the caller does not want the next word.
             val isWordEnd = iterator.isBoundary(offset) &&
                 (!isOnLetterOrDigit(offset) || !getNextWordEndOnTwoWordBoundary)
             return if (isWordEnd) offset else iterator.following(offset)
         }
         // Not after a word character: only a leading position (_word) belongs to a word.
         return if (isOnLetterOrDigit(offset)) iterator.following(offset) else BreakIterator.DONE
     }

     /** True when `offset` is on punctuation and the character before it is not punctuation. */
     private fun isPunctuationStartBoundary(offset: Int): Boolean =
         isOnPunctuation(offset) && !isAfterPunctuation(offset)

     /** True when `offset` follows punctuation and the character at it is not punctuation. */
     private fun isPunctuationEndBoundary(offset: Int): Boolean =
         !isOnPunctuation(offset) && isAfterPunctuation(offset)

     /** True when the code point before `offset` is inside the window and is a letter or digit. */
     private fun isAfterLetterOrDigit(offset: Int): Boolean =
         offset in (start + 1)..end &&
             Character.isLetterOrDigit(Character.codePointBefore(charSequence, offset))

     /** True when the code point at `offset` is inside the window and is a letter or digit. */
     private fun isOnLetterOrDigit(offset: Int): Boolean =
         offset in start until end &&
             Character.isLetterOrDigit(Character.codePointAt(charSequence, offset))

     /**
      * Checks that the given offset lies inside the analysis window.
      *
      * @throws IllegalArgumentException if offset is outside `start..end`.
      */
     private fun checkOffsetIsValid(offset: Int) {
         require(offset in start..end) {
             "Invalid offset: $offset. Valid range is [$start , $end]"
         }
     }

     companion object {
         // The size of the WINDOW_WIDTH is currently 50, as in Android.
         // According to Wikipedia https://en.wikipedia.org/wiki/Longest_word_in_English , the
         // longest word in English contains 45 letters. Then 50 is a good number for
         // WINDOW_WIDTH. Size of the window for the word iterator, should be greater than the
         // longest word's length.
         private const val WINDOW_WIDTH = 50

         /**
          * Indicates whether the given code point belongs to one of the Unicode punctuation
          * general categories (connector, dash, end, final quote, initial quote, other, start).
          *
          * @param cp the code point to classify.
          */
         internal fun isPunctuation(cp: Int): Boolean =
             when (Character.getType(cp)) {
                 Character.CONNECTOR_PUNCTUATION.toInt(),
                 Character.DASH_PUNCTUATION.toInt(),
                 Character.END_PUNCTUATION.toInt(),
                 Character.FINAL_QUOTE_PUNCTUATION.toInt(),
                 Character.INITIAL_QUOTE_PUNCTUATION.toInt(),
                 Character.OTHER_PUNCTUATION.toInt(),
                 Character.START_PUNCTUATION.toInt() -> true
                 else -> false
             }
     }
 }
	/*
	* Copyright 2019 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package androidx.compose.ui.text.android.selection

	import androidx.compose.ui.text.android.CharSequenceCharacterIterator
	import java.text.BreakIterator
	import java.util.Locale
	import kotlin.math.max
	import kotlin.math.min

	/**
	 * Walks through cursor positions at word boundaries.
	 *
	 * Also provides methods to determine word boundaries.
	 *
	 * Only a window of [charSequence] is analyzed: the requested `start`..`end` range widened by
	 * [WINDOW_WIDTH] characters on each side and clamped to the bounds of the text. Offsets handed
	 * to the boundary methods of this class must lie inside that window.
	 *
	 * Note: This file is copied from
	 * [WordIterator.java](https://android.googlesource.com/platform/frameworks/base/+/master/core/java/android/text/method/WordIterator.java)
	 *
	 * @constructor Constructs a new WordIterator for the specified locale.
	 * @param charSequence The text to be analyzed. It is cached for performance reasons.
	 * @param start The start of the range of interest, must be in `0..charSequence.length`.
	 * @param end The end of the range of interest, must be in `0..charSequence.length`.
	 * @param locale The locale to be used for analyzing the text.
	 * @throws IllegalArgumentException if `start` or `end` is outside of [charSequence].
	 */
	internal class WordIterator(
	    private val charSequence: CharSequence,
	    start: Int,
	    end: Int,
	    locale: Locale?
	) {
	    /** Lower bound of the analysis window: the requested start minus [WINDOW_WIDTH], at least 0. */
	    private val start: Int

	    /** Upper bound of the analysis window: the requested end plus [WINDOW_WIDTH], at most length. */
	    private val end: Int

	    /** Word-instance break iterator whose text is the analysis window of [charSequence]. */
	    private val iterator: BreakIterator

	    init {
	        require(start in 0..charSequence.length) {
	            "input start index is outside the CharSequence"
	        }
	        require(end in 0..charSequence.length) {
	            "input end index is outside the CharSequence"
	        }
	        iterator = BreakIterator.getWordInstance(locale)
	        this.start = max(0, start - WINDOW_WIDTH)
	        this.end = min(charSequence.length, end + WINDOW_WIDTH)
	        // Hand the iterator the widened window (this.start/this.end), not the raw constructor
	        // arguments, so every offset accepted by checkOffsetIsValid is inside the iterator's text.
	        iterator.text = CharSequenceCharacterIterator(charSequence, this.start, this.end)
	    }

	    /**
	     * Returns the position of the next boundary after the given offset. Returns
	     * `BreakIterator.DONE` if there is no boundary after the given offset.
	     *
	     * @param offset the given start position to search from.
	     * @return the position of the first boundary following the given offset.
	     * @throws IllegalArgumentException if offset is outside the analysis window.
	     */
	    fun nextBoundary(offset: Int): Int {
	        checkOffsetIsValid(offset)
	        return iterator.following(offset)
	    }

	    /**
	     * Returns the position of the boundary preceding the given offset or
	     * `BreakIterator.DONE` if the given offset specifies the starting position.
	     *
	     * @param offset the given start position to search from.
	     * @return the position of the last boundary preceding the given offset.
	     * @throws IllegalArgumentException if offset is outside the analysis window.
	     */
	    fun prevBoundary(offset: Int): Int {
	        checkOffsetIsValid(offset)
	        return iterator.preceding(offset)
	    }

	    /**
	     * If the `offset` is within a word or on a word boundary that can only be
	     * considered the start of a word (e.g. _word where "_" is any character that would not
	     * be considered part of the word) then this returns the index of the first character of
	     * that word.
	     *
	     * If the offset is on a word boundary that can be considered the start and end of a
	     * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
	     * between AA and BB, this would return the start of the previous word, AA.
	     *
	     * Returns BreakIterator.DONE if there is no previous boundary.
	     *
	     * @throws IllegalArgumentException if offset is not valid.
	     */
	    fun getPrevWordBeginningOnTwoWordsBoundary(offset: Int): Int = getBeginning(offset, true)

	    /**
	     * If the `offset` is within a word or on a word boundary that can only be
	     * considered the end of a word (e.g. word_ where "_" is any character that would not
	     * be considered part of the word) then this returns the index of the last character
	     * plus one of that word.
	     *
	     * If the offset is on a word boundary that can be considered the start and end of a
	     * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
	     * between AA and BB, this would return the end of the next word, BB.
	     *
	     * Returns BreakIterator.DONE if there is no next boundary.
	     *
	     * @throws IllegalArgumentException if offset is not valid.
	     */
	    fun getNextWordEndOnTwoWordBoundary(offset: Int): Int = getEnd(offset, true)

	    /**
	     * Searches backwards from `offset`, one word boundary at a time, for the beginning of a
	     * group of punctuation as defined by [isPunctuation].
	     *
	     * @param offset the offset to search from.
	     * @return the first offset found (starting with `offset` itself) that is on a punctuation
	     * character but not after one, or BreakIterator.DONE if the search runs out of boundaries.
	     * @throws IllegalArgumentException if offset is outside the analysis window.
	     */
	    fun getPunctuationBeginning(offset: Int): Int {
	        checkOffsetIsValid(offset)
	        var result = offset
	        while (result != BreakIterator.DONE && !isPunctuationStartBoundary(result)) {
	            result = prevBoundary(result)
	        }
	        // No need to shift offset, prevBoundary handles that.
	        return result
	    }

	    /**
	     * Searches forwards from `offset`, one word boundary at a time, for the end of a
	     * group of punctuation as defined by [isPunctuation].
	     *
	     * @param offset the offset to search from.
	     * @return the first offset found (starting with `offset` itself) that is after a punctuation
	     * character but not on one, or BreakIterator.DONE if the search runs out of boundaries.
	     * @throws IllegalArgumentException if offset is outside the analysis window.
	     */
	    fun getPunctuationEnd(offset: Int): Int {
	        checkOffsetIsValid(offset)
	        var result = offset
	        while (result != BreakIterator.DONE && !isPunctuationEndBoundary(result)) {
	            result = nextBoundary(result)
	        }
	        // No need to shift offset, nextBoundary handles that.
	        return result
	    }

	    /**
	     * Indicates if the provided offset is after a punctuation character
	     * as defined by [isPunctuation].
	     *
	     * Offsets that have no preceding character inside the analysis window yield `false`
	     * instead of throwing.
	     *
	     * @param offset the offset to check from.
	     * @return Whether the offset is after a punctuation character.
	     */
	    fun isAfterPunctuation(offset: Int): Boolean {
	        if (offset in (start + 1)..end) {
	            return isPunctuation(Character.codePointBefore(charSequence, offset))
	        }
	        return false
	    }

	    /**
	     * Indicates if the provided offset is at a punctuation character
	     * as defined by [isPunctuation].
	     *
	     * Offsets outside the analysis window (or at its very end) yield `false` instead of
	     * throwing.
	     *
	     * @param offset the offset to check from.
	     * @return Whether the offset is at a punctuation character.
	     */
	    fun isOnPunctuation(offset: Int): Boolean {
	        if (offset in start until end) {
	            return isPunctuation(Character.codePointAt(charSequence, offset))
	        }
	        return false
	    }

	    /**
	     * If the `offset` is within a word or on a word boundary that can only be
	     * considered the start of a word (e.g. _word where "_" is any character that would not
	     * be considered part of the word) then this returns the index of the first character of
	     * that word.
	     *
	     * If the offset is on a word boundary that can be considered the start and end of a
	     * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
	     * between AA and BB, and getPrevWordBeginningOnTwoWordsBoundary is true then this would
	     * return the start of the previous word, AA. Otherwise it would return the current offset,
	     * the start of BB.
	     *
	     * Returns BreakIterator.DONE if there is no previous boundary, or if the offset is neither
	     * on nor after a letter or digit.
	     *
	     * @throws IllegalArgumentException if offset is not valid.
	     */
	    private fun getBeginning(offset: Int, getPrevWordBeginningOnTwoWordsBoundary: Boolean): Int {
	        checkOffsetIsValid(offset)
	        if (isOnLetterOrDigit(offset)) {
	            // The offset is already a word start when it is a boundary and either nothing
	            // word-like precedes it or the caller does not want the previous word.
	            return if (iterator.isBoundary(offset) &&
	                !(isAfterLetterOrDigit(offset) && getPrevWordBeginningOnTwoWordsBoundary)
	            ) {
	                offset
	            } else {
	                iterator.preceding(offset)
	            }
	        }
	        // Not on a word character: only a trailing position (word_) belongs to a word.
	        if (isAfterLetterOrDigit(offset)) {
	            return iterator.preceding(offset)
	        }
	        return BreakIterator.DONE
	    }

	    /**
	     * If the `offset` is within a word or on a word boundary that can only be
	     * considered the end of a word (e.g. word_ where "_" is any character that would not be
	     * considered part of the word) then this returns the index of the last character plus one
	     * of that word.
	     *
	     * If the offset is on a word boundary that can be considered the start and end of a
	     * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
	     * between AA and BB, and getNextWordEndOnTwoWordBoundary is true then this would return
	     * the end of the next word, BB. Otherwise it would return the current offset, the end
	     * of AA.
	     *
	     * Returns BreakIterator.DONE if there is no next boundary, or if the offset is neither
	     * on nor after a letter or digit.
	     *
	     * @throws IllegalArgumentException if offset is not valid.
	     */
	    private fun getEnd(offset: Int, getNextWordEndOnTwoWordBoundary: Boolean): Int {
	        checkOffsetIsValid(offset)
	        if (isAfterLetterOrDigit(offset)) {
	            // The offset is already a word end when it is a boundary and either nothing
	            // word-like follows it or the caller does not want the next word.
	            return if (iterator.isBoundary(offset) &&
	                !(isOnLetterOrDigit(offset) && getNextWordEndOnTwoWordBoundary)
	            ) {
	                offset
	            } else {
	                iterator.following(offset)
	            }
	        }
	        // Not after a word character: only a leading position (_word) belongs to a word.
	        if (isOnLetterOrDigit(offset)) {
	            return iterator.following(offset)
	        }
	        return BreakIterator.DONE
	    }

	    /** True when `offset` is on punctuation and the character before it is not punctuation. */
	    private fun isPunctuationStartBoundary(offset: Int): Boolean {
	        return isOnPunctuation(offset) && !isAfterPunctuation(offset)
	    }

	    /** True when `offset` follows punctuation and the character at it is not punctuation. */
	    private fun isPunctuationEndBoundary(offset: Int): Boolean {
	        return !isOnPunctuation(offset) && isAfterPunctuation(offset)
	    }

	    /** True when the code point before `offset` is inside the window and is a letter or digit. */
	    private fun isAfterLetterOrDigit(offset: Int): Boolean {
	        if (offset in (start + 1)..end) {
	            return Character.isLetterOrDigit(Character.codePointBefore(charSequence, offset))
	        }
	        return false
	    }

	    /** True when the code point at `offset` is inside the window and is a letter or digit. */
	    private fun isOnLetterOrDigit(offset: Int): Boolean {
	        if (offset in start until end) {
	            return Character.isLetterOrDigit(Character.codePointAt(charSequence, offset))
	        }
	        return false
	    }

	    /**
	     * Checks that the given offset lies inside the analysis window.
	     *
	     * @throws IllegalArgumentException if offset is outside `start..end`.
	     */
	    private fun checkOffsetIsValid(offset: Int) {
	        require(offset in start..end) {
	            "Invalid offset: $offset. Valid range is [$start , $end]"
	        }
	    }

	    companion object {
	        // The size of the WINDOW_WIDTH is currently 50, as in Android.
	        // According to Wikipedia https://en.wikipedia.org/wiki/Longest_word_in_English , the
	        // longest word in English contains 45 letters. Then 50 is a good number for
	        // WINDOW_WIDTH. Size of the window for the word iterator, should be greater than the
	        // longest word's length.
	        private const val WINDOW_WIDTH = 50

	        /**
	         * Indicates whether the given code point belongs to one of the Unicode punctuation
	         * general categories (connector, dash, end, final quote, initial quote, other, start).
	         *
	         * @param cp the code point to classify.
	         */
	        internal fun isPunctuation(cp: Int): Boolean {
	            val type = Character.getType(cp)
	            return type == Character.CONNECTOR_PUNCTUATION.toInt() ||
	                type == Character.DASH_PUNCTUATION.toInt() ||
	                type == Character.END_PUNCTUATION.toInt() ||
	                type == Character.FINAL_QUOTE_PUNCTUATION.toInt() ||
	                type == Character.INITIAL_QUOTE_PUNCTUATION.toInt() ||
	                type == Character.OTHER_PUNCTUATION.toInt() ||
	                type == Character.START_PUNCTUATION.toInt()
	        }
	    }
	}