src/java.base/share/classes/java/text/BreakIterator.java
changeset 58288 48e480e56aad
parent 58242 94bb65cb37d3
child 58679 9c3209ff7550
equal deleted inserted replaced
58287:a7f16447085e 58288:48e480e56aad
    46 import sun.util.locale.provider.LocaleProviderAdapter;
    46 import sun.util.locale.provider.LocaleProviderAdapter;
    47 import sun.util.locale.provider.LocaleServiceProviderPool;
    47 import sun.util.locale.provider.LocaleServiceProviderPool;
    48 
    48 
    49 
    49 
    50 /**
    50 /**
    51  * The <code>BreakIterator</code> class implements methods for finding
    51  * The {@code BreakIterator} class implements methods for finding
    52  * the location of boundaries in text. Instances of <code>BreakIterator</code>
    52  * the location of boundaries in text. Instances of {@code BreakIterator}
    53  * maintain a current position and scan over text
    53  * maintain a current position and scan over text
    54  * returning the index of characters where boundaries occur.
    54  * returning the index of characters where boundaries occur.
    55  * Internally, <code>BreakIterator</code> scans text using a
    55  * Internally, {@code BreakIterator} scans text using a
    56  * <code>CharacterIterator</code>, and is thus able to scan text held
    56  * {@code CharacterIterator}, and is thus able to scan text held
    57  * by any object implementing that protocol. A <code>StringCharacterIterator</code>
    57  * by any object implementing that protocol. A {@code StringCharacterIterator}
    58  * is used to scan <code>String</code> objects passed to <code>setText</code>.
    58  * is used to scan {@code String} objects passed to {@code setText}.
    59  *
    59  *
    60  * <p>
    60  * <p>
    61  * You use the factory methods provided by this class to create
    61  * You use the factory methods provided by this class to create
    62  * instances of various types of break iterators. In particular,
    62  * instances of various types of break iterators. In particular,
    63  * use <code>getWordInstance</code>, <code>getLineInstance</code>,
    63  * use {@code getWordInstance}, {@code getLineInstance},
    64  * <code>getSentenceInstance</code>, and <code>getCharacterInstance</code>
    64  * {@code getSentenceInstance}, and {@code getCharacterInstance}
    65  * to create <code>BreakIterator</code>s that perform
    65  * to create {@code BreakIterator}s that perform
    66  * word, line, sentence, and character boundary analysis respectively.
    66  * word, line, sentence, and character boundary analysis respectively.
    67  * A single <code>BreakIterator</code> can work only on one unit
    67  * A single {@code BreakIterator} can work only on one unit
    68  * (word, line, sentence, and so on). You must use a different iterator
    68  * (word, line, sentence, and so on). You must use a different iterator
    69  * for each unit boundary analysis you wish to perform.
    69  * for each unit boundary analysis you wish to perform.
    70  *
    70  *
    71  * <p><a id="line"></a>
    71  * <p><a id="line"></a>
    72  * Line boundary analysis determines where a text string can be
    72  * Line boundary analysis determines where a text string can be
    98  * For example, an accented character might be stored as a base character
    98  * For example, an accented character might be stored as a base character
    99  * and a diacritical mark. What users consider to be a character can
    99  * and a diacritical mark. What users consider to be a character can
   100  * differ between languages.
   100  * differ between languages.
   101  *
   101  *
   102  * <p>
   102  * <p>
   103  * The <code>BreakIterator</code> instances returned by the factory methods
   103  * The {@code BreakIterator} instances returned by the factory methods
   104  * of this class are intended for use with natural languages only, not for
   104  * of this class are intended for use with natural languages only, not for
   105  * programming language text. It is however possible to define subclasses
   105  * programming language text. It is however possible to define subclasses
   106  * that tokenize a programming language.
   106  * that tokenize a programming language.
   107  *
   107  *
   108  * <P>
   108  * <P>
   272     public abstract int last();
   272     public abstract int last();
   273 
   273 
   274     /**
   274     /**
   275      * Returns the nth boundary from the current boundary. If either
   275      * Returns the nth boundary from the current boundary. If either
   276      * the first or last text boundary has been reached, it returns
   276      * the first or last text boundary has been reached, it returns
   277      * <code>BreakIterator.DONE</code> and the current position is set to either
   277      * {@code BreakIterator.DONE} and the current position is set to either
   278      * the first or last text boundary depending on which one is reached. Otherwise,
   278      * the first or last text boundary depending on which one is reached. Otherwise,
   279      * the iterator's current position is set to the new boundary.
   279      * the iterator's current position is set to the new boundary.
   280      * For example, if the iterator's current position is the mth text boundary
   280      * For example, if the iterator's current position is the mth text boundary
   281      * and three more boundaries exist from the current boundary to the last text
   281      * and three more boundaries exist from the current boundary to the last text
   282      * boundary, the next(2) call will return m + 2. The new text position is set
   282      * boundary, the next(2) call will return m + 2. The new text position is set
   283      * to the (m + 2)th text boundary. A next(4) call would return
   283      * to the (m + 2)th text boundary. A next(4) call would return
   284      * <code>BreakIterator.DONE</code> and the last text boundary would become the
   284      * {@code BreakIterator.DONE} and the last text boundary would become the
   285      * new text position.
   285      * new text position.
   286      * @param n which boundary to return.  A value of 0
   286      * @param n which boundary to return.  A value of 0
   287      * does nothing.  Negative values move to previous boundaries
   287      * does nothing.  Negative values move to previous boundaries
   288      * and positive values move to later boundaries.
   288      * and positive values move to later boundaries.
   289      * @return The character index of the nth boundary from the current position
   289      * @return The character index of the nth boundary from the current position
   290      * or <code>BreakIterator.DONE</code> if either first or last text boundary
   290      * or {@code BreakIterator.DONE} if either first or last text boundary
   291      * has been reached.
   291      * has been reached.
   292      */
   292      */
   293     public abstract int next(int n);
   293     public abstract int next(int n);
   294 
   294 
   295     /**
   295     /**
   296      * Returns the boundary following the current boundary. If the current boundary
   296      * Returns the boundary following the current boundary. If the current boundary
   297      * is the last text boundary, it returns <code>BreakIterator.DONE</code> and
   297      * is the last text boundary, it returns {@code BreakIterator.DONE} and
   298      * the iterator's current position is unchanged. Otherwise, the iterator's
   298      * the iterator's current position is unchanged. Otherwise, the iterator's
   299      * current position is set to the boundary following the current boundary.
   299      * current position is set to the boundary following the current boundary.
   300      * @return The character index of the next text boundary or
   300      * @return The character index of the next text boundary or
   301      * <code>BreakIterator.DONE</code> if the current boundary is the last text
   301      * {@code BreakIterator.DONE} if the current boundary is the last text
   302      * boundary.
   302      * boundary.
   303      * Equivalent to next(1).
   303      * Equivalent to next(1).
   304      * @see #next(int)
   304      * @see #next(int)
   305      */
   305      */
   306     public abstract int next();
   306     public abstract int next();
   307 
   307 
   308     /**
   308     /**
   309      * Returns the boundary preceding the current boundary. If the current boundary
   309      * Returns the boundary preceding the current boundary. If the current boundary
   310      * is the first text boundary, it returns <code>BreakIterator.DONE</code> and
   310      * is the first text boundary, it returns {@code BreakIterator.DONE} and
   311      * the iterator's current position is unchanged. Otherwise, the iterator's
   311      * the iterator's current position is unchanged. Otherwise, the iterator's
   312      * current position is set to the boundary preceding the current boundary.
   312      * current position is set to the boundary preceding the current boundary.
   313      * @return The character index of the previous text boundary or
   313      * @return The character index of the previous text boundary or
   314      * <code>BreakIterator.DONE</code> if the current boundary is the first text
   314      * {@code BreakIterator.DONE} if the current boundary is the first text
   315      * boundary.
   315      * boundary.
   316      */
   316      */
   317     public abstract int previous();
   317     public abstract int previous();
   318 
   318 
   319     /**
   319     /**
   320      * Returns the first boundary following the specified character offset. If the
   320      * Returns the first boundary following the specified character offset. If the
   321      * specified offset is equal to the last text boundary, it returns
   321      * specified offset is equal to the last text boundary, it returns
   322      * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
   322      * {@code BreakIterator.DONE} and the iterator's current position is unchanged.
   323      * Otherwise, the iterator's current position is set to the returned boundary.
   323      * Otherwise, the iterator's current position is set to the returned boundary.
   324      * The value returned is always greater than the offset or the value
   324      * The value returned is always greater than the offset or the value
   325      * <code>BreakIterator.DONE</code>.
   325      * {@code BreakIterator.DONE}.
   326      * @param offset the character offset to begin scanning.
   326      * @param offset the character offset to begin scanning.
   327      * @return The first boundary after the specified offset or
   327      * @return The first boundary after the specified offset or
   328      * <code>BreakIterator.DONE</code> if the last text boundary is passed in
   328      * {@code BreakIterator.DONE} if the last text boundary is passed in
   329      * as the offset.
   329      * as the offset.
   330      * @throws     IllegalArgumentException if the specified offset is less than
   330      * @throws     IllegalArgumentException if the specified offset is less than
   331      * the first text boundary or greater than the last text boundary.
   331      * the first text boundary or greater than the last text boundary.
   332      */
   332      */
   333     public abstract int following(int offset);
   333     public abstract int following(int offset);
   334 
   334 
   335     /**
   335     /**
   336      * Returns the last boundary preceding the specified character offset. If the
   336      * Returns the last boundary preceding the specified character offset. If the
   337      * specified offset is equal to the first text boundary, it returns
   337      * specified offset is equal to the first text boundary, it returns
   338      * <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
   338      * {@code BreakIterator.DONE} and the iterator's current position is unchanged.
   339      * Otherwise, the iterator's current position is set to the returned boundary.
   339      * Otherwise, the iterator's current position is set to the returned boundary.
   340      * The value returned is always less than the offset or the value
   340      * The value returned is always less than the offset or the value
   341      * <code>BreakIterator.DONE</code>.
   341      * {@code BreakIterator.DONE}.
   342      * @param offset the character offset to begin scanning.
   342      * @param offset the character offset to begin scanning.
   343      * @return The last boundary before the specified offset or
   343      * @return The last boundary before the specified offset or
   344      * <code>BreakIterator.DONE</code> if the first text boundary is passed in
   344      * {@code BreakIterator.DONE} if the first text boundary is passed in
   345      * as the offset.
   345      * as the offset.
   346      * @throws      IllegalArgumentException if the specified offset is less than
   346      * @throws      IllegalArgumentException if the specified offset is less than
   347      * the first text boundary or greater than the last text boundary.
   347      * the first text boundary or greater than the last text boundary.
   348      * @since 1.2
   348      * @since 1.2
   349      */
   349      */
   359     }
   359     }
   360 
   360 
   361     /**
   361     /**
   362      * Returns true if the specified character offset is a text boundary.
   362      * Returns true if the specified character offset is a text boundary.
   363      * @param offset the character offset to check.
   363      * @param offset the character offset to check.
   364      * @return <code>true</code> if "offset" is a boundary position,
   364      * @return {@code true} if "offset" is a boundary position,
   365      * <code>false</code> otherwise.
   365      * {@code false} otherwise.
   366      * @throws      IllegalArgumentException if the specified offset is less than
   366      * @throws      IllegalArgumentException if the specified offset is less than
   367      * the first text boundary or greater than the last text boundary.
   367      * the first text boundary or greater than the last text boundary.
   368      * @since 1.2
   368      * @since 1.2
   369      */
   369      */
   370     public boolean isBoundary(int offset) {
   370     public boolean isBoundary(int offset) {
   388 
   388 
   389     /**
   389     /**
   390      * Returns the character index of the text boundary that was most
   390      * Returns the character index of the text boundary that was most
   391      * recently returned by next(), next(int), previous(), first(), last(),
   391      * recently returned by next(), next(int), previous(), first(), last(),
   392      * following(int) or preceding(int). If any of these methods returns
   392      * following(int) or preceding(int). If any of these methods returns
   393      * <code>BreakIterator.DONE</code> because either first or last text boundary
   393      * {@code BreakIterator.DONE} because either first or last text boundary
   394      * has been reached, it returns the first or last text boundary depending on
   394      * has been reached, it returns the first or last text boundary depending on
   395      * which one is reached.
   395      * which one is reached.
   396      * @return The text boundary returned from the above methods, first or last
   396      * @return The text boundary returned from the above methods, first or last
   397      * text boundary.
   397      * text boundary.
   398      * @see #next()
   398      * @see #next()
   435 
   435 
   436     @SuppressWarnings("unchecked")
   436     @SuppressWarnings("unchecked")
   437     private static final SoftReference<BreakIteratorCache>[] iterCache = (SoftReference<BreakIteratorCache>[]) new SoftReference<?>[4];
   437     private static final SoftReference<BreakIteratorCache>[] iterCache = (SoftReference<BreakIteratorCache>[]) new SoftReference<?>[4];
   438 
   438 
   439     /**
   439     /**
   440      * Returns a new <code>BreakIterator</code> instance
   440      * Returns a new {@code BreakIterator} instance
   441      * for <a href="BreakIterator.html#word">word breaks</a>
   441      * for <a href="BreakIterator.html#word">word breaks</a>
   442      * for the {@linkplain Locale#getDefault() default locale}.
   442      * for the {@linkplain Locale#getDefault() default locale}.
   443      * @return A break iterator for word breaks
   443      * @return A break iterator for word breaks
   444      */
   444      */
   445     public static BreakIterator getWordInstance()
   445     public static BreakIterator getWordInstance()
   446     {
   446     {
   447         return getWordInstance(Locale.getDefault());
   447         return getWordInstance(Locale.getDefault());
   448     }
   448     }
   449 
   449 
   450     /**
   450     /**
   451      * Returns a new <code>BreakIterator</code> instance
   451      * Returns a new {@code BreakIterator} instance
   452      * for <a href="BreakIterator.html#word">word breaks</a>
   452      * for <a href="BreakIterator.html#word">word breaks</a>
   453      * for the given locale.
   453      * for the given locale.
   454      * @param locale the desired locale
   454      * @param locale the desired locale
   455      * @return A break iterator for word breaks
   455      * @return A break iterator for word breaks
   456      * @throws    NullPointerException if <code>locale</code> is null
   456      * @throws    NullPointerException if {@code locale} is null
   457      */
   457      */
   458     public static BreakIterator getWordInstance(Locale locale)
   458     public static BreakIterator getWordInstance(Locale locale)
   459     {
   459     {
   460         return getBreakInstance(locale, WORD_INDEX);
   460         return getBreakInstance(locale, WORD_INDEX);
   461     }
   461     }
   462 
   462 
   463     /**
   463     /**
   464      * Returns a new <code>BreakIterator</code> instance
   464      * Returns a new {@code BreakIterator} instance
   465      * for <a href="BreakIterator.html#line">line breaks</a>
   465      * for <a href="BreakIterator.html#line">line breaks</a>
   466      * for the {@linkplain Locale#getDefault() default locale}.
   466      * for the {@linkplain Locale#getDefault() default locale}.
   467      * @return A break iterator for line breaks
   467      * @return A break iterator for line breaks
   468      */
   468      */
   469     public static BreakIterator getLineInstance()
   469     public static BreakIterator getLineInstance()
   470     {
   470     {
   471         return getLineInstance(Locale.getDefault());
   471         return getLineInstance(Locale.getDefault());
   472     }
   472     }
   473 
   473 
   474     /**
   474     /**
   475      * Returns a new <code>BreakIterator</code> instance
   475      * Returns a new {@code BreakIterator} instance
   476      * for <a href="BreakIterator.html#line">line breaks</a>
   476      * for <a href="BreakIterator.html#line">line breaks</a>
   477      * for the given locale.
   477      * for the given locale.
   478      * @param locale the desired locale
   478      * @param locale the desired locale
   479      * @return A break iterator for line breaks
   479      * @return A break iterator for line breaks
   480      * @throws    NullPointerException if <code>locale</code> is null
   480      * @throws    NullPointerException if {@code locale} is null
   481      */
   481      */
   482     public static BreakIterator getLineInstance(Locale locale)
   482     public static BreakIterator getLineInstance(Locale locale)
   483     {
   483     {
   484         return getBreakInstance(locale, LINE_INDEX);
   484         return getBreakInstance(locale, LINE_INDEX);
   485     }
   485     }
   486 
   486 
   487     /**
   487     /**
   488      * Returns a new <code>BreakIterator</code> instance
   488      * Returns a new {@code BreakIterator} instance
   489      * for <a href="BreakIterator.html#character">character breaks</a>
   489      * for <a href="BreakIterator.html#character">character breaks</a>
   490      * for the {@linkplain Locale#getDefault() default locale}.
   490      * for the {@linkplain Locale#getDefault() default locale}.
   491      * @return A break iterator for character breaks
   491      * @return A break iterator for character breaks
   492      */
   492      */
   493     public static BreakIterator getCharacterInstance()
   493     public static BreakIterator getCharacterInstance()
   494     {
   494     {
   495         return getCharacterInstance(Locale.getDefault());
   495         return getCharacterInstance(Locale.getDefault());
   496     }
   496     }
   497 
   497 
   498     /**
   498     /**
   499      * Returns a new <code>BreakIterator</code> instance
   499      * Returns a new {@code BreakIterator} instance
   500      * for <a href="BreakIterator.html#character">character breaks</a>
   500      * for <a href="BreakIterator.html#character">character breaks</a>
   501      * for the given locale.
   501      * for the given locale.
   502      * @param locale the desired locale
   502      * @param locale the desired locale
   503      * @return A break iterator for character breaks
   503      * @return A break iterator for character breaks
   504      * @throws    NullPointerException if <code>locale</code> is null
   504      * @throws    NullPointerException if {@code locale} is null
   505      */
   505      */
   506     public static BreakIterator getCharacterInstance(Locale locale)
   506     public static BreakIterator getCharacterInstance(Locale locale)
   507     {
   507     {
   508         return getBreakInstance(locale, CHARACTER_INDEX);
   508         return getBreakInstance(locale, CHARACTER_INDEX);
   509     }
   509     }
   510 
   510 
   511     /**
   511     /**
   512      * Returns a new <code>BreakIterator</code> instance
   512      * Returns a new {@code BreakIterator} instance
   513      * for <a href="BreakIterator.html#sentence">sentence breaks</a>
   513      * for <a href="BreakIterator.html#sentence">sentence breaks</a>
   514      * for the {@linkplain Locale#getDefault() default locale}.
   514      * for the {@linkplain Locale#getDefault() default locale}.
   515      * @return A break iterator for sentence breaks
   515      * @return A break iterator for sentence breaks
   516      */
   516      */
   517     public static BreakIterator getSentenceInstance()
   517     public static BreakIterator getSentenceInstance()
   518     {
   518     {
   519         return getSentenceInstance(Locale.getDefault());
   519         return getSentenceInstance(Locale.getDefault());
   520     }
   520     }
   521 
   521 
   522     /**
   522     /**
   523      * Returns a new <code>BreakIterator</code> instance
   523      * Returns a new {@code BreakIterator} instance
   524      * for <a href="BreakIterator.html#sentence">sentence breaks</a>
   524      * for <a href="BreakIterator.html#sentence">sentence breaks</a>
   525      * for the given locale.
   525      * for the given locale.
   526      * @param locale the desired locale
   526      * @param locale the desired locale
   527      * @return A break iterator for sentence breaks
   527      * @return A break iterator for sentence breaks
   528      * @throws    NullPointerException if <code>locale</code> is null
   528      * @throws    NullPointerException if {@code locale} is null
   529      */
   529      */
   530     public static BreakIterator getSentenceInstance(Locale locale)
   530     public static BreakIterator getSentenceInstance(Locale locale)
   531     {
   531     {
   532         return getBreakInstance(locale, SENTENCE_INDEX);
   532         return getBreakInstance(locale, SENTENCE_INDEX);
   533     }
   533     }
   578         return iterator;
   578         return iterator;
   579     }
   579     }
   580 
   580 
   581     /**
   581     /**
   582      * Returns an array of all locales for which the
   582      * Returns an array of all locales for which the
   583      * <code>get*Instance</code> methods of this class can return
   583      * {@code get*Instance} methods of this class can return
   584      * localized instances.
   584      * localized instances.
   585      * The returned array represents the union of locales supported by the Java
   585      * The returned array represents the union of locales supported by the Java
   586      * runtime and by installed
   586      * runtime and by installed
   587      * {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations.
   587      * {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations.
   588      * It must contain at least a <code>Locale</code>
   588      * It must contain at least a {@code Locale}
   589      * instance equal to {@link java.util.Locale#US Locale.US}.
   589      * instance equal to {@link java.util.Locale#US Locale.US}.
   590      *
   590      *
   591      * @return An array of locales for which localized
   591      * @return An array of locales for which localized
   592      *         <code>BreakIterator</code> instances are available.
   592      *         {@code BreakIterator} instances are available.
   593      */
   593      */
   594     public static synchronized Locale[] getAvailableLocales()
   594     public static synchronized Locale[] getAvailableLocales()
   595     {
   595     {
   596         LocaleServiceProviderPool pool =
   596         LocaleServiceProviderPool pool =
   597             LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);
   597             LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);