jdk/test/java/util/regex/POSIX_Unicode.java
author martin
Tue, 15 Sep 2015 21:56:04 -0700
changeset 32649 2ee9017c7597
parent 23010 6dadb192ad81
permissions -rw-r--r--
8136583: Core libraries should use blessed modifier order Summary: Run blessed-modifier-order script (see bug) Reviewed-by: psandoz, chegar, alanb, plevart
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     1
/*
23010
6dadb192ad81 8029235: Update copyright year to match last edit in jdk8 jdk repository for 2013
lana
parents: 17434
diff changeset
     2
 * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
9536
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     3
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     4
 *
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     5
 * This code is free software; you can redistribute it and/or modify it
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     6
 * under the terms of the GNU General Public License version 2 only, as
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     7
 * published by the Free Software Foundation.
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     8
 *
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
     9
 * This code is distributed in the hope that it will be useful, but WITHOUT
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    10
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    11
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    12
 * version 2 for more details (a copy is included in the LICENSE file that
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    13
 * accompanied this code).
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    14
 *
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    15
 * You should have received a copy of the GNU General Public License version
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    16
 * 2 along with this work; if not, write to the Free Software Foundation,
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    17
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    18
 *
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    19
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    20
 * or visit www.oracle.com if you need additional information or have any
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    21
 * questions.
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    22
 */
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    23
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    24
import java.util.HashMap;
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    25
import java.util.Locale;
648c9add2a74 7039066: j.u.rgex does not match TR18 RL1.4 Simple Word Boundaries and RL1.2 Properties
sherman
parents:
diff changeset
    26
32649
2ee9017c7597 8136583: Core libraries should use blessed modifier order
martin
parents: 23010
diff changeset
    27
public final class POSIX_Unicode {

    /** Bit set of general categories accepted by {@link #isSpace}: Zs, Zl, Zp. */
    private static final int SPACE_TYPES =
        (1 << Character.SPACE_SEPARATOR) |
        (1 << Character.LINE_SEPARATOR) |
        (1 << Character.PARAGRAPH_SEPARATOR);

    /** Bit set of general categories accepted by {@link #isPunct}: Pc, Pd, Ps, Pe, Po, Pi, Pf. */
    private static final int PUNCT_TYPES =
        (1 << Character.CONNECTOR_PUNCTUATION) |
        (1 << Character.DASH_PUNCTUATION) |
        (1 << Character.START_PUNCTUATION) |
        (1 << Character.END_PUNCTUATION) |
        (1 << Character.OTHER_PUNCTUATION) |
        (1 << Character.INITIAL_QUOTE_PUNCTUATION) |
        (1 << Character.FINAL_QUOTE_PUNCTUATION);

    /** Bit set of the non-alpha, non-digit categories accepted by {@link #isWord}: Mn, Me, Mc, Pc. */
    private static final int WORD_EXTRA_TYPES =
        (1 << Character.NON_SPACING_MARK) |
        (1 << Character.ENCLOSING_MARK) |
        (1 << Character.COMBINING_SPACING_MARK) |
        (1 << Character.CONNECTOR_PUNCTUATION);

    /**
     * Tests whether the general category of {@code ch} is a member of a
     * category bit set.
     *
     * @param typeMask bit set in which bit {@code n} stands for the general
     *                 category whose {@link Character#getType(int)} value is {@code n}
     * @param ch       the code point to classify
     * @return {@code true} if the bit for the category of {@code ch} is set
     */
    private static boolean hasType(int typeMask, int ch) {
        return ((typeMask >> Character.getType(ch)) & 1) != 0;
    }

    /**
     * \p{Alphabetic}
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isAlphabetic(int)}
     */
    public static boolean isAlpha(int ch) {
        return Character.isAlphabetic(ch);
    }

    /**
     * \p{Lowercase}
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isLowerCase(int)}
     */
    public static boolean isLower(int ch) {
        return Character.isLowerCase(ch);
    }

    /**
     * \p{Uppercase}
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isUpperCase(int)}
     */
    public static boolean isUpper(int ch) {
        return Character.isUpperCase(ch);
    }

    /**
     * \p{Whitespace}
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a space, line or paragraph
     *         separator, lies in U+0009..U+000D, or is U+0085
     */
    public static boolean isSpace(int ch) {
        return hasType(SPACE_TYPES, ch) ||
               (ch >= 0x9 && ch <= 0xd) ||
               (ch == 0x85);
    }

    /**
     * \p{gc=Control}
     *
     * @param ch the code point to test
     * @return {@code true} if the general category of {@code ch} is
     *         {@link Character#CONTROL}
     */
    public static boolean isCntrl(int ch) {
        return Character.getType(ch) == Character.CONTROL;
    }

    /**
     * \p{gc=Punctuation}
     *
     * @param ch the code point to test
     * @return {@code true} if the general category of {@code ch} is one of
     *         the seven punctuation categories
     */
    public static boolean isPunct(int ch) {
        return hasType(PUNCT_TYPES, ch);
    }

    /**
     * \p{gc=Decimal_Number} plus \p{Hex_Digit} (PropList.txt: Hex_Digit).
     *
     * Note that every decimal digit is accepted, not only the ASCII and
     * fullwidth ones, because {@link Character#isDigit(int)} is part of
     * the test.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is a decimal digit or one of the
     *         ASCII / fullwidth hexadecimal digits
     */
    public static boolean isHexDigit(int ch) {
        return Character.isDigit(ch) ||
               (ch >= 0x0030 && ch <= 0x0039) ||   // 0-9
               (ch >= 0x0041 && ch <= 0x0046) ||   // A-F
               (ch >= 0x0061 && ch <= 0x0066) ||   // a-f
               (ch >= 0xFF10 && ch <= 0xFF19) ||   // fullwidth 0-9
               (ch >= 0xFF21 && ch <= 0xFF26) ||   // fullwidth A-F
               (ch >= 0xFF41 && ch <= 0xFF46);     // fullwidth a-f
    }

    /**
     * \p{gc=Decimal_Number}
     *
     * @param ch the code point to test
     * @return the result of {@link Character#isDigit(int)}
     */
    public static boolean isDigit(int ch) {
        return Character.isDigit(ch);
    }

    /**
     * \p{alpha} or \p{digit}
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is alphabetic or a decimal digit
     */
    public static boolean isAlnum(int ch) {
        return Character.isAlphabetic(ch) || Character.isDigit(ch);
    }

    /**
     * \p{Whitespace} --
     * [\N{LF} \N{VT} \N{FF} \N{CR} \N{NEL}  -> 0xa, 0xb, 0xc, 0xd, 0x85
     *  \p{gc=Line_Separator}
     *  \p{gc=Paragraph_Separator}]
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is whitespace per {@link #isSpace}
     *         but is neither one of the listed line-breaking code points nor
     *         a line or paragraph separator
     */
    public static boolean isBlank(int ch) {
        int type = Character.getType(ch);
        return isSpace(ch) &&
               ch != 0xa && ch != 0xb && ch != 0xc && ch != 0xd && ch != 0x85 &&
               type != Character.LINE_SEPARATOR &&
               type != Character.PARAGRAPH_SEPARATOR;
    }

    /**
     * [^
     *  \p{space}
     *  \p{gc=Control}
     *  \p{gc=Surrogate}
     *  \p{gc=Unassigned}]
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is not whitespace and its general
     *         category is none of control, surrogate or unassigned
     */
    public static boolean isGraph(int ch) {
        int type = Character.getType(ch);
        return !(isSpace(ch) ||
                 Character.CONTROL == type ||
                 Character.SURROGATE == type ||
                 Character.UNASSIGNED == type);
    }

    /**
     * \p{graph}
     * \p{blank}
     * -- \p{cntrl}
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is graphic or blank and is not a
     *         control character
     */
    public static boolean isPrint(int ch) {
        return (isGraph(ch) || isBlank(ch)) && !isCntrl(ch);
    }

    /**
     * PropList.txt:Noncharacter_Code_Point
     *
     * @param ch the code point to test
     * @return {@code true} if the low 16 bits of {@code ch} are 0xFFFE or
     *         0xFFFF, or {@code ch} lies in U+FDD0..U+FDEF
     */
    public static boolean isNoncharacterCodePoint(int ch) {
        return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
    }

    /**
     * Join_Control: U+200C and U+200D.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} is U+200C or U+200D
     */
    public static boolean isJoinControl(int ch) {
        return (ch == 0x200C || ch == 0x200D);
    }

    /**
     *  \p{alpha}
     *  \p{gc=Mark}
     *  \p{digit}
     *  \p{gc=Connector_Punctuation}
     *  plus the join controls accepted by {@link #isJoinControl}.
     *
     * @param ch the code point to test
     * @return {@code true} if {@code ch} belongs to any of the sets above
     */
    public static boolean isWord(int ch) {
        return isAlpha(ch) ||
               hasType(WORD_EXTRA_TYPES, ch) ||
               isDigit(ch) ||
               isJoinControl(ch);
    }
}