# HG changeset patch # User okutsu # Date 1292573623 -32400 # Node ID 50fa5dbf83e62ee247d115614e38f18da336f195 # Parent f66de3eb25f0b9f16b6187df54ec6100413895db 6644493: [Fmt-Da] SimpleDateFormat parsing RFC822 time offset is slow Reviewed-by: peytoia diff -r f66de3eb25f0 -r 50fa5dbf83e6 jdk/src/share/classes/java/text/SimpleDateFormat.java --- a/jdk/src/share/classes/java/text/SimpleDateFormat.java Fri Dec 17 16:56:13 2010 +0900 +++ b/jdk/src/share/classes/java/text/SimpleDateFormat.java Fri Dec 17 17:13:43 2010 +0900 @@ -1663,6 +1663,81 @@ } /** + * Parses numeric forms of time zone offset, such as "hh:mm", and + * sets calb to the parsed value. + * + * @param text the text to be parsed + * @param start the character position to start parsing + * @param sign 1: positive; -1: negative + * @param count 0: 'Z' or "GMT+hh:mm" parsing; 1 - 3: the number of 'X's + * @param colon true - colon required between hh and mm; false - no colon required + * @param calb a CalendarBuilder in which the parsed value is stored + * @return updated parsed position, or its negative value to indicate a parsing error + */ + private int subParseNumericZone(String text, int start, int sign, int count, + boolean colon, CalendarBuilder calb) { + int index = start; + + parse: + try { + char c = text.charAt(index++); + // Parse hh + int hours; + if (!isDigit(c)) { + break parse; + } + hours = c - '0'; + c = text.charAt(index++); + if (isDigit(c)) { + hours = hours * 10 + (c - '0'); + } else { + // If no colon in RFC 822 or 'X' (ISO), two digits are + // required. + if (count > 0 || !colon) { + break parse; + } + --index; + } + if (hours > 23) { + break parse; + } + int minutes = 0; + if (count != 1) { + // Proceed with parsing mm + c = text.charAt(index++); + if (colon) { + if (c != ':') { + break parse; + } + c = text.charAt(index++); + } + if (!isDigit(c)) { + break parse; + } + minutes = c - '0'; + c = text.charAt(index++); + if (!isDigit(c)) { + break parse; + } + minutes = minutes * 10 + (c - '0'); + if (minutes > 59) { + break parse; + } + } + minutes += hours * 60; + calb.set(Calendar.ZONE_OFFSET, minutes * MILLIS_PER_MINUTE * sign) + .set(Calendar.DST_OFFSET, 0); + return index; + } catch (IndexOutOfBoundsException e) { + } + return 1 - index; // -(index - 1) + } + + private boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + /** * Private member function that converts the parsed date strings into * timeFields. Returns -start (for ParsePosition) if failed. * @param text the time text to be parsed. @@ -1907,248 +1982,95 @@ case PATTERN_ZONE_NAME: // 'z' case PATTERN_ZONE_VALUE: // 'Z' - // First try to parse generic forms such as GMT-07:00. Do this first - // in case localized TimeZoneNames contains the string "GMT" - // for a zone; in that case, we don't want to match the first three - // characters of GMT+/-hh:mm etc. { int sign = 0; - int offset; - - // For time zones that have no known names, look for strings - // of the form: - // GMT[+-]hours:minutes or - // GMT. - if ((text.length() - start) >= GMT.length() && - text.regionMatches(true, start, GMT, 0, GMT.length())) { - int num; - calb.set(Calendar.DST_OFFSET, 0); - pos.index = start + GMT.length(); - - try { // try-catch for "GMT" only time zone string - char c = text.charAt(pos.index); - if (c == '+') { - sign = 1; - } else if (c == '-') { - sign = -1; - } + try { + char c = text.charAt(pos.index); + if (c == '+') { + sign = 1; + } else if (c == '-') { + sign = -1; } - catch(StringIndexOutOfBoundsException e) {} - - if (sign == 0) { /* "GMT" without offset */ - calb.set(Calendar.ZONE_OFFSET, 0); - return pos.index; - } + if (sign == 0) { + // Try parsing a custom time zone "GMT+hh:mm" or "GMT". + if ((c == 'G' || c == 'g') + && (text.length() - start) >= GMT.length() + && text.regionMatches(true, start, GMT, 0, GMT.length())) { + pos.index = start + GMT.length(); - // Look for hours. - try { - char c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - num = c - '0'; - - if (text.charAt(++pos.index) != ':') { - c = text.charAt(pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; + if ((text.length() - pos.index) > 0) { + c = text.charAt(pos.index); + if (c == '+') { + sign = 1; + } else if (c == '-') { + sign = -1; + } } - num *= 10; - num += c - '0'; - pos.index++; - } - if (num > 23) { - --pos.index; - break parsing; - } - if (text.charAt(pos.index) != ':') { - break parsing; - } - // Look for minutes. - offset = num * 60; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - num = c - '0'; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - num *= 10; - num += c - '0'; + if (sign == 0) { /* "GMT" without offset */ + calb.set(Calendar.ZONE_OFFSET, 0) + .set(Calendar.DST_OFFSET, 0); + return pos.index; + } - if (num > 59) { - break parsing; - } - } catch (StringIndexOutOfBoundsException e) { - break parsing; - } - offset += num; - // Fall through for final processing below of 'offset' and 'sign'. - } else { - // If the first character is a sign, look for numeric timezones of - // the form [+-]hhmm as specified by RFC 822. Otherwise, check - // for named time zones by looking through the locale data from - // the TimeZoneNames strings. - try { - char c = text.charAt(pos.index); - if (c == '+') { - sign = 1; - } else if (c == '-') { - sign = -1; - } else { - // Try parsing the text as a time zone name (abbr). - int i = subParseZoneString(text, pos.index, calb); - if (i != 0) { + // Parse the rest as "hh:mm" + int i = subParseNumericZone(text, ++pos.index, + sign, 0, true, calb); + if (i > 0) { return i; } - break parsing; - } - - // Parse the text as an RFC 822 time zone string. This code is - // actually a little more permissive than RFC 822. It will - // try to do its best with numbers that aren't strictly 4 - // digits long. - - // Look for hh. - int hours = 0; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - hours = c - '0'; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - hours *= 10; - hours += c - '0'; - - if (hours > 23) { - break parsing; + pos.index = -i; + } else { + // Try parsing the text as a time zone + // name or abbreviation. + int i = subParseZoneString(text, pos.index, calb); + if (i > 0) { + return i; + } + pos.index = -i; } - - // Look for mm. - int minutes = 0; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - minutes = c - '0'; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; + } else { + // Parse the rest as "hhmm" (RFC 822) + int i = subParseNumericZone(text, ++pos.index, + sign, 0, false, calb); + if (i > 0) { + return i; } - minutes *= 10; - minutes += c - '0'; - - if (minutes > 59) { - break parsing; - } - - offset = hours * 60 + minutes; - } catch (StringIndexOutOfBoundsException e) { - break parsing; + pos.index = -i; } - } - - // Do the final processing for both of the above cases. We only - // arrive here if the form GMT+/-... or an RFC 822 form was seen. - if (sign != 0) { - offset *= MILLIS_PER_MINUTE * sign; - calb.set(Calendar.ZONE_OFFSET, offset).set(Calendar.DST_OFFSET, 0); - return ++pos.index; + } catch (IndexOutOfBoundsException e) { } } break parsing; case PATTERN_ISO_ZONE: // 'X' { - int sign = 0; - int offset = 0; - - iso8601: { - try { - char c = text.charAt(pos.index); - if (c == 'Z') { - calb.set(Calendar.ZONE_OFFSET, 0).set(Calendar.DST_OFFSET, 0); - return ++pos.index; - } - - // parse text as "+/-hh[[:]mm]" based on count - if (c == '+') { - sign = 1; - } else if (c == '-') { - sign = -1; - } - // Look for hh. - int hours = 0; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - hours = c - '0'; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - hours *= 10; - hours += c - '0'; - if (hours > 23) { - break parsing; - } + if ((text.length() - pos.index) <= 0) { + break parsing; + } - if (count == 1) { // "X" - offset = hours * 60; - break iso8601; - } - - c = text.charAt(++pos.index); - // Skip ':' if "XXX" - if (c == ':') { - if (count == 2) { - break parsing; - } - c = text.charAt(++pos.index); - } else { - if (count == 3) { - // missing ':' - break parsing; - } - } - - // Look for mm. - int minutes = 0; - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - minutes = c - '0'; - c = text.charAt(++pos.index); - if (c < '0' || c > '9') { /* must be from '0' to '9'. */ - break parsing; - } - minutes *= 10; - minutes += c - '0'; - - if (minutes > 59) { - break parsing; - } - - offset = hours * 60 + minutes; - } catch (StringIndexOutOfBoundsException e) { - break parsing; - } + int sign = 0; + char c = text.charAt(pos.index); + if (c == 'Z') { + calb.set(Calendar.ZONE_OFFSET, 0).set(Calendar.DST_OFFSET, 0); + return ++pos.index; } - // Do the final processing for both of the above cases. We only - // arrive here if the form GMT+/-... or an RFC 822 form was seen. - if (sign != 0) { - offset *= MILLIS_PER_MINUTE * sign; - calb.set(Calendar.ZONE_OFFSET, offset).set(Calendar.DST_OFFSET, 0); - return ++pos.index; + // parse text as "+/-hh[[:]mm]" based on count + if (c == '+') { + sign = 1; + } else if (c == '-') { + sign = -1; + } else { + ++pos.index; + break parsing; } + int i = subParseNumericZone(text, ++pos.index, sign, count, + count == 3, calb); + if (i > 0) { + return i; + } + pos.index = -i; } break parsing; diff -r f66de3eb25f0 -r 50fa5dbf83e6 jdk/test/java/text/Format/DateFormat/ISO8601ZoneTest.java --- a/jdk/test/java/text/Format/DateFormat/ISO8601ZoneTest.java Fri Dec 17 16:56:13 2010 +0900 +++ b/jdk/test/java/text/Format/DateFormat/ISO8601ZoneTest.java Fri Dec 17 17:13:43 2010 +0900 @@ -60,48 +60,51 @@ "yyyy-MM-dd'T'HH:mm:ss.SSSXXX", }; + // badData[][0] - format + // badData[][1] - (bad) text to be parsed + // badData[][2] - subtext at the end of which a parse error is detected static final String[][] badData = { - { "X", "1" }, - { "X", "+1" }, - { "X", "-2" }, - { "X", "-24" }, - { "X", "+24" }, + { "X", "1", "1" }, + { "X", "+1", "+1" }, + { "X", "-2", "-2" }, + { "X", "-24", "-2" }, + { "X", "+24", "+2" }, - { "XX", "9" }, - { "XX", "23" }, - { "XX", "234" }, - { "XX", "3456" }, - { "XX", "23456" }, - { "XX", "+1" }, - { "XX", "-12" }, - { "XX", "+123" }, - { "XX", "-12:34" }, - { "XX", "+12:34" }, - { "XX", "-2423" }, - { "XX", "+2423" }, - { "XX", "-1260" }, - { "XX", "+1260" }, + { "XX", "9", "9" }, + { "XX", "23", "2" }, + { "XX", "234", "2" }, + { "XX", "3456", "3" }, + { "XX", "23456", "2" }, + { "XX", "+1", "+1" }, + { "XX", "-12", "-12" }, + { "XX", "+123", "+123" }, + { "XX", "-12:34", "-12" }, + { "XX", "+12:34", "+12" }, + { "XX", "-2423", "-2" }, + { "XX", "+2423", "+2" }, + { "XX", "-1260", "-126" }, + { "XX", "+1260", "+126" }, - { "XXX", "9" }, - { "XXX", "23" }, - { "XXX", "234" }, - { "XXX", "3456" }, - { "XXX", "23456" }, - { "XXX", "2:34" }, - { "XXX", "12:4" }, - { "XXX", "12:34" }, - { "XXX", "-1" }, - { "XXX", "+1" }, - { "XXX", "-12" }, - { "XXX", "+12" }, - { "XXX", "-123" }, - { "XXX", "+123" }, - { "XXX", "-1234" }, - { "XXX", "+1234" }, - { "XXX", "+24:23" }, - { "XXX", "+12:60" }, - { "XXX", "+1:23" }, - { "XXX", "+12:3" }, + { "XXX", "9", "9" }, + { "XXX", "23", "2" }, + { "XXX", "234", "2" }, + { "XXX", "3456", "3" }, + { "XXX", "23456", "2" }, + { "XXX", "2:34", "2" }, + { "XXX", "12:4", "1" }, + { "XXX", "12:34", "1" }, + { "XXX", "-1", "-1" }, + { "XXX", "+1", "+1" }, + { "XXX", "-12", "-12" }, + { "XXX", "+12", "+12" }, + { "XXX", "-123", "-12" }, + { "XXX", "+123", "+12" }, + { "XXX", "-1234", "-12" }, + { "XXX", "+1234", "+12" }, + { "XXX", "+24:23", "+2" }, + { "XXX", "+12:60", "+12:6" }, + { "XXX", "+1:23", "+1" }, + { "XXX", "+12:3", "+12:3" }, }; static String[] badFormats = { @@ -110,6 +113,8 @@ public static void main(String[] args) throws Exception { TimeZone tz = TimeZone.getDefault(); + Locale loc = Locale.getDefault(); + Locale.setDefault(Locale.US); try { for (int i = 0; i < formatData.length; i++) { @@ -128,7 +133,7 @@ } for (String[] d : badData) { - badDataParsing(d[0], d[1]); + badDataParsing(d[0], d[1], d[2].length()); } for (String fmt : badFormats) { @@ -136,6 +141,7 @@ } } finally { TimeZone.setDefault(tz); + Locale.setDefault(loc); } } @@ -188,15 +194,24 @@ } - static void badDataParsing(String fmt, String text) { + static void badDataParsing(String fmt, String text, int expectedErrorIndex) { + SimpleDateFormat sdf = new SimpleDateFormat(fmt); try { - SimpleDateFormat sdf = new SimpleDateFormat(fmt); sdf.parse(text); throw new RuntimeException("didn't throw an exception: fmt=" + fmt + ", text=" + text); } catch (ParseException e) { // OK } + + ParsePosition pos = new ParsePosition(0); + Date d = sdf.parse(text, pos); + int errorIndex = pos.getErrorIndex(); + if (d != null || errorIndex != expectedErrorIndex) { + throw new RuntimeException("Bad error index=" + errorIndex + + ", expected=" + expectedErrorIndex + + ", fmt=" + fmt + ", text=" + text); + } } static void badFormat(String fmt) {