# HG changeset patch # User sundar # Date 1369912786 -19800 # Node ID 931209786d11a21ce53ddfd73f9f524858154773 # Parent d139023165ed524038c2e503509f69461f48631a 8015353: Date.parse illegal string parsing issues Reviewed-by: jlaskey, lagergren diff -r d139023165ed -r 931209786d11 nashorn/src/jdk/nashorn/internal/objects/DateParser.java --- a/nashorn/src/jdk/nashorn/internal/objects/DateParser.java Wed May 29 16:23:50 2013 -0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,706 +0,0 @@ -/* - * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -package jdk.nashorn.internal.objects; - -import static java.lang.Character.DECIMAL_DIGIT_NUMBER; -import static java.lang.Character.LOWERCASE_LETTER; -import static java.lang.Character.OTHER_PUNCTUATION; -import static java.lang.Character.SPACE_SEPARATOR; -import static java.lang.Character.UPPERCASE_LETTER; - -import java.util.HashMap; -import java.util.Locale; - -/** - * JavaScript date parser. This class first tries to parse a date string - * according to the extended ISO 8601 format specified in ES5 15.9.1.15. - * If that fails, it falls back to legacy mode in which it accepts a range - * of different formats. - * - *

This class is neither thread-safe nor reusable. Calling the - * parse() method more than once will yield undefined results.

- */ -public class DateParser { - - /** Constant for index position of parsed year value. */ - public final static int YEAR = 0; - /** Constant for index position of parsed month value. */ - public final static int MONTH = 1; - /** Constant for index position of parsed day value. */ - public final static int DAY = 2; - /** Constant for index position of parsed hour value. */ - public final static int HOUR = 3; - /** Constant for index position of parsed minute value. */ - public final static int MINUTE = 4; - /** Constant for index position of parsed second value. */ - public final static int SECOND = 5; - /** Constant for index position of parsed millisecond value. */ - public final static int MILLISECOND = 6; - /** Constant for index position of parsed time zone offset value. */ - public final static int TIMEZONE = 7; - - private enum Token { - UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END - } - - private final String string; - private final int length; - private final Integer[] fields; - private int pos = 0; - private Token token; - private int tokenLength; - private Name nameValue; - private int numValue; - private int currentField = YEAR; - private int yearSign = 0; - private boolean namedMonth = false; - - private final static HashMap names = new HashMap<>(); - - static { - addName("monday", Name.DAY_OF_WEEK, 0); - addName("tuesday", Name.DAY_OF_WEEK, 0); - addName("wednesday", Name.DAY_OF_WEEK, 0); - addName("thursday", Name.DAY_OF_WEEK, 0); - addName("friday", Name.DAY_OF_WEEK, 0); - addName("saturday", Name.DAY_OF_WEEK, 0); - addName("sunday", Name.DAY_OF_WEEK, 0); - addName("january", Name.MONTH_NAME, 1); - addName("february", Name.MONTH_NAME, 2); - addName("march", Name.MONTH_NAME, 3); - addName("april", Name.MONTH_NAME, 4); - addName("may", Name.MONTH_NAME, 5); - addName("june", Name.MONTH_NAME, 6); - addName("july", Name.MONTH_NAME, 7); - addName("august", Name.MONTH_NAME, 8); - addName("september", Name.MONTH_NAME, 9); - addName("october", Name.MONTH_NAME, 10); - addName("november", Name.MONTH_NAME, 11); - addName("december", Name.MONTH_NAME, 12); - addName("am", Name.AM_PM, 0); - addName("pm", Name.AM_PM, 12); - addName("z", Name.TIMEZONE_ID, 0); - addName("gmt", Name.TIMEZONE_ID, 0); - addName("ut", Name.TIMEZONE_ID, 0); - addName("utc", Name.TIMEZONE_ID, 0); - addName("est", Name.TIMEZONE_ID, -5 * 60); - addName("edt", Name.TIMEZONE_ID, -4 * 60); - addName("cst", Name.TIMEZONE_ID, -6 * 60); - addName("cdt", Name.TIMEZONE_ID, -5 * 60); - addName("mst", Name.TIMEZONE_ID, -7 * 60); - addName("mdt", Name.TIMEZONE_ID, -6 * 60); - addName("pst", Name.TIMEZONE_ID, -8 * 60); - addName("pdt", Name.TIMEZONE_ID, -7 * 60); - addName("t", Name.TIME_SEPARATOR, 0); - } - - /** - * Construct a new DateParser instance for parsing the given string. - * @param string the string to be parsed - */ - public DateParser(final String string) { - this.string = string; - this.length = string.length(); - this.fields = new Integer[TIMEZONE + 1]; - } - - /** - * Try parsing the given string as date according to the extended ISO 8601 format - * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails. - * This method returns true if the string could be parsed. - * @return true if the string could be parsed as date - */ - public boolean parse() { - return parseEcmaDate() || parseLegacyDate(); - } - - /** - * Try parsing the date string according to the rules laid out in ES5 15.9.1.15. - * The date string must conform to the following format: - * - *
  [('-'|'+')yy]yyyy[-MM[-dd]][hh:mm[:ss[.sss]][Z|(+|-)hh:mm]] 
- * - *

If the string does not contain a time zone offset, the TIMEZONE field - * is set to 0 (GMT).

- * @return true if string represents a valid ES5 date string. - */ - public boolean parseEcmaDate() { - - if (token == null) { - token = next(); - } - - while (token != Token.END) { - - switch (token) { - case NUMBER: - if (currentField == YEAR && yearSign != 0) { - // 15.9.1.15.1 Extended year must have six digits - if (tokenLength != 6) { - return false; - } - numValue *= yearSign; - } else if (!checkEcmaField(currentField, numValue)) { - return false; - } - if (!skipEcmaDelimiter()) { - return false; - } - if (currentField < TIMEZONE) { - set(currentField++, numValue); - } - break; - - case NAME: - if (nameValue == null) { - return false; - } - switch (nameValue.type) { - case Name.TIME_SEPARATOR: - if (currentField == YEAR || currentField > HOUR) { - return false; - } - currentField = HOUR; - break; - case Name.TIMEZONE_ID: - if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) { - return false; - } - break; - default: - return false; - } - break; - - case SIGN: - if (currentField == YEAR) { - yearSign = numValue; - } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) { - // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds - return false; - } - break; - - default: - return false; - } - token = next(); - } - - return patchResult(true); - } - - /** - * Try parsing the date using a fuzzy algorithm that can handle a variety of formats. - * - *

Numbers separated by ':' are treated as time values, optionally followed by a - * millisecond value separated by '.'. Other number values are treated as date values. - * The exact sequence of day, month, and year values to apply is determined heuristically.

- * - *

English month names and selected time zone names as well as AM/PM markers are recognized - * and handled properly. Additionally, numeric time zone offsets such as (+|-)hh:mm or - * (+|-)hhmm are recognized. If the string does not contain a time zone offset - * the TIMEZONEfield is left undefined, meaning the local time zone should be applied.

- * - *

English weekday names are recognized but ignored. All text in parentheses is ignored as well. - * All other text causes parsing to fail.

- * - * @return true if the string could be parsed - */ - public boolean parseLegacyDate() { - - if (yearSign != 0 || currentField > DAY) { - // we don't support signed years in legacy mode - return false; - } - if (token == null) { - token = next(); - } - - while (token != Token.END) { - - switch (token) { - case NUMBER: - if (skip(':')) { - // A number followed by ':' is parsed as time - if (!setTimeField(numValue)) { - return false; - } - // consume remaining time tokens - do { - token = next(); - if (token != Token.NUMBER || !setTimeField(numValue)) { - return false; - } - } while (skip(isSet(SECOND) ? '.' : ':')); - - } else { - // Parse as date token - if (!setDateField(numValue)) { - return false; - } - skip('-'); - } - break; - - case NAME: - if (nameValue == null) { - return false; - } - switch (nameValue.type) { - case Name.AM_PM: - if (!setAmPm(nameValue.value)) { - return false; - } - break; - case Name.MONTH_NAME: - if (!setMonth(nameValue.value)) { - return false; - } - break; - case Name.TIMEZONE_ID: - if (!setTimezone(nameValue.value, false)) { - return false; - } - break; - case Name.TIME_SEPARATOR: - return false; - default: - break; - } - if (nameValue.type != Name.TIMEZONE_ID) { - skip('-'); - } - break; - - case SIGN: - if (!setTimezone(readTimeZoneOffset(), true)) { - return false; - } - break; - - case PARENTHESIS: - if (!skipParentheses()) { - return false; - } - break; - - case SEPARATOR: - break; - - default: - return false; - } - token = next(); - } - - return patchResult(false); - } - - /** - * Get the parsed date and time fields as an array of Integers. - * - *

If parsing was successful, all fields are guaranteed to be set except for the - * TIMEZONE field which may be null, meaning that local time zone - * offset should be applied.

- * - * @return the parsed date fields - */ - public Integer[] getDateFields() { - return fields; - } - - private boolean isSet(final int field) { - return fields[field] != null; - } - - private Integer get(final int field) { - return fields[field]; - } - - private void set(final int field, final int value) { - fields[field] = value; - } - - private int peek() { - return pos < length ? string.charAt(pos) : -1; - } - - private boolean skip(final char c) { - if (pos < length && string.charAt(pos) == c) { - token = null; - pos++; - return true; - } - return false; - } - - private Token next() { - if (pos >= length) { - tokenLength = 0; - return Token.END; - } - - final char c = string.charAt(pos); - - if (c > 0x80) { - tokenLength = 1; - pos++; - return Token.UNKNOWN; // We only deal with ASCII here - } - - final int type = Character.getType(c); - switch (type) { - case DECIMAL_DIGIT_NUMBER: - numValue = readNumber(6); - return Token.NUMBER; - case SPACE_SEPARATOR : - case OTHER_PUNCTUATION: - tokenLength = 1; - pos++; - return Token.SEPARATOR; - case UPPERCASE_LETTER: - case LOWERCASE_LETTER: - nameValue = readName(); - return Token.NAME; - default: - tokenLength = 1; - pos++; - switch (c) { - case '(': - return Token.PARENTHESIS; - case '-': - case '+': - numValue = c == '-' ? -1 : 1; - return Token.SIGN; - default: - return Token.UNKNOWN; - } - } - } - - private static boolean checkLegacyField(final int field, final int value) { - switch (field) { - case HOUR: - return isHour(value); - case MINUTE: - case SECOND: - return isMinuteOrSecond(value); - case MILLISECOND: - return isMillisecond(value); - default: - // skip validation on other legacy fields as we don't know what's what - return true; - } - } - - private boolean checkEcmaField(final int field, final int value) { - switch (field) { - case YEAR: - return tokenLength == 4; - case MONTH: - return tokenLength == 2 && isMonth(value); - case DAY: - return tokenLength == 2 && isDay(value); - case HOUR: - return tokenLength == 2 && isHour(value); - case MINUTE: - case SECOND: - return tokenLength == 2 && isMinuteOrSecond(value); - case MILLISECOND: - // we allow millisecond to be less than 3 digits - return tokenLength < 4 && isMillisecond(value); - default: - return true; - } - } - - private boolean skipEcmaDelimiter() { - switch (currentField) { - case YEAR: - case MONTH: - return skip('-') || peek() == 'T' || peek() == -1; - case DAY: - return peek() == 'T' || peek() == -1; - case HOUR: - case MINUTE: - return skip(':') || endOfTime(); - case SECOND: - return skip('.') || endOfTime(); - default: - return true; - } - } - - private boolean endOfTime() { - final int c = peek(); - return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' '; - } - - private static boolean isAsciiLetter(final char ch) { - return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z'); - } - - private static boolean isAsciiDigit(final char ch) { - return '0' <= ch && ch <= '9'; - } - - private int readNumber(final int maxDigits) { - final int start = pos; - int n = 0; - final int max = Math.min(length, pos + maxDigits); - while (pos < max && isAsciiDigit(string.charAt(pos))) { - n = n * 10 + string.charAt(pos++) - '0'; - } - tokenLength = pos - start; - return n; - } - - private Name readName() { - final int start = pos; - final int limit = Math.min(pos + 3, length); - - // first read up to the key length - while (pos < limit && isAsciiLetter(string.charAt(pos))) { - pos++; - } - final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH); - final Name name = names.get(key); - // then advance to end of name - while (pos < length && isAsciiLetter(string.charAt(pos))) { - pos++; - } - - tokenLength = pos - start; - // make sure we have the full name or a prefix - if (name != null && name.matches(string, start, tokenLength)) { - return name; - } - return null; - } - - private int readTimeZoneOffset() { - final int sign = string.charAt(pos - 1) == '+' ? 1 : -1; - int offset = readNumber(2); - skip(':'); - offset = offset * 60 + readNumber(2); - return sign * offset; - } - - private boolean skipParentheses() { - int parenCount = 1; - while (pos < length && parenCount != 0) { - final char c = string.charAt(pos++); - if (c == '(') { - parenCount++; - } else if (c == ')') { - parenCount--; - } - } - return true; - } - - private static int getDefaultValue(final int field) { - switch (field) { - case MONTH: - case DAY: - return 1; - default: - return 0; - } - } - - private static boolean isDay(final int n) { - return 1 <= n && n <= 31; - } - - private static boolean isMonth(final int n) { - return 1 <= n && n <= 12; - } - - private static boolean isHour(final int n) { - return 0 <= n && n <= 24; - } - - private static boolean isMinuteOrSecond(final int n) { - return 0 <= n && n < 60; - } - - private static boolean isMillisecond(final int n) { - return 0<= n && n < 1000; - } - - private boolean setMonth(final int m) { - if (!isSet(MONTH)) { - namedMonth = true; - set(MONTH, m); - return true; - } - return false; - } - - private boolean setDateField(final int n) { - for (int field = YEAR; field != HOUR; field++) { - if (!isSet(field)) { - // no validation on legacy date fields - set(field, n); - return true; - } - } - return false; - } - - private boolean setTimeField(final int n) { - for (int field = HOUR; field != TIMEZONE; field++) { - if (!isSet(field)) { - if (checkLegacyField(field, n)) { - set(field, n); - return true; - } - return false; - } - } - return false; - } - - private boolean setTimezone(final int offset, final boolean asNumericOffset) { - if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) { - set(TIMEZONE, offset); - return true; - } - return false; - } - - private boolean setAmPm(final int offset) { - if (!isSet(HOUR)) { - return false; - } - final int hour = get(HOUR); - if (hour >= 0 && hour <= 12) { - set(HOUR, hour + offset); - } - return true; - } - - private boolean patchResult(final boolean strict) { - // sanity checks - make sure we have something - if (!isSet(YEAR) && !isSet(HOUR)) { - return false; - } - if (isSet(HOUR) && !isSet(MINUTE)) { - return false; - } - // fill in default values for unset fields except timezone - for (int field = YEAR; field <= TIMEZONE; field++) { - if (get(field) == null) { - if (field == TIMEZONE && !strict) { - // We only use UTC as default timezone for dates parsed complying with - // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty - // and local timezone is used. - continue; - } - final int value = getDefaultValue(field); - set(field, value); - } - } - - if (!strict) { - // swap year, month, and day if it looks like the right thing to do - if (isDay(get(YEAR))) { - final int d = get(YEAR); - set(YEAR, get(DAY)); - if (namedMonth) { - // d-m-y - set(DAY, d); - } else { - // m-d-y - final int d2 = get(MONTH); - set(MONTH, d); - set(DAY, d2); - } - } - // sanity checks now that we know what's what - if (!isMonth(get(MONTH)) || !isDay(get(DAY))) { - return false; - } - - // add 1900 or 2000 to year if it's between 0 and 100 - final int year = get(YEAR); - if (year >= 0 && year < 100) { - set(YEAR, year >= 50 ? 1900 + year : 2000 + year); - } - } else { - // 24 hour value is only allowed if all other time values are zero - if (get(HOUR) == 24 && - (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) { - return false; - } - } - - // set month to 0-based - set(MONTH, get(MONTH) - 1); - return true; - } - - private static void addName(final String str, final int type, final int value) { - final Name name = new Name(str, type, value); - names.put(name.key, name); - } - - private static class Name { - final String name; - final String key; - final int value; - final int type; - - final static int DAY_OF_WEEK = -1; - final static int MONTH_NAME = 0; - final static int AM_PM = 1; - final static int TIMEZONE_ID = 2; - final static int TIME_SEPARATOR = 3; - - Name(final String name, final int type, final int value) { - assert name != null; - assert name.equals(name.toLowerCase(Locale.ENGLISH)); - - this.name = name; - // use first three characters as lookup key - this.key = name.substring(0, Math.min(3, name.length())); - this.type = type; - this.value = value; - } - - public boolean matches(final String str, final int offset, final int len) { - return name.regionMatches(true, 0, str, offset, len); - } - - @Override - public String toString() { - return name; - } - } - -} diff -r d139023165ed -r 931209786d11 nashorn/src/jdk/nashorn/internal/objects/NativeDate.java --- a/nashorn/src/jdk/nashorn/internal/objects/NativeDate.java Wed May 29 16:23:50 2013 -0300 +++ b/nashorn/src/jdk/nashorn/internal/objects/NativeDate.java Thu May 30 16:49:46 2013 +0530 @@ -39,6 +39,7 @@ import jdk.nashorn.internal.objects.annotations.ScriptClass; import jdk.nashorn.internal.objects.annotations.SpecializedConstructor; import jdk.nashorn.internal.objects.annotations.Where; +import jdk.nashorn.internal.parser.DateParser; import jdk.nashorn.internal.runtime.ConsString; import jdk.nashorn.internal.runtime.JSType; import jdk.nashorn.internal.runtime.ScriptEnvironment; diff -r d139023165ed -r 931209786d11 nashorn/src/jdk/nashorn/internal/parser/DateParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nashorn/src/jdk/nashorn/internal/parser/DateParser.java Thu May 30 16:49:46 2013 +0530 @@ -0,0 +1,716 @@ +/* + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package jdk.nashorn.internal.parser; + +import static java.lang.Character.DECIMAL_DIGIT_NUMBER; +import static java.lang.Character.LOWERCASE_LETTER; +import static java.lang.Character.OTHER_PUNCTUATION; +import static java.lang.Character.SPACE_SEPARATOR; +import static java.lang.Character.UPPERCASE_LETTER; + +import java.util.HashMap; +import java.util.Locale; + +/** + * JavaScript date parser. This class first tries to parse a date string + * according to the extended ISO 8601 format specified in ES5 15.9.1.15. + * If that fails, it falls back to legacy mode in which it accepts a range + * of different formats. + * + *

This class is neither thread-safe nor reusable. Calling the + * parse() method more than once will yield undefined results.

+ */ +public class DateParser { + + /** Constant for index position of parsed year value. */ + public final static int YEAR = 0; + /** Constant for index position of parsed month value. */ + public final static int MONTH = 1; + /** Constant for index position of parsed day value. */ + public final static int DAY = 2; + /** Constant for index position of parsed hour value. */ + public final static int HOUR = 3; + /** Constant for index position of parsed minute value. */ + public final static int MINUTE = 4; + /** Constant for index position of parsed second value. */ + public final static int SECOND = 5; + /** Constant for index position of parsed millisecond value. */ + public final static int MILLISECOND = 6; + /** Constant for index position of parsed time zone offset value. */ + public final static int TIMEZONE = 7; + + private enum Token { + UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END + } + + private final String string; + private final int length; + private final Integer[] fields; + private int pos = 0; + private Token token; + private int tokenLength; + private Name nameValue; + private int numValue; + private int currentField = YEAR; + private int yearSign = 0; + private boolean namedMonth = false; + + private final static HashMap names = new HashMap<>(); + + static { + addName("monday", Name.DAY_OF_WEEK, 0); + addName("tuesday", Name.DAY_OF_WEEK, 0); + addName("wednesday", Name.DAY_OF_WEEK, 0); + addName("thursday", Name.DAY_OF_WEEK, 0); + addName("friday", Name.DAY_OF_WEEK, 0); + addName("saturday", Name.DAY_OF_WEEK, 0); + addName("sunday", Name.DAY_OF_WEEK, 0); + addName("january", Name.MONTH_NAME, 1); + addName("february", Name.MONTH_NAME, 2); + addName("march", Name.MONTH_NAME, 3); + addName("april", Name.MONTH_NAME, 4); + addName("may", Name.MONTH_NAME, 5); + addName("june", Name.MONTH_NAME, 6); + addName("july", Name.MONTH_NAME, 7); + addName("august", Name.MONTH_NAME, 8); + addName("september", Name.MONTH_NAME, 9); + addName("october", Name.MONTH_NAME, 10); + addName("november", Name.MONTH_NAME, 11); + addName("december", Name.MONTH_NAME, 12); + addName("am", Name.AM_PM, 0); + addName("pm", Name.AM_PM, 12); + addName("z", Name.TIMEZONE_ID, 0); + addName("gmt", Name.TIMEZONE_ID, 0); + addName("ut", Name.TIMEZONE_ID, 0); + addName("utc", Name.TIMEZONE_ID, 0); + addName("est", Name.TIMEZONE_ID, -5 * 60); + addName("edt", Name.TIMEZONE_ID, -4 * 60); + addName("cst", Name.TIMEZONE_ID, -6 * 60); + addName("cdt", Name.TIMEZONE_ID, -5 * 60); + addName("mst", Name.TIMEZONE_ID, -7 * 60); + addName("mdt", Name.TIMEZONE_ID, -6 * 60); + addName("pst", Name.TIMEZONE_ID, -8 * 60); + addName("pdt", Name.TIMEZONE_ID, -7 * 60); + addName("t", Name.TIME_SEPARATOR, 0); + } + + /** + * Construct a new DateParser instance for parsing the given string. + * @param string the string to be parsed + */ + public DateParser(final String string) { + this.string = string; + this.length = string.length(); + this.fields = new Integer[TIMEZONE + 1]; + } + + /** + * Try parsing the given string as date according to the extended ISO 8601 format + * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails. + * This method returns true if the string could be parsed. + * @return true if the string could be parsed as date + */ + public boolean parse() { + return parseEcmaDate() || parseLegacyDate(); + } + + /** + * Try parsing the date string according to the rules laid out in ES5 15.9.1.15. + * The date string must conform to the following format: + * + *
  [('-'|'+')yy]yyyy[-MM[-dd]][hh:mm[:ss[.sss]][Z|(+|-)hh:mm]] 
+ * + *

If the string does not contain a time zone offset, the TIMEZONE field + * is set to 0 (GMT).

+ * @return true if string represents a valid ES5 date string. + */ + public boolean parseEcmaDate() { + + if (token == null) { + token = next(); + } + + while (token != Token.END) { + + switch (token) { + case NUMBER: + if (currentField == YEAR && yearSign != 0) { + // 15.9.1.15.1 Extended year must have six digits + if (tokenLength != 6) { + return false; + } + numValue *= yearSign; + } else if (!checkEcmaField(currentField, numValue)) { + return false; + } + if (!skipEcmaDelimiter()) { + return false; + } + if (currentField < TIMEZONE) { + set(currentField++, numValue); + } + break; + + case NAME: + if (nameValue == null) { + return false; + } + switch (nameValue.type) { + case Name.TIME_SEPARATOR: + if (currentField == YEAR || currentField > HOUR) { + return false; + } + currentField = HOUR; + break; + case Name.TIMEZONE_ID: + if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) { + return false; + } + break; + default: + return false; + } + break; + + case SIGN: + if (peek() == -1) { + // END after sign - wrong! + return false; + } + + if (currentField == YEAR) { + yearSign = numValue; + } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) { + // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds + return false; + } + break; + + default: + return false; + } + token = next(); + } + + return patchResult(true); + } + + /** + * Try parsing the date using a fuzzy algorithm that can handle a variety of formats. + * + *

Numbers separated by ':' are treated as time values, optionally followed by a + * millisecond value separated by '.'. Other number values are treated as date values. + * The exact sequence of day, month, and year values to apply is determined heuristically.

+ * + *

English month names and selected time zone names as well as AM/PM markers are recognized + * and handled properly. Additionally, numeric time zone offsets such as (+|-)hh:mm or + * (+|-)hhmm are recognized. If the string does not contain a time zone offset + * the TIMEZONEfield is left undefined, meaning the local time zone should be applied.

+ * + *

English weekday names are recognized but ignored. All text in parentheses is ignored as well. + * All other text causes parsing to fail.

+ * + * @return true if the string could be parsed + */ + public boolean parseLegacyDate() { + + if (yearSign != 0 || currentField > DAY) { + // we don't support signed years in legacy mode + return false; + } + if (token == null) { + token = next(); + } + + while (token != Token.END) { + + switch (token) { + case NUMBER: + if (skip(':')) { + // A number followed by ':' is parsed as time + if (!setTimeField(numValue)) { + return false; + } + // consume remaining time tokens + do { + token = next(); + if (token != Token.NUMBER || !setTimeField(numValue)) { + return false; + } + } while (skip(isSet(SECOND) ? '.' : ':')); + + } else { + // Parse as date token + if (!setDateField(numValue)) { + return false; + } + skip('-'); + } + break; + + case NAME: + if (nameValue == null) { + return false; + } + switch (nameValue.type) { + case Name.AM_PM: + if (!setAmPm(nameValue.value)) { + return false; + } + break; + case Name.MONTH_NAME: + if (!setMonth(nameValue.value)) { + return false; + } + break; + case Name.TIMEZONE_ID: + if (!setTimezone(nameValue.value, false)) { + return false; + } + break; + case Name.TIME_SEPARATOR: + return false; + default: + break; + } + if (nameValue.type != Name.TIMEZONE_ID) { + skip('-'); + } + break; + + case SIGN: + if (peek() == -1) { + // END after sign - wrong! + return false; + } + + if (!setTimezone(readTimeZoneOffset(), true)) { + return false; + } + break; + + case PARENTHESIS: + if (!skipParentheses()) { + return false; + } + break; + + case SEPARATOR: + break; + + default: + return false; + } + token = next(); + } + + return patchResult(false); + } + + /** + * Get the parsed date and time fields as an array of Integers. + * + *

If parsing was successful, all fields are guaranteed to be set except for the + * TIMEZONE field which may be null, meaning that local time zone + * offset should be applied.

+ * + * @return the parsed date fields + */ + public Integer[] getDateFields() { + return fields; + } + + private boolean isSet(final int field) { + return fields[field] != null; + } + + private Integer get(final int field) { + return fields[field]; + } + + private void set(final int field, final int value) { + fields[field] = value; + } + + private int peek() { + return pos < length ? string.charAt(pos) : -1; + } + + private boolean skip(final char c) { + if (pos < length && string.charAt(pos) == c) { + token = null; + pos++; + return true; + } + return false; + } + + private Token next() { + if (pos >= length) { + tokenLength = 0; + return Token.END; + } + + final char c = string.charAt(pos); + + if (c > 0x80) { + tokenLength = 1; + pos++; + return Token.UNKNOWN; // We only deal with ASCII here + } + + final int type = Character.getType(c); + switch (type) { + case DECIMAL_DIGIT_NUMBER: + numValue = readNumber(6); + return Token.NUMBER; + case SPACE_SEPARATOR : + case OTHER_PUNCTUATION: + tokenLength = 1; + pos++; + return Token.SEPARATOR; + case UPPERCASE_LETTER: + case LOWERCASE_LETTER: + nameValue = readName(); + return Token.NAME; + default: + tokenLength = 1; + pos++; + switch (c) { + case '(': + return Token.PARENTHESIS; + case '-': + case '+': + numValue = c == '-' ? -1 : 1; + return Token.SIGN; + default: + return Token.UNKNOWN; + } + } + } + + private static boolean checkLegacyField(final int field, final int value) { + switch (field) { + case HOUR: + return isHour(value); + case MINUTE: + case SECOND: + return isMinuteOrSecond(value); + case MILLISECOND: + return isMillisecond(value); + default: + // skip validation on other legacy fields as we don't know what's what + return true; + } + } + + private boolean checkEcmaField(final int field, final int value) { + switch (field) { + case YEAR: + return tokenLength == 4; + case MONTH: + return tokenLength == 2 && isMonth(value); + case DAY: + return tokenLength == 2 && isDay(value); + case HOUR: + return tokenLength == 2 && isHour(value); + case MINUTE: + case SECOND: + return tokenLength == 2 && isMinuteOrSecond(value); + case MILLISECOND: + // we allow millisecond to be less than 3 digits + return tokenLength < 4 && isMillisecond(value); + default: + return true; + } + } + + private boolean skipEcmaDelimiter() { + switch (currentField) { + case YEAR: + case MONTH: + return skip('-') || peek() == 'T' || peek() == -1; + case DAY: + return peek() == 'T' || peek() == -1; + case HOUR: + case MINUTE: + return skip(':') || endOfTime(); + case SECOND: + return skip('.') || endOfTime(); + default: + return true; + } + } + + private boolean endOfTime() { + final int c = peek(); + return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' '; + } + + private static boolean isAsciiLetter(final char ch) { + return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z'); + } + + private static boolean isAsciiDigit(final char ch) { + return '0' <= ch && ch <= '9'; + } + + private int readNumber(final int maxDigits) { + final int start = pos; + int n = 0; + final int max = Math.min(length, pos + maxDigits); + while (pos < max && isAsciiDigit(string.charAt(pos))) { + n = n * 10 + string.charAt(pos++) - '0'; + } + tokenLength = pos - start; + return n; + } + + private Name readName() { + final int start = pos; + final int limit = Math.min(pos + 3, length); + + // first read up to the key length + while (pos < limit && isAsciiLetter(string.charAt(pos))) { + pos++; + } + final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH); + final Name name = names.get(key); + // then advance to end of name + while (pos < length && isAsciiLetter(string.charAt(pos))) { + pos++; + } + + tokenLength = pos - start; + // make sure we have the full name or a prefix + if (name != null && name.matches(string, start, tokenLength)) { + return name; + } + return null; + } + + private int readTimeZoneOffset() { + final int sign = string.charAt(pos - 1) == '+' ? 1 : -1; + int offset = readNumber(2); + skip(':'); + offset = offset * 60 + readNumber(2); + return sign * offset; + } + + private boolean skipParentheses() { + int parenCount = 1; + while (pos < length && parenCount != 0) { + final char c = string.charAt(pos++); + if (c == '(') { + parenCount++; + } else if (c == ')') { + parenCount--; + } + } + return true; + } + + private static int getDefaultValue(final int field) { + switch (field) { + case MONTH: + case DAY: + return 1; + default: + return 0; + } + } + + private static boolean isDay(final int n) { + return 1 <= n && n <= 31; + } + + private static boolean isMonth(final int n) { + return 1 <= n && n <= 12; + } + + private static boolean isHour(final int n) { + return 0 <= n && n <= 24; + } + + private static boolean isMinuteOrSecond(final int n) { + return 0 <= n && n < 60; + } + + private static boolean isMillisecond(final int n) { + return 0<= n && n < 1000; + } + + private boolean setMonth(final int m) { + if (!isSet(MONTH)) { + namedMonth = true; + set(MONTH, m); + return true; + } + return false; + } + + private boolean setDateField(final int n) { + for (int field = YEAR; field != HOUR; field++) { + if (!isSet(field)) { + // no validation on legacy date fields + set(field, n); + return true; + } + } + return false; + } + + private boolean setTimeField(final int n) { + for (int field = HOUR; field != TIMEZONE; field++) { + if (!isSet(field)) { + if (checkLegacyField(field, n)) { + set(field, n); + return true; + } + return false; + } + } + return false; + } + + private boolean setTimezone(final int offset, final boolean asNumericOffset) { + if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) { + set(TIMEZONE, offset); + return true; + } + return false; + } + + private boolean setAmPm(final int offset) { + if (!isSet(HOUR)) { + return false; + } + final int hour = get(HOUR); + if (hour >= 0 && hour <= 12) { + set(HOUR, hour + offset); + } + return true; + } + + private boolean patchResult(final boolean strict) { + // sanity checks - make sure we have something + if (!isSet(YEAR) && !isSet(HOUR)) { + return false; + } + if (isSet(HOUR) && !isSet(MINUTE)) { + return false; + } + // fill in default values for unset fields except timezone + for (int field = YEAR; field <= TIMEZONE; field++) { + if (get(field) == null) { + if (field == TIMEZONE && !strict) { + // We only use UTC as default timezone for dates parsed complying with + // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty + // and local timezone is used. + continue; + } + final int value = getDefaultValue(field); + set(field, value); + } + } + + if (!strict) { + // swap year, month, and day if it looks like the right thing to do + if (isDay(get(YEAR))) { + final int d = get(YEAR); + set(YEAR, get(DAY)); + if (namedMonth) { + // d-m-y + set(DAY, d); + } else { + // m-d-y + final int d2 = get(MONTH); + set(MONTH, d); + set(DAY, d2); + } + } + // sanity checks now that we know what's what + if (!isMonth(get(MONTH)) || !isDay(get(DAY))) { + return false; + } + + // add 1900 or 2000 to year if it's between 0 and 100 + final int year = get(YEAR); + if (year >= 0 && year < 100) { + set(YEAR, year >= 50 ? 1900 + year : 2000 + year); + } + } else { + // 24 hour value is only allowed if all other time values are zero + if (get(HOUR) == 24 && + (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) { + return false; + } + } + + // set month to 0-based + set(MONTH, get(MONTH) - 1); + return true; + } + + private static void addName(final String str, final int type, final int value) { + final Name name = new Name(str, type, value); + names.put(name.key, name); + } + + private static class Name { + final String name; + final String key; + final int value; + final int type; + + final static int DAY_OF_WEEK = -1; + final static int MONTH_NAME = 0; + final static int AM_PM = 1; + final static int TIMEZONE_ID = 2; + final static int TIME_SEPARATOR = 3; + + Name(final String name, final int type, final int value) { + assert name != null; + assert name.equals(name.toLowerCase(Locale.ENGLISH)); + + this.name = name; + // use first three characters as lookup key + this.key = name.substring(0, Math.min(3, name.length())); + this.type = type; + this.value = value; + } + + public boolean matches(final String str, final int offset, final int len) { + return name.regionMatches(true, 0, str, offset, len); + } + + @Override + public String toString() { + return name; + } + } + +} diff -r d139023165ed -r 931209786d11 nashorn/test/script/basic/JDK-8015353.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nashorn/test/script/basic/JDK-8015353.js Thu May 30 16:49:46 2013 +0530 @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * JDK-8015353: Date.parse illegal string parsing issues + * + * @test + * @run + */ + +function checkDate(str) { + if (! isNaN(Date.parse(str))) { + fail(str + " is parsed as legal Date"); + } +} + +checkDate("2012-01-10T00:00:00.000-"); +checkDate("2012-01-01T00:00+");