jaxws/src/java.activation/share/classes/com/sun/activation/registries/MailcapTokenizer.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jaxws/src/java.activation/share/classes/com/sun/activation/registries/MailcapTokenizer.java Sun Aug 17 15:52:15 2014 +0100
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package com.sun.activation.registries;
+
+/**
+ * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
+ * Useful for parsing MIME content types.
+ */
+public class MailcapTokenizer {
+
+ public static final int UNKNOWN_TOKEN = 0; // a character that starts no recognized token
+ public static final int START_TOKEN = 1; // initial state, before the first nextToken() call
+ public static final int STRING_TOKEN = 2; // a run of string-token characters, or an auto-quoted string
+ public static final int EOI_TOKEN = 5; // end of input
+ public static final int SLASH_TOKEN = '/'; // single-character tokens use the character's own code
+ public static final int SEMICOLON_TOKEN = ';';
+ public static final int EQUALS_TOKEN = '=';
+
+ /**
+ * Creates a tokenizer positioned at the start of the given string, with auto-quoting off.
+ *
+ * @param inputString the string to tokenize; its length is read here, so null is not accepted
+ */
+ public MailcapTokenizer(String inputString) {
+ data = inputString;
+ dataIndex = 0;
+ dataLength = inputString.length();
+
+ currentToken = START_TOKEN;
+ currentTokenValue = "";
+
+ isAutoquoting = false;
+ autoquoteChar = ';'; // terminator that ends an auto-quoted token
+ }
+
+ /**
+ * Turns auto-quoting on or off for subsequent calls to nextToken().
+ *
+ * With auto-quoting on, everything from the first non-whitespace character
+ * up to (not including) the next ';' or the end of input becomes a single
+ * string token, trailing whitespace included; a leading ';' or '=' is still
+ * its own token. Required for command strings in a mailcap entry.
+ *
+ * @param value true to enable auto-quoting, false to disable it
+ */
+ public void setIsAutoquoting(boolean value) {
+ isAutoquoting = value;
+ }
+
+ /**
+ * Returns the code of the most recently scanned token, or START_TOKEN if nextToken() has not been called yet.
+ *
+ * @return the current token code (one of the token constants of this class)
+ */
+ public int getCurrentToken() {
+ return currentToken;
+ }
+
+ /**
+ * Returns a short display name for the given token code, or "really unknown" if it matches none of the token constants.
+ */
+ public static String nameForToken(int token) {
+ String name = "really unknown"; // fallback when no case below matches
+
+ switch(token) {
+ case UNKNOWN_TOKEN:
+ name = "unknown";
+ break;
+ case START_TOKEN:
+ name = "start";
+ break;
+ case STRING_TOKEN:
+ name = "string";
+ break;
+ case EOI_TOKEN:
+ name = "EOI";
+ break;
+ case SLASH_TOKEN:
+ name = "'/'";
+ break;
+ case SEMICOLON_TOKEN:
+ name = "';'";
+ break;
+ case EQUALS_TOKEN:
+ name = "'='";
+ break;
+ }
+
+ return name;
+ }
+
+ /**
+ * Returns the text of the current token: "" before the first nextToken() call, null once EOI_TOKEN is reached.
+ *
+ * @return a String containing the current token value, possibly null
+ */
+ public String getCurrentTokenValue() {
+ return currentTokenValue;
+ }
+ /**
+ * Skips leading whitespace, scans the next token and makes it the current token (EOI_TOKEN at end of input).
+ *
+ * @return the code of the token just scanned
+ */
+ public int nextToken() {
+ if (dataIndex < dataLength) {
+ // skip white space
+ while ((dataIndex < dataLength) &&
+ (isWhiteSpaceChar(data.charAt(dataIndex)))) {
+ ++dataIndex;
+ }
+
+ if (dataIndex < dataLength) {
+ // examine the current character and see what kind of token we have
+ char c = data.charAt(dataIndex);
+ if (isAutoquoting) {
+ if (c == ';' || c == '=') { // in auto-quote mode only ';' and '=' are delimiters; '/' is ordinary text
+ currentToken = c;
+ currentTokenValue = new Character(c).toString();
+ ++dataIndex;
+ } else {
+ processAutoquoteToken();
+ }
+ } else {
+ if (isStringTokenChar(c)) {
+ processStringToken();
+ } else if ((c == '/') || (c == ';') || (c == '=')) { // the token code is the character itself
+ currentToken = c;
+ currentTokenValue = new Character(c).toString();
+ ++dataIndex;
+ } else {
+ currentToken = UNKNOWN_TOKEN; // special or control character with no token of its own; consumed as one char
+ currentTokenValue = new Character(c).toString();
+ ++dataIndex;
+ }
+ }
+ } else { // only whitespace remained
+ currentToken = EOI_TOKEN;
+ currentTokenValue = null;
+ }
+ } else { // input was already exhausted on entry
+ currentToken = EOI_TOKEN;
+ currentTokenValue = null;
+ }
+
+ return currentToken;
+ }
+
+ private void processStringToken() { // scans the maximal run of string-token characters starting at dataIndex
+ // remember where the token starts
+ int initialIndex = dataIndex;
+
+ // advance to the first character that cannot be part of a string token, or to the end of input
+ while ((dataIndex < dataLength) &&
+ isStringTokenChar(data.charAt(dataIndex))) {
+ ++dataIndex;
+ }
+
+ currentToken = STRING_TOKEN;
+ currentTokenValue = data.substring(initialIndex, dataIndex);
+ }
+
+ private void processAutoquoteToken() { // scans an auto-quoted string token that runs up to autoquoteChar or the end of input
+ // remember where the token starts
+ int initialIndex = dataIndex;
+
+ // advance to the first autoquote termination character, which is left unconsumed
+ // XXX - escaping is not considered here, so a backslash-escaped terminator still ends the token
+ boolean foundTerminator = false;
+ while ((dataIndex < dataLength) && !foundTerminator) {
+ char c = data.charAt(dataIndex);
+ if (c != autoquoteChar) {
+ ++dataIndex;
+ } else {
+ foundTerminator = true;
+ }
+ }
+
+ currentToken = STRING_TOKEN;
+ currentTokenValue =
+ fixEscapeSequences(data.substring(initialIndex, dataIndex)); // backslash escapes in the captured text are collapsed
+ }
+
+ private static boolean isSpecialChar(char c) { // true for the delimiter characters that may not appear in a string token
+ boolean lAnswer = false;
+
+ switch(c) { // NOTE(review): this list looks like the RFC 2045 "tspecials" set - confirm
+ case '(':
+ case ')':
+ case '<':
+ case '>':
+ case '@':
+ case ',':
+ case ';':
+ case ':':
+ case '\\':
+ case '"':
+ case '/':
+ case '[':
+ case ']':
+ case '?':
+ case '=':
+ lAnswer = true;
+ break;
+ }
+
+ return lAnswer;
+ }
+
+ private static boolean isControlChar(char c) { // thin wrapper so the tokenizer's notion of "control" is defined in one place
+ return Character.isISOControl(c);
+ }
+
+ private static boolean isWhiteSpaceChar(char c) { // whitespace as defined by Character.isWhitespace; nextToken() skips these
+ return Character.isWhitespace(c);
+ }
+
+ private static boolean isStringTokenChar(char c) { // a string token may contain anything that is not special, control or whitespace
+ return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
+ }
+
+ private static String fixEscapeSequences(String inputString) { // returns inputString with each backslash escape replaced by the escaped character
+ int inputLength = inputString.length();
+ StringBuffer buffer = new StringBuffer();
+ buffer.ensureCapacity(inputLength); // the result is never longer than the input
+
+ for (int i = 0; i < inputLength; ++i) {
+ char currentChar = inputString.charAt(i);
+ if (currentChar != '\\') {
+ buffer.append(currentChar);
+ } else {
+ if (i < inputLength - 1) { // a character follows the backslash: keep it and drop the backslash
+ char nextChar = inputString.charAt(i + 1);
+ buffer.append(nextChar);
+
+ // step past the escaped character so it is not examined again
+ ++i;
+ } else { // a lone trailing backslash is kept as-is
+ buffer.append(currentChar);
+ }
+ }
+ }
+
+ return buffer.toString();
+ }
+
+ private String data; // the input being tokenized
+ private int dataIndex; // index of the next unread character in data
+ private int dataLength; // data.length(), cached by the constructor
+ private int currentToken; // code of the most recently scanned token
+ private String currentTokenValue; // text of that token; null at end of input
+ private boolean isAutoquoting; // toggled through setIsAutoquoting
+ private char autoquoteChar; // terminator for auto-quoted tokens; set to ';' by the constructor
+
+ /* Disabled command-line test driver: tokenizes each argument and prints every token it yields.
+ public static void main(String[] args) {
+ for (int i = 0; i < args.length; ++i) {
+ MailcapTokenizer tokenizer = new MailcapTokenizer(args[i]);
+
+ System.out.println("Original: |" + args[i] + "|");
+
+ int currentToken = tokenizer.nextToken();
+ while (currentToken != EOI_TOKEN) {
+ switch(currentToken) {
+ case UNKNOWN_TOKEN:
+ System.out.println(" Unknown Token: |" + tokenizer.getCurrentTokenValue() + "|");
+ break;
+ case START_TOKEN:
+ System.out.println(" Start Token: |" + tokenizer.getCurrentTokenValue() + "|");
+ break;
+ case STRING_TOKEN:
+ System.out.println(" String Token: |" + tokenizer.getCurrentTokenValue() + "|");
+ break;
+ case EOI_TOKEN:
+ System.out.println(" EOI Token: |" + tokenizer.getCurrentTokenValue() + "|");
+ break;
+ case SLASH_TOKEN:
+ System.out.println(" Slash Token: |" + tokenizer.getCurrentTokenValue() + "|");
+ break;
+ case SEMICOLON_TOKEN:
+ System.out.println(" Semicolon Token: |" + tokenizer.getCurrentTokenValue() + "|");
+ break;
+ case EQUALS_TOKEN:
+ System.out.println(" Equals Token: |" + tokenizer.getCurrentTokenValue() + "|");
+ break;
+ default:
+ System.out.println(" Really Unknown Token: |" + tokenizer.getCurrentTokenValue() + "|");
+ break;
+ }
+
+ currentToken = tokenizer.nextToken();
+ }
+
+ System.out.println("");
+ }
+ }
+ */
+}