jaxws/src/java.activation/share/classes/com/sun/activation/registries/MailcapTokenizer.java
changeset 25871 b80b84e87032
parent 22678 ac1ea46be942
child 31501 1b48499c9e1c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jaxws/src/java.activation/share/classes/com/sun/activation/registries/MailcapTokenizer.java	Sun Aug 17 15:52:15 2014 +0100
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package com.sun.activation.registries;
+
+/**
+ *      A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
+ *      Useful for parsing MIME content types.
+ */
+public class MailcapTokenizer {
+
+    public static final int UNKNOWN_TOKEN = 0;
+    public static final int START_TOKEN = 1;
+    public static final int STRING_TOKEN = 2;
+    public static final int EOI_TOKEN = 5;
+    public static final int SLASH_TOKEN = '/';
+    public static final int SEMICOLON_TOKEN = ';';
+    public static final int EQUALS_TOKEN = '=';
+
+    /**
+     *  Constructor
+     *
+     *  @parameter  inputString the string to tokenize
+     */
+    public MailcapTokenizer(String inputString) {
+        data = inputString;
+        dataIndex = 0;
+        dataLength = inputString.length();
+
+        currentToken = START_TOKEN;
+        currentTokenValue = "";
+
+        isAutoquoting = false;
+        autoquoteChar = ';';
+    }
+
+    /**
+     *  Set whether auto-quoting is on or off.
+     *
+     *  Auto-quoting means that all characters after the first
+     *  non-whitespace, non-control character up to the auto-quote
+     *  terminator character or EOI (minus any whitespace immediatley
+     *  preceeding it) is considered a token.
+     *
+     *  This is required for handling command strings in a mailcap entry.
+     */
+    public void setIsAutoquoting(boolean value) {
+        isAutoquoting = value;
+    }
+
+    /**
+     *  Retrieve current token.
+     *
+     *  @returns    The current token value
+     */
+    public int getCurrentToken() {
+        return currentToken;
+    }
+
+    /*
+     *  Get a String that describes the given token.
+     */
+    public static String nameForToken(int token) {
+        String name = "really unknown";
+
+        switch(token) {
+            case UNKNOWN_TOKEN:
+                name = "unknown";
+                break;
+            case START_TOKEN:
+                name = "start";
+                break;
+            case STRING_TOKEN:
+                name = "string";
+                break;
+            case EOI_TOKEN:
+                name = "EOI";
+                break;
+            case SLASH_TOKEN:
+                name = "'/'";
+                break;
+            case SEMICOLON_TOKEN:
+                name = "';'";
+                break;
+            case EQUALS_TOKEN:
+                name = "'='";
+                break;
+        }
+
+        return name;
+    }
+
+    /*
+     *  Retrieve current token value.
+     *
+     *  @returns    A String containing the current token value
+     */
+    public String getCurrentTokenValue() {
+        return currentTokenValue;
+    }
+    /*
+     *  Process the next token.
+     *
+     *  @returns    the next token
+     */
+    public int nextToken() {
+        if (dataIndex < dataLength) {
+            //  skip white space
+            while ((dataIndex < dataLength) &&
+                    (isWhiteSpaceChar(data.charAt(dataIndex)))) {
+                ++dataIndex;
+            }
+
+            if (dataIndex < dataLength) {
+                //  examine the current character and see what kind of token we have
+                char c = data.charAt(dataIndex);
+                if (isAutoquoting) {
+                    if (c == ';' || c == '=') {
+                        currentToken = c;
+                        currentTokenValue = new Character(c).toString();
+                        ++dataIndex;
+                    } else {
+                        processAutoquoteToken();
+                    }
+                } else {
+                    if (isStringTokenChar(c)) {
+                        processStringToken();
+                    } else if ((c == '/') || (c == ';') || (c == '=')) {
+                        currentToken = c;
+                        currentTokenValue = new Character(c).toString();
+                        ++dataIndex;
+                    } else {
+                        currentToken = UNKNOWN_TOKEN;
+                        currentTokenValue = new Character(c).toString();
+                        ++dataIndex;
+                    }
+                }
+            } else {
+                currentToken = EOI_TOKEN;
+                currentTokenValue = null;
+            }
+        } else {
+            currentToken = EOI_TOKEN;
+            currentTokenValue = null;
+        }
+
+        return currentToken;
+    }
+
+    private void processStringToken() {
+        //  capture the initial index
+        int initialIndex = dataIndex;
+
+        //  skip to 1st non string token character
+        while ((dataIndex < dataLength) &&
+                isStringTokenChar(data.charAt(dataIndex))) {
+            ++dataIndex;
+        }
+
+        currentToken = STRING_TOKEN;
+        currentTokenValue = data.substring(initialIndex, dataIndex);
+    }
+
+    private void processAutoquoteToken() {
+        //  capture the initial index
+        int initialIndex = dataIndex;
+
+        //  now skip to the 1st non-escaped autoquote termination character
+        //  XXX - doesn't actually consider escaping
+        boolean foundTerminator = false;
+        while ((dataIndex < dataLength) && !foundTerminator) {
+            char c = data.charAt(dataIndex);
+            if (c != autoquoteChar) {
+                ++dataIndex;
+            } else {
+                foundTerminator = true;
+            }
+        }
+
+        currentToken = STRING_TOKEN;
+        currentTokenValue =
+            fixEscapeSequences(data.substring(initialIndex, dataIndex));
+    }
+
+    private static boolean isSpecialChar(char c) {
+        boolean lAnswer = false;
+
+        switch(c) {
+            case '(':
+            case ')':
+            case '<':
+            case '>':
+            case '@':
+            case ',':
+            case ';':
+            case ':':
+            case '\\':
+            case '"':
+            case '/':
+            case '[':
+            case ']':
+            case '?':
+            case '=':
+                lAnswer = true;
+                break;
+        }
+
+        return lAnswer;
+    }
+
+    private static boolean isControlChar(char c) {
+        return Character.isISOControl(c);
+    }
+
+    private static boolean isWhiteSpaceChar(char c) {
+        return Character.isWhitespace(c);
+    }
+
+    private static boolean isStringTokenChar(char c) {
+        return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
+    }
+
+    private static String fixEscapeSequences(String inputString) {
+        int inputLength = inputString.length();
+        StringBuffer buffer = new StringBuffer();
+        buffer.ensureCapacity(inputLength);
+
+        for (int i = 0; i < inputLength; ++i) {
+            char currentChar = inputString.charAt(i);
+            if (currentChar != '\\') {
+                buffer.append(currentChar);
+            } else {
+                if (i < inputLength - 1) {
+                    char nextChar = inputString.charAt(i + 1);
+                    buffer.append(nextChar);
+
+                    //  force a skip over the next character too
+                    ++i;
+                } else {
+                    buffer.append(currentChar);
+                }
+            }
+        }
+
+        return buffer.toString();
+    }
+
+    private String  data;
+    private int     dataIndex;
+    private int     dataLength;
+    private int     currentToken;
+    private String  currentTokenValue;
+    private boolean isAutoquoting;
+    private char    autoquoteChar;
+
+    /*
+    public static void main(String[] args) {
+        for (int i = 0; i < args.length; ++i) {
+            MailcapTokenizer tokenizer = new MailcapTokenizer(args[i]);
+
+            System.out.println("Original: |" + args[i] + "|");
+
+            int currentToken = tokenizer.nextToken();
+            while (currentToken != EOI_TOKEN) {
+                switch(currentToken) {
+                    case UNKNOWN_TOKEN:
+                        System.out.println("  Unknown Token:           |" + tokenizer.getCurrentTokenValue() + "|");
+                        break;
+                    case START_TOKEN:
+                        System.out.println("  Start Token:             |" + tokenizer.getCurrentTokenValue() + "|");
+                        break;
+                    case STRING_TOKEN:
+                        System.out.println("  String Token:            |" + tokenizer.getCurrentTokenValue() + "|");
+                        break;
+                    case EOI_TOKEN:
+                        System.out.println("  EOI Token:               |" + tokenizer.getCurrentTokenValue() + "|");
+                        break;
+                    case SLASH_TOKEN:
+                        System.out.println("  Slash Token:             |" + tokenizer.getCurrentTokenValue() + "|");
+                        break;
+                    case SEMICOLON_TOKEN:
+                        System.out.println("  Semicolon Token:         |" + tokenizer.getCurrentTokenValue() + "|");
+                        break;
+                    case EQUALS_TOKEN:
+                        System.out.println("  Equals Token:            |" + tokenizer.getCurrentTokenValue() + "|");
+                        break;
+                    default:
+                        System.out.println("  Really Unknown Token:    |" + tokenizer.getCurrentTokenValue() + "|");
+                        break;
+                }
+
+                currentToken = tokenizer.nextToken();
+            }
+
+            System.out.println("");
+        }
+    }
+    */
+}