src/java.activation/share/classes/com/sun/activation/registries/MailcapTokenizer.java
changeset 47216 71c04702a3d5
parent 31501 1b48499c9e1c
equal deleted inserted replaced
47215:4ebc2e2fb97c 47216:71c04702a3d5
       
     1 /*
       
     2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Oracle designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Oracle in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
       
    22  * or visit www.oracle.com if you need additional information or have any
       
    23  * questions.
       
    24  */
       
    25 
       
    26 package com.sun.activation.registries;
       
    27 
       
    28 /**
       
    29  *      A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
       
    30  *      Useful for parsing MIME content types.
       
    31  */
       
    32 public class MailcapTokenizer {
       
    33 
       
    34     public static final int UNKNOWN_TOKEN = 0;
       
    35     public static final int START_TOKEN = 1;
       
    36     public static final int STRING_TOKEN = 2;
       
    37     public static final int EOI_TOKEN = 5;
       
    38     public static final int SLASH_TOKEN = '/';
       
    39     public static final int SEMICOLON_TOKEN = ';';
       
    40     public static final int EQUALS_TOKEN = '=';
       
    41 
       
    42     /**
       
    43      *  Constructor
       
    44      *
       
    45      *  @param  inputString the string to tokenize
       
    46      */
       
    47     public MailcapTokenizer(String inputString) {
       
    48         data = inputString;
       
    49         dataIndex = 0;
       
    50         dataLength = inputString.length();
       
    51 
       
    52         currentToken = START_TOKEN;
       
    53         currentTokenValue = "";
       
    54 
       
    55         isAutoquoting = false;
       
    56         autoquoteChar = ';';
       
    57     }
       
    58 
       
    59     /**
       
    60      *  Set whether auto-quoting is on or off.
       
    61      *
       
    62      *  Auto-quoting means that all characters after the first
       
    63      *  non-whitespace, non-control character up to the auto-quote
       
    64      *  terminator character or EOI (minus any whitespace immediatley
       
    65      *  preceeding it) is considered a token.
       
    66      *
       
    67      *  This is required for handling command strings in a mailcap entry.
       
    68      */
       
    69     public void setIsAutoquoting(boolean value) {
       
    70         isAutoquoting = value;
       
    71     }
       
    72 
       
    73     /**
       
    74      *  Retrieve current token.
       
    75      *
       
    76      *  @return    The current token value
       
    77      */
       
    78     public int getCurrentToken() {
       
    79         return currentToken;
       
    80     }
       
    81 
       
    82     /*
       
    83      *  Get a String that describes the given token.
       
    84      */
       
    85     public static String nameForToken(int token) {
       
    86         String name = "really unknown";
       
    87 
       
    88         switch(token) {
       
    89             case UNKNOWN_TOKEN:
       
    90                 name = "unknown";
       
    91                 break;
       
    92             case START_TOKEN:
       
    93                 name = "start";
       
    94                 break;
       
    95             case STRING_TOKEN:
       
    96                 name = "string";
       
    97                 break;
       
    98             case EOI_TOKEN:
       
    99                 name = "EOI";
       
   100                 break;
       
   101             case SLASH_TOKEN:
       
   102                 name = "'/'";
       
   103                 break;
       
   104             case SEMICOLON_TOKEN:
       
   105                 name = "';'";
       
   106                 break;
       
   107             case EQUALS_TOKEN:
       
   108                 name = "'='";
       
   109                 break;
       
   110         }
       
   111 
       
   112         return name;
       
   113     }
       
   114 
       
   115     /*
       
   116      *  Retrieve current token value.
       
   117      *
       
   118      *  @return    A String containing the current token value
       
   119      */
       
   120     public String getCurrentTokenValue() {
       
   121         return currentTokenValue;
       
   122     }
       
   123     /*
       
   124      *  Process the next token.
       
   125      *
       
   126      *  @return    the next token
       
   127      */
       
   128     public int nextToken() {
       
   129         if (dataIndex < dataLength) {
       
   130             //  skip white space
       
   131             while ((dataIndex < dataLength) &&
       
   132                     (isWhiteSpaceChar(data.charAt(dataIndex)))) {
       
   133                 ++dataIndex;
       
   134             }
       
   135 
       
   136             if (dataIndex < dataLength) {
       
   137                 //  examine the current character and see what kind of token we have
       
   138                 char c = data.charAt(dataIndex);
       
   139                 if (isAutoquoting) {
       
   140                     if (c == ';' || c == '=') {
       
   141                         currentToken = c;
       
   142                         currentTokenValue = new Character(c).toString();
       
   143                         ++dataIndex;
       
   144                     } else {
       
   145                         processAutoquoteToken();
       
   146                     }
       
   147                 } else {
       
   148                     if (isStringTokenChar(c)) {
       
   149                         processStringToken();
       
   150                     } else if ((c == '/') || (c == ';') || (c == '=')) {
       
   151                         currentToken = c;
       
   152                         currentTokenValue = new Character(c).toString();
       
   153                         ++dataIndex;
       
   154                     } else {
       
   155                         currentToken = UNKNOWN_TOKEN;
       
   156                         currentTokenValue = new Character(c).toString();
       
   157                         ++dataIndex;
       
   158                     }
       
   159                 }
       
   160             } else {
       
   161                 currentToken = EOI_TOKEN;
       
   162                 currentTokenValue = null;
       
   163             }
       
   164         } else {
       
   165             currentToken = EOI_TOKEN;
       
   166             currentTokenValue = null;
       
   167         }
       
   168 
       
   169         return currentToken;
       
   170     }
       
   171 
       
   172     private void processStringToken() {
       
   173         //  capture the initial index
       
   174         int initialIndex = dataIndex;
       
   175 
       
   176         //  skip to 1st non string token character
       
   177         while ((dataIndex < dataLength) &&
       
   178                 isStringTokenChar(data.charAt(dataIndex))) {
       
   179             ++dataIndex;
       
   180         }
       
   181 
       
   182         currentToken = STRING_TOKEN;
       
   183         currentTokenValue = data.substring(initialIndex, dataIndex);
       
   184     }
       
   185 
       
   186     private void processAutoquoteToken() {
       
   187         //  capture the initial index
       
   188         int initialIndex = dataIndex;
       
   189 
       
   190         //  now skip to the 1st non-escaped autoquote termination character
       
   191         //  XXX - doesn't actually consider escaping
       
   192         boolean foundTerminator = false;
       
   193         while ((dataIndex < dataLength) && !foundTerminator) {
       
   194             char c = data.charAt(dataIndex);
       
   195             if (c != autoquoteChar) {
       
   196                 ++dataIndex;
       
   197             } else {
       
   198                 foundTerminator = true;
       
   199             }
       
   200         }
       
   201 
       
   202         currentToken = STRING_TOKEN;
       
   203         currentTokenValue =
       
   204             fixEscapeSequences(data.substring(initialIndex, dataIndex));
       
   205     }
       
   206 
       
   207     private static boolean isSpecialChar(char c) {
       
   208         boolean lAnswer = false;
       
   209 
       
   210         switch(c) {
       
   211             case '(':
       
   212             case ')':
       
   213             case '<':
       
   214             case '>':
       
   215             case '@':
       
   216             case ',':
       
   217             case ';':
       
   218             case ':':
       
   219             case '\\':
       
   220             case '"':
       
   221             case '/':
       
   222             case '[':
       
   223             case ']':
       
   224             case '?':
       
   225             case '=':
       
   226                 lAnswer = true;
       
   227                 break;
       
   228         }
       
   229 
       
   230         return lAnswer;
       
   231     }
       
   232 
       
   233     private static boolean isControlChar(char c) {
       
   234         return Character.isISOControl(c);
       
   235     }
       
   236 
       
   237     private static boolean isWhiteSpaceChar(char c) {
       
   238         return Character.isWhitespace(c);
       
   239     }
       
   240 
       
   241     private static boolean isStringTokenChar(char c) {
       
   242         return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
       
   243     }
       
   244 
       
   245     private static String fixEscapeSequences(String inputString) {
       
   246         int inputLength = inputString.length();
       
   247         StringBuffer buffer = new StringBuffer();
       
   248         buffer.ensureCapacity(inputLength);
       
   249 
       
   250         for (int i = 0; i < inputLength; ++i) {
       
   251             char currentChar = inputString.charAt(i);
       
   252             if (currentChar != '\\') {
       
   253                 buffer.append(currentChar);
       
   254             } else {
       
   255                 if (i < inputLength - 1) {
       
   256                     char nextChar = inputString.charAt(i + 1);
       
   257                     buffer.append(nextChar);
       
   258 
       
   259                     //  force a skip over the next character too
       
   260                     ++i;
       
   261                 } else {
       
   262                     buffer.append(currentChar);
       
   263                 }
       
   264             }
       
   265         }
       
   266 
       
   267         return buffer.toString();
       
   268     }
       
   269 
       
   270     private String  data;
       
   271     private int     dataIndex;
       
   272     private int     dataLength;
       
   273     private int     currentToken;
       
   274     private String  currentTokenValue;
       
   275     private boolean isAutoquoting;
       
   276     private char    autoquoteChar;
       
   277 
       
   278     /*
       
   279     public static void main(String[] args) {
       
   280         for (int i = 0; i < args.length; ++i) {
       
   281             MailcapTokenizer tokenizer = new MailcapTokenizer(args[i]);
       
   282 
       
   283             System.out.println("Original: |" + args[i] + "|");
       
   284 
       
   285             int currentToken = tokenizer.nextToken();
       
   286             while (currentToken != EOI_TOKEN) {
       
   287                 switch(currentToken) {
       
   288                     case UNKNOWN_TOKEN:
       
   289                         System.out.println("  Unknown Token:           |" + tokenizer.getCurrentTokenValue() + "|");
       
   290                         break;
       
   291                     case START_TOKEN:
       
   292                         System.out.println("  Start Token:             |" + tokenizer.getCurrentTokenValue() + "|");
       
   293                         break;
       
   294                     case STRING_TOKEN:
       
   295                         System.out.println("  String Token:            |" + tokenizer.getCurrentTokenValue() + "|");
       
   296                         break;
       
   297                     case EOI_TOKEN:
       
   298                         System.out.println("  EOI Token:               |" + tokenizer.getCurrentTokenValue() + "|");
       
   299                         break;
       
   300                     case SLASH_TOKEN:
       
   301                         System.out.println("  Slash Token:             |" + tokenizer.getCurrentTokenValue() + "|");
       
   302                         break;
       
   303                     case SEMICOLON_TOKEN:
       
   304                         System.out.println("  Semicolon Token:         |" + tokenizer.getCurrentTokenValue() + "|");
       
   305                         break;
       
   306                     case EQUALS_TOKEN:
       
   307                         System.out.println("  Equals Token:            |" + tokenizer.getCurrentTokenValue() + "|");
       
   308                         break;
       
   309                     default:
       
   310                         System.out.println("  Really Unknown Token:    |" + tokenizer.getCurrentTokenValue() + "|");
       
   311                         break;
       
   312                 }
       
   313 
       
   314                 currentToken = tokenizer.nextToken();
       
   315             }
       
   316 
       
   317             System.out.println("");
       
   318         }
       
   319     }
       
   320     */
       
   321 }