jaxp/src/com/sun/org/apache/xalan/internal/lib/ExsltStrings.java
author joehw
Thu, 12 Apr 2012 08:38:26 -0700
changeset 12457 c348e06f0e82
parent 6 jaxp/src/share/classes/com/sun/org/apache/xalan/internal/lib/ExsltStrings.java@7f561c08de6b
child 22416 5473b73ce396
permissions -rw-r--r--
7160496: Rename JDK8 JAXP source directory Summary: moving src/share/classes to src Reviewed-by: ohair
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
6
7f561c08de6b Initial load
duke
parents:
diff changeset
     1
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
     2
 * reserved comment block
7f561c08de6b Initial load
duke
parents:
diff changeset
     3
 * DO NOT REMOVE OR ALTER!
7f561c08de6b Initial load
duke
parents:
diff changeset
     4
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
     5
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
     6
 * Copyright 1999-2004 The Apache Software Foundation.
7f561c08de6b Initial load
duke
parents:
diff changeset
     7
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
     8
 * Licensed under the Apache License, Version 2.0 (the "License");
7f561c08de6b Initial load
duke
parents:
diff changeset
     9
 * you may not use this file except in compliance with the License.
7f561c08de6b Initial load
duke
parents:
diff changeset
    10
 * You may obtain a copy of the License at
7f561c08de6b Initial load
duke
parents:
diff changeset
    11
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    12
 *     http://www.apache.org/licenses/LICENSE-2.0
7f561c08de6b Initial load
duke
parents:
diff changeset
    13
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    14
 * Unless required by applicable law or agreed to in writing, software
7f561c08de6b Initial load
duke
parents:
diff changeset
    15
 * distributed under the License is distributed on an "AS IS" BASIS,
7f561c08de6b Initial load
duke
parents:
diff changeset
    16
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
7f561c08de6b Initial load
duke
parents:
diff changeset
    17
 * See the License for the specific language governing permissions and
7f561c08de6b Initial load
duke
parents:
diff changeset
    18
 * limitations under the License.
7f561c08de6b Initial load
duke
parents:
diff changeset
    19
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
    20
/*
7f561c08de6b Initial load
duke
parents:
diff changeset
    21
 * $Id: ExsltStrings.java,v 1.1.2.1 2005/08/01 02:08:48 jeffsuttor Exp $
7f561c08de6b Initial load
duke
parents:
diff changeset
    22
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
    23
package com.sun.org.apache.xalan.internal.lib;
7f561c08de6b Initial load
duke
parents:
diff changeset
    24
7f561c08de6b Initial load
duke
parents:
diff changeset
    25
import java.util.StringTokenizer;
7f561c08de6b Initial load
duke
parents:
diff changeset
    26
7f561c08de6b Initial load
duke
parents:
diff changeset
    27
import javax.xml.parsers.DocumentBuilderFactory;
7f561c08de6b Initial load
duke
parents:
diff changeset
    28
import javax.xml.parsers.FactoryConfigurationError;
7f561c08de6b Initial load
duke
parents:
diff changeset
    29
import javax.xml.parsers.ParserConfigurationException;
7f561c08de6b Initial load
duke
parents:
diff changeset
    30
7f561c08de6b Initial load
duke
parents:
diff changeset
    31
import com.sun.org.apache.xpath.internal.NodeSet;
7f561c08de6b Initial load
duke
parents:
diff changeset
    32
7f561c08de6b Initial load
duke
parents:
diff changeset
    33
import org.w3c.dom.Document;
7f561c08de6b Initial load
duke
parents:
diff changeset
    34
import org.w3c.dom.Element;
7f561c08de6b Initial load
duke
parents:
diff changeset
    35
import org.w3c.dom.Node;
7f561c08de6b Initial load
duke
parents:
diff changeset
    36
import org.w3c.dom.NodeList;
7f561c08de6b Initial load
duke
parents:
diff changeset
    37
import org.w3c.dom.Text;
7f561c08de6b Initial load
duke
parents:
diff changeset
    38
7f561c08de6b Initial load
duke
parents:
diff changeset
    39
/**
7f561c08de6b Initial load
duke
parents:
diff changeset
    40
 * This class contains EXSLT strings extension functions.
7f561c08de6b Initial load
duke
parents:
diff changeset
    41
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    42
 * It is accessed by specifying a namespace URI as follows:
7f561c08de6b Initial load
duke
parents:
diff changeset
    43
 * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    44
 *    xmlns:str="http://exslt.org/strings"
7f561c08de6b Initial load
duke
parents:
diff changeset
    45
 * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
    46
 * The documentation for each function has been copied from the relevant
7f561c08de6b Initial load
duke
parents:
diff changeset
    47
 * EXSLT Implementer page.
7f561c08de6b Initial load
duke
parents:
diff changeset
    48
 *
7f561c08de6b Initial load
duke
parents:
diff changeset
    49
 * @see <a href="http://www.exslt.org/">EXSLT</a>
7f561c08de6b Initial load
duke
parents:
diff changeset
    50
7f561c08de6b Initial load
duke
parents:
diff changeset
    51
 * @xsl.usage general
7f561c08de6b Initial load
duke
parents:
diff changeset
    52
 */
7f561c08de6b Initial load
duke
parents:
diff changeset
    53
public class ExsltStrings extends ExsltBase
7f561c08de6b Initial load
duke
parents:
diff changeset
    54
{
7f561c08de6b Initial load
duke
parents:
diff changeset
    55
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
    56
   * The str:align function aligns a string within another string.
7f561c08de6b Initial load
duke
parents:
diff changeset
    57
   * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
    58
   * The first argument gives the target string to be aligned. The second argument gives
7f561c08de6b Initial load
duke
parents:
diff changeset
    59
   * the padding string within which it is to be aligned.
7f561c08de6b Initial load
duke
parents:
diff changeset
    60
   * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
    61
   * If the target string is shorter than the padding string then a range of characters
7f561c08de6b Initial load
duke
parents:
diff changeset
    62
   * in the padding string are repaced with those in the target string. Which characters
7f561c08de6b Initial load
duke
parents:
diff changeset
    63
   * are replaced depends on the value of the third argument, which gives the type of
7f561c08de6b Initial load
duke
parents:
diff changeset
    64
   * alignment. It can be one of 'left', 'right' or 'center'. If no third argument is
7f561c08de6b Initial load
duke
parents:
diff changeset
    65
   * given or if it is not one of these values, then it defaults to left alignment.
7f561c08de6b Initial load
duke
parents:
diff changeset
    66
   * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
    67
   * With left alignment, the range of characters replaced by the target string begins
7f561c08de6b Initial load
duke
parents:
diff changeset
    68
   * with the first character in the padding string. With right alignment, the range of
7f561c08de6b Initial load
duke
parents:
diff changeset
    69
   * characters replaced by the target string ends with the last character in the padding
7f561c08de6b Initial load
duke
parents:
diff changeset
    70
   * string. With center alignment, the range of characters replaced by the target string
7f561c08de6b Initial load
duke
parents:
diff changeset
    71
   * is in the middle of the padding string, such that either the number of unreplaced
7f561c08de6b Initial load
duke
parents:
diff changeset
    72
   * characters on either side of the range is the same or there is one less on the left
7f561c08de6b Initial load
duke
parents:
diff changeset
    73
   * than there is on the right.
7f561c08de6b Initial load
duke
parents:
diff changeset
    74
   * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
    75
   * If the target string is longer than the padding string, then it is truncated to be
7f561c08de6b Initial load
duke
parents:
diff changeset
    76
   * the same length as the padding string and returned.
7f561c08de6b Initial load
duke
parents:
diff changeset
    77
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
    78
   * @param targetStr The target string
7f561c08de6b Initial load
duke
parents:
diff changeset
    79
   * @param paddingStr The padding string
7f561c08de6b Initial load
duke
parents:
diff changeset
    80
   * @param type The type of alignment
7f561c08de6b Initial load
duke
parents:
diff changeset
    81
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
    82
   * @return The string after alignment
7f561c08de6b Initial load
duke
parents:
diff changeset
    83
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
    84
  public static String align(String targetStr, String paddingStr, String type)
7f561c08de6b Initial load
duke
parents:
diff changeset
    85
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
    86
    if (targetStr.length() >= paddingStr.length())
7f561c08de6b Initial load
duke
parents:
diff changeset
    87
      return targetStr.substring(0, paddingStr.length());
7f561c08de6b Initial load
duke
parents:
diff changeset
    88
7f561c08de6b Initial load
duke
parents:
diff changeset
    89
    if (type.equals("right"))
7f561c08de6b Initial load
duke
parents:
diff changeset
    90
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
    91
      return paddingStr.substring(0, paddingStr.length() - targetStr.length()) + targetStr;
7f561c08de6b Initial load
duke
parents:
diff changeset
    92
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
    93
    else if (type.equals("center"))
7f561c08de6b Initial load
duke
parents:
diff changeset
    94
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
    95
      int startIndex = (paddingStr.length() - targetStr.length()) / 2;
7f561c08de6b Initial load
duke
parents:
diff changeset
    96
      return paddingStr.substring(0, startIndex) + targetStr + paddingStr.substring(startIndex + targetStr.length());
7f561c08de6b Initial load
duke
parents:
diff changeset
    97
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
    98
    // Default is left
7f561c08de6b Initial load
duke
parents:
diff changeset
    99
    else
7f561c08de6b Initial load
duke
parents:
diff changeset
   100
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   101
      return targetStr + paddingStr.substring(targetStr.length());
7f561c08de6b Initial load
duke
parents:
diff changeset
   102
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   103
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   104
7f561c08de6b Initial load
duke
parents:
diff changeset
   105
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   106
   * See above
7f561c08de6b Initial load
duke
parents:
diff changeset
   107
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
   108
  public static String align(String targetStr, String paddingStr)
7f561c08de6b Initial load
duke
parents:
diff changeset
   109
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
   110
    return align(targetStr, paddingStr, "left");
7f561c08de6b Initial load
duke
parents:
diff changeset
   111
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   112
7f561c08de6b Initial load
duke
parents:
diff changeset
   113
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   114
   * The str:concat function takes a node set and returns the concatenation of the
7f561c08de6b Initial load
duke
parents:
diff changeset
   115
   * string values of the nodes in that node set. If the node set is empty, it returns
7f561c08de6b Initial load
duke
parents:
diff changeset
   116
   * an empty string.
7f561c08de6b Initial load
duke
parents:
diff changeset
   117
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   118
   * @param nl A node set
7f561c08de6b Initial load
duke
parents:
diff changeset
   119
   * @return The concatenation of the string values of the nodes in that node set
7f561c08de6b Initial load
duke
parents:
diff changeset
   120
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
   121
  public static String concat(NodeList nl)
7f561c08de6b Initial load
duke
parents:
diff changeset
   122
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
   123
    StringBuffer sb = new StringBuffer();
7f561c08de6b Initial load
duke
parents:
diff changeset
   124
    for (int i = 0; i < nl.getLength(); i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
   125
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   126
      Node node = nl.item(i);
7f561c08de6b Initial load
duke
parents:
diff changeset
   127
      String value = toString(node);
7f561c08de6b Initial load
duke
parents:
diff changeset
   128
7f561c08de6b Initial load
duke
parents:
diff changeset
   129
      if (value != null && value.length() > 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   130
        sb.append(value);
7f561c08de6b Initial load
duke
parents:
diff changeset
   131
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   132
7f561c08de6b Initial load
duke
parents:
diff changeset
   133
    return sb.toString();
7f561c08de6b Initial load
duke
parents:
diff changeset
   134
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   135
7f561c08de6b Initial load
duke
parents:
diff changeset
   136
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   137
   * The str:padding function creates a padding string of a certain length.
7f561c08de6b Initial load
duke
parents:
diff changeset
   138
   * The first argument gives the length of the padding string to be created.
7f561c08de6b Initial load
duke
parents:
diff changeset
   139
   * The second argument gives a string to be used to create the padding. This
7f561c08de6b Initial load
duke
parents:
diff changeset
   140
   * string is repeated as many times as is necessary to create a string of the
7f561c08de6b Initial load
duke
parents:
diff changeset
   141
   * length specified by the first argument; if the string is more than a character
7f561c08de6b Initial load
duke
parents:
diff changeset
   142
   * long, it may have to be truncated to produce the required length. If no second
7f561c08de6b Initial load
duke
parents:
diff changeset
   143
   * argument is specified, it defaults to a space (' '). If the second argument is
7f561c08de6b Initial load
duke
parents:
diff changeset
   144
   * an empty string, str:padding returns an empty string.
7f561c08de6b Initial load
duke
parents:
diff changeset
   145
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   146
   * @param length The length of the padding string to be created
7f561c08de6b Initial load
duke
parents:
diff changeset
   147
   * @param pattern The string to be used as pattern
7f561c08de6b Initial load
duke
parents:
diff changeset
   148
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   149
   * @return A padding string of the given length
7f561c08de6b Initial load
duke
parents:
diff changeset
   150
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
   151
  public static String padding(double length, String pattern)
7f561c08de6b Initial load
duke
parents:
diff changeset
   152
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
   153
    if (pattern == null || pattern.length() == 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   154
      return "";
7f561c08de6b Initial load
duke
parents:
diff changeset
   155
7f561c08de6b Initial load
duke
parents:
diff changeset
   156
    StringBuffer sb = new StringBuffer();
7f561c08de6b Initial load
duke
parents:
diff changeset
   157
    int len = (int)length;
7f561c08de6b Initial load
duke
parents:
diff changeset
   158
    int numAdded = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   159
    int index = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   160
    while (numAdded < len)
7f561c08de6b Initial load
duke
parents:
diff changeset
   161
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   162
      if (index == pattern.length())
7f561c08de6b Initial load
duke
parents:
diff changeset
   163
        index = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   164
7f561c08de6b Initial load
duke
parents:
diff changeset
   165
      sb.append(pattern.charAt(index));
7f561c08de6b Initial load
duke
parents:
diff changeset
   166
      index++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   167
      numAdded++;
7f561c08de6b Initial load
duke
parents:
diff changeset
   168
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   169
7f561c08de6b Initial load
duke
parents:
diff changeset
   170
    return sb.toString();
7f561c08de6b Initial load
duke
parents:
diff changeset
   171
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   172
7f561c08de6b Initial load
duke
parents:
diff changeset
   173
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   174
   * See above
7f561c08de6b Initial load
duke
parents:
diff changeset
   175
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
   176
  public static String padding(double length)
7f561c08de6b Initial load
duke
parents:
diff changeset
   177
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
   178
    return padding(length, " ");
7f561c08de6b Initial load
duke
parents:
diff changeset
   179
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   180
7f561c08de6b Initial load
duke
parents:
diff changeset
   181
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   182
   * The str:split function splits up a string and returns a node set of token
7f561c08de6b Initial load
duke
parents:
diff changeset
   183
   * elements, each containing one token from the string.
7f561c08de6b Initial load
duke
parents:
diff changeset
   184
   * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
   185
   * The first argument is the string to be split. The second argument is a pattern
7f561c08de6b Initial load
duke
parents:
diff changeset
   186
   * string. The string given by the first argument is split at any occurrence of
7f561c08de6b Initial load
duke
parents:
diff changeset
   187
   * this pattern. For example:
7f561c08de6b Initial load
duke
parents:
diff changeset
   188
   * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   189
   * str:split('a, simple, list', ', ') gives the node set consisting of:
7f561c08de6b Initial load
duke
parents:
diff changeset
   190
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   191
   * <token>a</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   192
   * <token>simple</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   193
   * <token>list</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   194
   * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   195
   * If the second argument is omitted, the default is the string '&#x20;' (i.e. a space).
7f561c08de6b Initial load
duke
parents:
diff changeset
   196
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   197
   * @param str The string to be split
7f561c08de6b Initial load
duke
parents:
diff changeset
   198
   * @param pattern The pattern
7f561c08de6b Initial load
duke
parents:
diff changeset
   199
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   200
   * @return A node set of split tokens
7f561c08de6b Initial load
duke
parents:
diff changeset
   201
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
   202
  public static NodeList split(String str, String pattern)
7f561c08de6b Initial load
duke
parents:
diff changeset
   203
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
   204
7f561c08de6b Initial load
duke
parents:
diff changeset
   205
7f561c08de6b Initial load
duke
parents:
diff changeset
   206
    NodeSet resultSet = new NodeSet();
7f561c08de6b Initial load
duke
parents:
diff changeset
   207
    resultSet.setShouldCacheNodes(true);
7f561c08de6b Initial load
duke
parents:
diff changeset
   208
7f561c08de6b Initial load
duke
parents:
diff changeset
   209
    boolean done = false;
7f561c08de6b Initial load
duke
parents:
diff changeset
   210
    int fromIndex = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   211
    int matchIndex = 0;
7f561c08de6b Initial load
duke
parents:
diff changeset
   212
    String token = null;
7f561c08de6b Initial load
duke
parents:
diff changeset
   213
7f561c08de6b Initial load
duke
parents:
diff changeset
   214
    while (!done && fromIndex < str.length())
7f561c08de6b Initial load
duke
parents:
diff changeset
   215
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   216
      matchIndex = str.indexOf(pattern, fromIndex);
7f561c08de6b Initial load
duke
parents:
diff changeset
   217
      if (matchIndex >= 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   218
      {
7f561c08de6b Initial load
duke
parents:
diff changeset
   219
        token = str.substring(fromIndex, matchIndex);
7f561c08de6b Initial load
duke
parents:
diff changeset
   220
        fromIndex = matchIndex + pattern.length();
7f561c08de6b Initial load
duke
parents:
diff changeset
   221
      }
7f561c08de6b Initial load
duke
parents:
diff changeset
   222
      else
7f561c08de6b Initial load
duke
parents:
diff changeset
   223
      {
7f561c08de6b Initial load
duke
parents:
diff changeset
   224
        done = true;
7f561c08de6b Initial load
duke
parents:
diff changeset
   225
        token = str.substring(fromIndex);
7f561c08de6b Initial load
duke
parents:
diff changeset
   226
      }
7f561c08de6b Initial load
duke
parents:
diff changeset
   227
7f561c08de6b Initial load
duke
parents:
diff changeset
   228
      Document doc = DocumentHolder.m_doc;
7f561c08de6b Initial load
duke
parents:
diff changeset
   229
      synchronized (doc)
7f561c08de6b Initial load
duke
parents:
diff changeset
   230
      {
7f561c08de6b Initial load
duke
parents:
diff changeset
   231
        Element element = doc.createElement("token");
7f561c08de6b Initial load
duke
parents:
diff changeset
   232
        Text text = doc.createTextNode(token);
7f561c08de6b Initial load
duke
parents:
diff changeset
   233
        element.appendChild(text);
7f561c08de6b Initial load
duke
parents:
diff changeset
   234
        resultSet.addNode(element);
7f561c08de6b Initial load
duke
parents:
diff changeset
   235
      }
7f561c08de6b Initial load
duke
parents:
diff changeset
   236
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   237
7f561c08de6b Initial load
duke
parents:
diff changeset
   238
    return resultSet;
7f561c08de6b Initial load
duke
parents:
diff changeset
   239
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   240
7f561c08de6b Initial load
duke
parents:
diff changeset
   241
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   242
   * See above
7f561c08de6b Initial load
duke
parents:
diff changeset
   243
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
   244
  public static NodeList split(String str)
7f561c08de6b Initial load
duke
parents:
diff changeset
   245
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
   246
    return split(str, " ");
7f561c08de6b Initial load
duke
parents:
diff changeset
   247
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   248
7f561c08de6b Initial load
duke
parents:
diff changeset
   249
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   250
   * The str:tokenize function splits up a string and returns a node set of token
7f561c08de6b Initial load
duke
parents:
diff changeset
   251
   * elements, each containing one token from the string.
7f561c08de6b Initial load
duke
parents:
diff changeset
   252
   * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
   253
   * The first argument is the string to be tokenized. The second argument is a
7f561c08de6b Initial load
duke
parents:
diff changeset
   254
   * string consisting of a number of characters. Each character in this string is
7f561c08de6b Initial load
duke
parents:
diff changeset
   255
   * taken as a delimiting character. The string given by the first argument is split
7f561c08de6b Initial load
duke
parents:
diff changeset
   256
   * at any occurrence of any of these characters. For example:
7f561c08de6b Initial load
duke
parents:
diff changeset
   257
   * <pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   258
   * str:tokenize('2001-06-03T11:40:23', '-T:') gives the node set consisting of:
7f561c08de6b Initial load
duke
parents:
diff changeset
   259
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   260
   * <token>2001</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   261
   * <token>06</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   262
   * <token>03</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   263
   * <token>11</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   264
   * <token>40</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   265
   * <token>23</token>
7f561c08de6b Initial load
duke
parents:
diff changeset
   266
   * </pre>
7f561c08de6b Initial load
duke
parents:
diff changeset
   267
   * If the second argument is omitted, the default is the string '&#x9;&#xA;&#xD;&#x20;'
7f561c08de6b Initial load
duke
parents:
diff changeset
   268
   * (i.e. whitespace characters).
7f561c08de6b Initial load
duke
parents:
diff changeset
   269
   * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
   270
   * If the second argument is an empty string, the function returns a set of token
7f561c08de6b Initial load
duke
parents:
diff changeset
   271
   * elements, each of which holds a single character.
7f561c08de6b Initial load
duke
parents:
diff changeset
   272
   * <p>
7f561c08de6b Initial load
duke
parents:
diff changeset
   273
   * Note: This one is different from the tokenize extension function in the Xalan
7f561c08de6b Initial load
duke
parents:
diff changeset
   274
   * namespace. The one in Xalan returns a set of Text nodes, while this one wraps
7f561c08de6b Initial load
duke
parents:
diff changeset
   275
   * the Text nodes inside the token Element nodes.
7f561c08de6b Initial load
duke
parents:
diff changeset
   276
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   277
   * @param toTokenize The string to be tokenized
7f561c08de6b Initial load
duke
parents:
diff changeset
   278
   * @param delims The delimiter string
7f561c08de6b Initial load
duke
parents:
diff changeset
   279
   *
7f561c08de6b Initial load
duke
parents:
diff changeset
   280
   * @return A node set of split token elements
7f561c08de6b Initial load
duke
parents:
diff changeset
   281
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
   282
  public static NodeList tokenize(String toTokenize, String delims)
7f561c08de6b Initial load
duke
parents:
diff changeset
   283
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
   284
7f561c08de6b Initial load
duke
parents:
diff changeset
   285
7f561c08de6b Initial load
duke
parents:
diff changeset
   286
    NodeSet resultSet = new NodeSet();
7f561c08de6b Initial load
duke
parents:
diff changeset
   287
7f561c08de6b Initial load
duke
parents:
diff changeset
   288
    if (delims != null && delims.length() > 0)
7f561c08de6b Initial load
duke
parents:
diff changeset
   289
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   290
      StringTokenizer lTokenizer = new StringTokenizer(toTokenize, delims);
7f561c08de6b Initial load
duke
parents:
diff changeset
   291
7f561c08de6b Initial load
duke
parents:
diff changeset
   292
      Document doc = DocumentHolder.m_doc;
7f561c08de6b Initial load
duke
parents:
diff changeset
   293
      synchronized (doc)
7f561c08de6b Initial load
duke
parents:
diff changeset
   294
      {
7f561c08de6b Initial load
duke
parents:
diff changeset
   295
        while (lTokenizer.hasMoreTokens())
7f561c08de6b Initial load
duke
parents:
diff changeset
   296
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   297
          Element element = doc.createElement("token");
7f561c08de6b Initial load
duke
parents:
diff changeset
   298
          element.appendChild(doc.createTextNode(lTokenizer.nextToken()));
7f561c08de6b Initial load
duke
parents:
diff changeset
   299
          resultSet.addNode(element);
7f561c08de6b Initial load
duke
parents:
diff changeset
   300
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   301
      }
7f561c08de6b Initial load
duke
parents:
diff changeset
   302
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   303
    // If the delimiter is an empty string, create one token Element for
7f561c08de6b Initial load
duke
parents:
diff changeset
   304
    // every single character.
7f561c08de6b Initial load
duke
parents:
diff changeset
   305
    else
7f561c08de6b Initial load
duke
parents:
diff changeset
   306
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   307
7f561c08de6b Initial load
duke
parents:
diff changeset
   308
      Document doc = DocumentHolder.m_doc;
7f561c08de6b Initial load
duke
parents:
diff changeset
   309
      synchronized (doc)
7f561c08de6b Initial load
duke
parents:
diff changeset
   310
      {
7f561c08de6b Initial load
duke
parents:
diff changeset
   311
        for (int i = 0; i < toTokenize.length(); i++)
7f561c08de6b Initial load
duke
parents:
diff changeset
   312
        {
7f561c08de6b Initial load
duke
parents:
diff changeset
   313
          Element element = doc.createElement("token");
7f561c08de6b Initial load
duke
parents:
diff changeset
   314
          element.appendChild(doc.createTextNode(toTokenize.substring(i, i+1)));
7f561c08de6b Initial load
duke
parents:
diff changeset
   315
          resultSet.addNode(element);
7f561c08de6b Initial load
duke
parents:
diff changeset
   316
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   317
      }
7f561c08de6b Initial load
duke
parents:
diff changeset
   318
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   319
7f561c08de6b Initial load
duke
parents:
diff changeset
   320
    return resultSet;
7f561c08de6b Initial load
duke
parents:
diff changeset
   321
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   322
7f561c08de6b Initial load
duke
parents:
diff changeset
   323
  /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   324
   * See above
7f561c08de6b Initial load
duke
parents:
diff changeset
   325
   */
7f561c08de6b Initial load
duke
parents:
diff changeset
   326
  public static NodeList tokenize(String toTokenize)
7f561c08de6b Initial load
duke
parents:
diff changeset
   327
  {
7f561c08de6b Initial load
duke
parents:
diff changeset
   328
    return tokenize(toTokenize, " \t\n\r");
7f561c08de6b Initial load
duke
parents:
diff changeset
   329
  }
7f561c08de6b Initial load
duke
parents:
diff changeset
   330
    /**
7f561c08de6b Initial load
duke
parents:
diff changeset
   331
     * This class is not loaded until first referenced (see Java Language
7f561c08de6b Initial load
duke
parents:
diff changeset
   332
     * Specification by Gosling/Joy/Steele, section 12.4.1)
7f561c08de6b Initial load
duke
parents:
diff changeset
   333
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   334
     * The static members are created when this class is first referenced, as a
7f561c08de6b Initial load
duke
parents:
diff changeset
   335
     * lazy initialization not needing checking against null or any
7f561c08de6b Initial load
duke
parents:
diff changeset
   336
     * synchronization.
7f561c08de6b Initial load
duke
parents:
diff changeset
   337
     *
7f561c08de6b Initial load
duke
parents:
diff changeset
   338
     */
7f561c08de6b Initial load
duke
parents:
diff changeset
   339
    private static class DocumentHolder
7f561c08de6b Initial load
duke
parents:
diff changeset
   340
    {
7f561c08de6b Initial load
duke
parents:
diff changeset
   341
        // Reuse the Document object to reduce memory usage.
7f561c08de6b Initial load
duke
parents:
diff changeset
   342
        private static final Document m_doc;
7f561c08de6b Initial load
duke
parents:
diff changeset
   343
        static {
7f561c08de6b Initial load
duke
parents:
diff changeset
   344
            try
7f561c08de6b Initial load
duke
parents:
diff changeset
   345
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   346
                m_doc =DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
7f561c08de6b Initial load
duke
parents:
diff changeset
   347
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   348
7f561c08de6b Initial load
duke
parents:
diff changeset
   349
            catch(ParserConfigurationException pce)
7f561c08de6b Initial load
duke
parents:
diff changeset
   350
            {
7f561c08de6b Initial load
duke
parents:
diff changeset
   351
                  throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException(pce);
7f561c08de6b Initial load
duke
parents:
diff changeset
   352
            }
7f561c08de6b Initial load
duke
parents:
diff changeset
   353
7f561c08de6b Initial load
duke
parents:
diff changeset
   354
        }
7f561c08de6b Initial load
duke
parents:
diff changeset
   355
    }
7f561c08de6b Initial load
duke
parents:
diff changeset
   356
7f561c08de6b Initial load
duke
parents:
diff changeset
   357
}