hotspot/src/share/vm/utilities/utf8.cpp
changeset 14477 95e66ea71f71
parent 8921 14bfe81f2a9d
child 16602 5df51d3bc550
--- a/hotspot/src/share/vm/utilities/utf8.cpp	Fri Nov 09 08:36:17 2012 -0800
+++ b/hotspot/src/share/vm/utilities/utf8.cpp	Mon Nov 12 14:03:53 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -147,7 +147,7 @@
 
 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
   unsigned char ch;
-  const char *ptr = (const char *)utf8_str;
+  const char *ptr = utf8_str;
   int index = 0;
 
   /* ASCII case loop optimization */
@@ -162,6 +162,119 @@
   }
 }
 
+// returns the quoted ascii length of a 0-terminated utf8 string
+int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
+  const char *ptr = utf8_str;
+  const char* end = ptr + utf8_length;
+  int result = 0;
+  while (ptr < end) {
+    jchar c;
+    ptr = UTF8::next(ptr, &c);
+    if (c >= 32 && c < 127) {
+      result++;
+    } else {
+      result += 6;
+    }
+  }
+  return result;
+}
+
+// converts a utf8 string to quoted ascii
+void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) {
+  const char *ptr = utf8_str;
+  char* p = buf;
+  char* end = buf + buflen;
+  while (*ptr != '\0') {
+    jchar c;
+    ptr = UTF8::next(ptr, &c);
+    if (c >= 32 && c < 127) {
+      if (p + 1 >= end) break;      // string is truncated
+      *p++ = (char)c;
+    } else {
+      if (p + 6 >= end) break;      // string is truncated
+      sprintf(p, "\\u%04x", c);
+      p += 6;
+    }
+  }
+  *p = '\0';
+}
+
+
+const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
+  const char *ptr = quoted_ascii_str;
+  char* result = NULL;
+  while (*ptr != '\0') {
+    char c = *ptr;
+    if (c < 32 || c >= 127) break;
+  }
+  if (*ptr == '\0') {
+    // nothing to do so return original string
+    return quoted_ascii_str;
+  }
+  // everything up to this point was ok.
+  int length = ptr - quoted_ascii_str;
+  char* buffer = NULL;
+  for (int round = 0; round < 2; round++) {
+    while (*ptr != '\0') {
+      if (*ptr != '\\') {
+        if (buffer != NULL) {
+          buffer[length] = *ptr;
+        }
+        length++;
+      } else {
+        switch (ptr[1]) {
+          case 'u': {
+            ptr += 2;
+            jchar value=0;
+            for (int i=0; i<4; i++) {
+              char c = *ptr++;
+              switch (c) {
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                  value = (value << 4) + c - '0';
+                  break;
+                case 'a': case 'b': case 'c':
+                case 'd': case 'e': case 'f':
+                  value = (value << 4) + 10 + c - 'a';
+                  break;
+                case 'A': case 'B': case 'C':
+                case 'D': case 'E': case 'F':
+                  value = (value << 4) + 10 + c - 'A';
+                  break;
+                default:
+                  ShouldNotReachHere();
+              }
+            }
+            if (buffer == NULL) {
+              char utf8_buffer[4];
+              char* next = (char*)utf8_write((u_char*)utf8_buffer, value);
+              length += next - utf8_buffer;
+            } else {
+              char* next = (char*)utf8_write((u_char*)&buffer[length], value);
+              length += next - &buffer[length];
+            }
+            break;
+          }
+          case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;
+          case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;
+          case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;
+          case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;
+          default:
+            ShouldNotReachHere();
+        }
+      }
+    }
+    if (round == 0) {
+      buffer = NEW_RESOURCE_ARRAY(char, length + 1);
+      ptr = quoted_ascii_str;
+    } else {
+      buffer[length] = '\0';
+    }
+  }
+  return buffer;
+}
+
+
 // Returns NULL if 'c' it not found. This only works as long
 // as 'c' is an ASCII character
 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
@@ -242,3 +355,35 @@
   }
   *utf8_buffer = '\0';
 }
+
+// returns the quoted ascii length of a unicode string
+int UNICODE::quoted_ascii_length(jchar* base, int length) {
+  int result = 0;
+  for (int i = 0; i < length; i++) {
+    jchar c = base[i];
+    if (c >= 32 && c < 127) {
+      result++;
+    } else {
+      result += 6;
+    }
+  }
+  return result;
+}
+
+// converts a utf8 string to quoted ascii
+void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
+  char* p = buf;
+  char* end = buf + buflen;
+  for (int index = 0; index < length; index++) {
+    jchar c = base[index];
+    if (c >= 32 && c < 127) {
+      if (p + 1 >= end) break;      // string is truncated
+      *p++ = (char)c;
+    } else {
+      if (p + 6 >= end) break;      // string is truncated
+      sprintf(p, "\\u%04x", c);
+      p += 6;
+    }
+  }
+  *p = '\0';
+}