src/hotspot/share/utilities/json.cpp
changeset 47216 71c04702a3d5
parent 41335 27115e2e42cc
child 51334 cc2c79d22508
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/utilities/json.cpp	Tue Sep 12 19:03:39 2017 +0200
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * This is not really json in the state it is now.
+ * Some differences:
+ * - Double quotes around the key in an object are not enforced,
+ *     i.e. you can write: { foo : "bar" } instead of { "foo" : "bar" }.
+ * - Comments are allowed.
+ * - The last element in an object or array can have an ending comma.
+ */
+
+#include "precompiled.hpp"
+#include "utilities/json.hpp"
+#include "utilities/ostream.hpp"
+#include <limits.h>
+#include <math.h>
+
+// Like strchr(), except that a pointer to the terminating NUL of 's'
+// (rather than NULL) is returned when 'c' does not occur in 's'.
+const char* strchrnul_(const char *s, int c) {
+  const char wanted = (char)c;
+  const char* cursor = s;
+  while (*cursor != '\0' && *cursor != wanted) {
+    cursor++;
+  }
+  return cursor;
+}
+
+// Set up a parser over the NUL-terminated string 'text'. Nothing is parsed
+// here; the cursor (pos) and the error marker (mark) both start at the
+// beginning of the text. Error reports are printed to 'st' unless 'silent'
+// is set.
+JSON::JSON(const char* text, bool silent, outputStream* st)
+  : start(text),
+    pos(text),
+    mark(text),
+    level(0),
+    line(1),
+    column(0),
+    silent(silent),
+    _valid(true),
+    _st(st)
+{
+}
+
+// Parse the text given to the constructor and store the outcome in _valid.
+void JSON::parse() {
+  assert(start != NULL, "Need something to parse");
+  if (start != NULL) {
+    _valid = parse_json_value();
+    return;
+  }
+  // Only reachable when the assert above is not active:
+  // report the problem instead of dereferencing NULL.
+  _valid = false;
+  error(INTERNAL_ERROR, "JSON parser was called with a string that was NULL.");
+}
+
+bool JSON::valid() {
+  return _valid;
+}
+
+// Parse one json value starting at the next token.
+// At nesting level 0 only a single object or array is accepted, and nothing
+// but whitespace/comments may follow it. At any other level every kind of
+// value (object, array, string, number, true, false, null) is accepted.
+// Returns false if an error was reported or a callee returned false.
+bool JSON::parse_json_value() {
+  int token = skip_to_token();
+  if (token == -1) {
+    return false;
+  }
+
+  if (level != 0) {
+    // Nested value
+    switch (token) {
+    case '{':
+      return parse_json_object();
+
+    case '[':
+      return parse_json_array();
+
+    case '"':
+      return parse_json_string();
+
+    case '-':
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+      return parse_json_number();
+
+    case 't':
+      return parse_json_symbol("true", JSON_TRUE);
+
+    case 'f':
+      return parse_json_symbol("false", JSON_FALSE);
+
+    case 'n':
+      return parse_json_symbol("null", JSON_NULL);
+
+    case 0:
+      error(SYNTAX_ERROR, "EOS was encountered when expecting a json value.");
+      return false;
+
+    default:
+      error(SYNTAX_ERROR, "Could not parse as a json value (did you forget to quote your strings?).");
+      return false;
+    }
+  }
+
+  // Top level: must start with object or array
+  bool parsed;
+  switch (token) {
+  case '{':
+    parsed = parse_json_object();
+    break;
+
+  case '[':
+    parsed = parse_json_array();
+    break;
+
+  case 0:
+    error(SYNTAX_ERROR, "EOS was encountered before any json declarations");
+    return false;
+
+  default:
+    error(SYNTAX_ERROR, "Json must start with an object or an array.");
+    return false;
+  }
+  if (!parsed) {
+    return false;
+  }
+
+  // Anything but EOS after the top level value is an error
+  token = skip_to_token();
+  if (token > 0) {
+    mark_pos();
+    error(SYNTAX_ERROR, "Only one top level object/array is allowed.");
+    return false;
+  }
+  return token == 0;
+}
+
+// Parse an object: '{', then zero or more "key : value" pairs separated by
+// ',' (an ending comma is tolerated), then '}'.
+// Calls callback() with JSON_OBJECT_BEGIN before and JSON_OBJECT_END after
+// the members.
+// Should only be called when we actually have the start of an object
+// Otherwise it is an internal error
+// Returns false if an error was reported or a callee returned false.
+bool JSON::parse_json_object() {
+  NOT_PRODUCT(const char* prev_pos);
+  int c;
+
+  mark_pos();
+  // Check that we are not called in error
+  if (expect_any("{", "object start", INTERNAL_ERROR) <= 0) {
+    return false;
+  }
+
+  if (!callback(JSON_OBJECT_BEGIN, NULL, level++)) {
+    return false;
+  }
+
+  for (;;) {
+    mark_pos();
+    c = skip_to_token();
+    if (c == 0) {
+      error(SYNTAX_ERROR, "EOS when expecting an object key or object end");
+      return false;
+    } else if (c < 0) {
+      return false;
+    } else if (c == '}') {
+      // We got here from either empty object "{}" or ending comma "{a:1,}"
+      next();
+      break;
+    }
+
+    NOT_PRODUCT(prev_pos = pos);
+    if (parse_json_key() == false) {
+      return false;
+    }
+    assert(pos > prev_pos, "parsing stalled");
+
+    // A negative result means skip_to_token() already reported an error;
+    // stop here instead of emitting a second, misleading message.
+    if (skip_to_token() < 0) {
+      return false;
+    }
+    mark_pos();
+    if (expect_any(":", "object key-value separator") <= 0) {
+      return false;
+    }
+
+    if (skip_to_token() < 0) {
+      return false;
+    }
+    mark_pos();
+    NOT_PRODUCT(prev_pos = pos);
+    if (parse_json_value() == false) {
+      return false;
+    }
+    assert(pos > prev_pos, "parsing stalled");
+
+    c = skip_to_token();
+    if (c < 0) {
+      return false;
+    }
+    mark_pos();
+    if (expect_any(",}", "value separator or object end") <= 0) {
+      return false;
+    }
+    if (c == '}') {
+      break;
+    }
+  }
+
+  assert(c == '}', "object parsing ended without object end token ('}')");
+  return callback(JSON_OBJECT_END, NULL, --level);
+}
+
+// Parse an array: '[', then zero or more values separated by ',' (an ending
+// comma is tolerated), then ']'.
+// Calls callback() with JSON_ARRAY_BEGIN before and JSON_ARRAY_END after
+// the elements.
+// Should only be called when we actually have the start of an array
+// Otherwise it is an internal error
+// Returns false if an error was reported or a callee returned false.
+bool JSON::parse_json_array() {
+  NOT_PRODUCT(const char* prev_pos);
+  int c;
+
+  mark_pos();
+  // Check that we are not called in error
+  if (expect_any("[", "array start character", INTERNAL_ERROR) <= 0) {
+    return false;
+  }
+
+  if (!callback(JSON_ARRAY_BEGIN, NULL, level++)) {
+    return false;
+  }
+
+  for (;;) {
+    mark_pos();
+    c = skip_to_token();
+    if (c == 0) {
+      error(SYNTAX_ERROR, "EOS when expecting a json value or array end");
+      return false;
+    } else if (c < 0) {
+      return false;
+    } else if (c == ']') {
+      // We got here from either empty array "[]" or ending comma "[1,]"
+      next();
+      break;
+    }
+
+    mark_pos();
+    NOT_PRODUCT(prev_pos = pos);
+    if (parse_json_value() == false) {
+      return false;
+    }
+    assert(pos > prev_pos, "parsing stalled");
+
+    c = skip_to_token();
+    if (c < 0) {
+      // skip_to_token() already reported the error; don't add a second,
+      // misleading "Expected value separator" message on top of it.
+      return false;
+    }
+    mark_pos();
+    if (expect_any(",]", "value separator or array end") <= 0) {
+      return false;
+    }
+    if (c == ']') {
+      break;
+    }
+  }
+
+  assert(c == ']', "array parsing ended without array end token (']')");
+  return callback(JSON_ARRAY_END, NULL, --level);
+}
+
+// Parse a double-quoted string and pass its contents (quotes excluded) to
+// callback() as JSON_KEY when 'key' is true, otherwise as JSON_STRING.
+// Escape sequences are not interpreted: the string ends at the next '"'.
+bool JSON::parse_json_string(bool key) {
+  mark_pos();
+  if (expect_any("\"", "string start character", INTERNAL_ERROR) <= 0) {
+    return false;
+  }
+
+  const char* closing_quote = strchr(pos, '"'); // TODO: escapes
+  if (closing_quote == NULL) {
+    error(SYNTAX_ERROR, "String started here never ended. Expected \'\"\' before EOS.");
+    return false;
+  }
+
+  JSON_VAL v;
+  v.str.start = pos;
+  v.str.length = closing_quote - pos;
+  skip(closing_quote - pos);
+
+  if (expect_any("\"", "string end character", INTERNAL_ERROR) <= 0) {
+    return false;
+  }
+
+  return callback(key ? JSON_KEY : JSON_STRING, &v, level);
+}
+
+// TODO: hotspot equivalents?
+// True for the ASCII letters A-Z and a-z only.
+static bool is_alpha(u_char c) {
+  const bool upper = (c >= 'A' && c <= 'Z');
+  const bool lower = (c >= 'a' && c <= 'z');
+  return upper || lower;
+}
+// True for the decimal digits 0-9.
+static bool is_numeric(u_char c) {
+  return ('0' <= c) && (c <= '9');
+}
+// True for anything is_alpha() or is_numeric() accepts.
+static bool is_alnum(u_char c) {
+  if (is_alpha(c)) {
+    return true;
+  }
+  return is_numeric(c);
+}
+// True for the characters allowed in an unquoted object key: [a-zA-Z0-9_].
+static bool is_word(u_char c) {
+  if (c == '_') {
+    return true;
+  }
+  return is_alnum(c);
+}
+
+// Parse an object key and pass it to callback() as JSON_KEY.
+// A key is either a double-quoted string or an unquoted word matching
+// ([a-zA-Z0-9_])+ that ends at ':' or at any character <= ' '.
+bool JSON::parse_json_key() {
+  mark_pos();
+  if (peek() == '"') {
+    return parse_json_string(true);
+  }
+
+  const char* const key_start = pos;
+  u_char c = peek();
+  if (c == 0) {
+    error(SYNTAX_ERROR, "Got EOS when expecting an object key.");
+    return false;
+  }
+  if (!is_word(c)) {
+    error(SYNTAX_ERROR, "Expected an object key, which can be a double-quoted (\") string or a simple string (only alphanumeric characters and underscore, separated by whitespace) that doesn't need to be quoted.");
+    return false;
+  }
+
+  // Consume word characters until a delimiter: a control character
+  // (including EOS), a space, or the key-value separator ':'
+  while ((c = peek()) > ' ' && c != ':') {
+    if (!is_word(c)) {
+      error(SYNTAX_ERROR, "Object key need to be quoted, or consist entirely of alphanumeric characters and underscores.");
+      return false;
+    }
+    next();
+  }
+
+  JSON_VAL v;
+  v.str.start = key_start;
+  v.str.length = pos - key_start;
+  return callback(JSON_KEY, &v, level);
+}
+
+// Parse a number at the current position and pass it to callback().
+// The text is scanned as a double. A value with no fractional part that
+// fits in an int is reported as JSON_NUMBER_INT; everything else
+// (fractions, values outside the int range, infinities, NaN) is reported
+// as JSON_NUMBER_FLOAT so that the double is never converted to an int it
+// cannot represent, which would be undefined behaviour.
+// Returns false if no number could be scanned or the callback returned false.
+bool JSON::parse_json_number() {
+  double double_value;
+  int tokens, read;
+  JSON_VAL v;
+
+  mark_pos();
+
+  // %n stores the number of characters consumed so far; it must not be
+  // counted as a converted token (checked by the assert below).
+  // NOTE(review): %lf also accepts exponent notation, although the error
+  // message below says exponents are unsupported - confirm intended grammar.
+  tokens = sscanf(pos, "%lf%n", &double_value, &read);
+  assert(tokens <= 1, "scanf implementation that counts as a token, parsing json numbers will always fail");
+  if (tokens != 1) {
+    error(SYNTAX_ERROR, "Couldn't parse json number (note that exponents are not supported).");
+    return false;
+  }
+  assert(read > 0, "sanity");
+
+  const bool is_integral = (floor(double_value) == double_value);
+  const bool fits_in_int = (double_value >= (double)INT_MIN &&
+                            double_value <= (double)INT_MAX);
+  if (is_integral && fits_in_int) {
+    // No fractional part and representable - treat as an int
+    v.int_value = (int)double_value;
+    if (!callback(JSON_NUMBER_INT, &v, level)) {
+      return false;
+    }
+  } else {
+    v.double_value = double_value;
+    if (!callback(JSON_NUMBER_FLOAT, &v, level)) {
+      return false;
+    }
+  }
+  skip(read);
+  return true;
+}
+
+// Parse the literal 'name' (e.g. "true") at the current position and, on a
+// match, report it to callback() as 'symbol' without a value.
+// On a mismatch expect_string() reports the error; the error marker is then
+// advanced to the current position and false is returned.
+bool JSON::parse_json_symbol(const char* name, JSON_TYPE symbol) {
+  const bool matched = expect_string(name, "maybe you forgot to quote your strings?");
+  if (matched) {
+    return callback(symbol, NULL, level);
+  }
+  mark_pos();
+  return false;
+}
+
+void JSON::mark_pos() {
+  assert((mark == start || *(mark - 1)) != 0, "buffer overrun");
+  assert(mark <= pos, "mark runahead");
+
+  u_char c;
+
+  while (mark < pos) {
+    c = *mark;
+    assert(c != 0, "pos buffer overrun?");
+    if (c != 0) {
+      mark++;
+      column++;
+    }
+    if (c == '\n') {
+      line++;
+      column = 0;
+    }
+  }
+
+  assert(mark <= pos, "mark runahead");
+}
+
+// Consume and return the character at the current position.
+// At EOS nothing is consumed and 0 is returned.
+u_char JSON::next() {
+  assert(pos == start || *(pos - 1) != 0, "buffer overrun");
+
+  const u_char current = *pos;
+  if (current == 0) {
+    return 0;
+  }
+  pos++;
+  return current;
+}
+
+// Return the character at the current position without consuming it
+// (0 at EOS).
+u_char JSON::peek() {
+  const u_char current = *pos;
+  return current;
+}
+
+// Peek ahead i chars (0 is same as peek()).
+// Never looks beyond the terminator: if EOS lies within the next i chars,
+// 0 is returned.
+u_char JSON::peek(size_t i) {
+  const char* p = pos;
+  for (; i > 0 && *p != 0; i--) {
+    p++;
+  }
+  return *p;
+}
+
+/*
+ * Require the next character to be one of those listed in valid_chars.
+ * Returns 0 (after reporting error e) if EOS is encountered.
+ * Returns -1 (after reporting error e) if the next character is not listed.
+ * Otherwise consumes the character and returns it.
+ */
+int JSON::expect_any(const char* valid_chars, const char* error_msg, JSON_ERROR e) {
+  const size_t len = strlen(valid_chars);
+  assert(len > 0, "need non-empty string");
+  // Only used to phrase the error messages
+  const char* const one_of = len > 1 ? "one of " : "";
+
+  const u_char c = peek();
+  if (c == 0) {
+    error(e, "Got EOS when expecting %s (%s\'%s\').", error_msg, one_of, valid_chars);
+    return 0;
+  }
+  for (const char* candidate = valid_chars; candidate < valid_chars + len; candidate++) {
+    if (c == *candidate) {
+      return next();
+    }
+  }
+  error(e, "Expected %s (%s\'%s\').", error_msg, one_of, valid_chars);
+  return -1;
+}
+
+/*
+ * Require expected_string to be next in the stream.
+ * On a full match the characters are consumed and true is returned.
+ * Otherwise error e is reported, nothing is consumed and false is returned.
+ * expected_string must not contain spaces or control characters.
+ */
+bool JSON::expect_string(const char* expected_string, const char* error_msg, JSON_ERROR e) {
+  assert(expected_string != NULL, "need non-null string");
+  const size_t len = strlen(expected_string);
+  assert(len > 0, "need non-empty string");
+
+  for (size_t i = 0; i < len; i++) {
+    const u_char want = expected_string[i];
+    assert(want > ' ', "not sane for control characters");
+    if (want <= ' ') {
+      error(INTERNAL_ERROR, "expect got a control char");
+    }
+
+    const u_char got = pos[i];
+    if (got == 0) {
+      error(e, "EOS encountered when expecting %s (\"%s\")", error_msg, expected_string);
+      return false;
+    }
+    if (got != want) {
+      error(e, "Expected \"%s\" (%s)", expected_string, error_msg);
+      return false;
+    }
+  }
+  skip(len);
+  return true;
+}
+
+/*
+ * Skip up to i characters, stopping early at EOS.
+ * Returns the number of characters actually skipped, which is less than i
+ * only when EOS was reached first.
+ */
+size_t JSON::skip(size_t i) {
+  size_t skipped = 0;
+
+  // next() returns 0 without advancing at EOS, so only count a character
+  // when it was really consumed.
+  while (skipped < i && next() != 0) {
+    skipped++;
+  }
+  return skipped;
+}
+
+/*
+ * Skip whitespace and comments, including several comments in a row.
+ * Returns the first token after whitespace/comments without consuming it
+ * Returns 0 if EOS is encountered.
+ * Returns -1 if there is an error (the error has then been reported).
+ */
+int JSON::skip_to_token() {
+  for (;;) {
+    int c = peek(0);
+    if (c == '/') {
+      u_char c2 = peek(1);
+      if (c2 == '/') {
+        skip_line_comment();
+        // Look at the stream again from the top: what follows may be
+        // whitespace, EOS, a token, or yet another comment.
+        continue;
+      } else if (c2 == '*') {
+        if (skip_block_comment() < 0) {
+          return -1;
+        }
+        continue;
+      }
+      // A '/' that does not start a comment is returned as a token below.
+    }
+    if (c == 0 || c > ' ') {
+      return c;
+    }
+    next();
+  }
+  return 0;
+}
+
+/*
+ * Advance until the wanted char is next in the stream and return it
+ * without consuming it.
+ * Returns 0 if EOS is encountered first.
+ */
+u_char JSON::skip_to(u_char want) {
+  // Characters are consumed one by one through next() rather than
+  // jumping ahead with strchr, so its bookkeeping is performed.
+  u_char c = peek();
+  while (c != 0 && c != want) {
+    next();
+    c = peek();
+  }
+  return c;
+}
+
+/*
+ * Should only be called when we actually have a line comment to skip.
+ * Otherwise it is an internal error.
+ *
+ * Consumes the comment up to and including the terminating newline.
+ * Returns the first character after the line comment without consuming it.
+ * Returns 0 if EOS is encountered.
+ */
+u_char JSON::skip_line_comment() {
+  u_char c;
+
+  // Check that we are not called in error
+  expect_any("/", "line comment start", INTERNAL_ERROR);
+  expect_any("/", "line comment start", INTERNAL_ERROR);
+
+  c = skip_to('\n');
+  if (c == 0) {
+    return 0;
+  }
+  // Consume the newline itself, then only look at what follows. Consuming
+  // it too would make the caller lose the first character of the next line.
+  next();
+  return peek();
+}
+
+/*
+ * Should only be called when we actually have a block comment to skip.
+ * Otherwise it is an internal error (reported, and 0 is returned).
+ *
+ * Returns the first token after the block comment without consuming it.
+ * Returns -1 if EOS is encountered in the middle of a comment.
+ */
+int JSON::skip_block_comment() {
+  const char* current;
+
+  // Check that we are not called in error.
+  if (peek() != '/' || peek(1) != '*') {
+    // Let expect handle EOS.
+    expect_string("/*", "block comment start", INTERNAL_ERROR);
+    return 0;
+  }
+
+  // Start looking for the terminator after the two opening characters,
+  // so that the '*' of the opener is not mistaken for the start of the
+  // closing one (i.e. a bare slash-star-slash does not end the comment).
+  current = pos + 2;
+  for (;;) {
+    current = strchrnul_(current, '*');
+
+    if (current[0] == 0 || current[1] == 0) {
+      // Advance error marker to start of block comment
+      mark_pos();
+      error(SYNTAX_ERROR, "Block comment started here never ended. Expected \"*/\" before EOS.");
+      return -1;
+    }
+
+    if (current[1] == '/') {
+      // Jump straight to the terminator and let expect_string consume it
+      pos = current;
+      if (expect_string("*/", "block comment end", INTERNAL_ERROR) == false) {
+        return -1;
+      }
+      // Found block comment end
+      return peek();
+    }
+    current++;
+  }
+}
+
+const char* JSON::strerror(JSON_ERROR e) {
+  switch (e) {
+  case SYNTAX_ERROR:
+    return "Syntax error";
+  case INTERNAL_ERROR:
+    return "Internal error";
+  case KEY_ERROR:
+    return "Key error";
+  case VALUE_ERROR:
+    return "Value error";
+  default:
+    ShouldNotReachHere();
+    return "Unknown error";
+  }
+}
+
+// Report a problem of kind e: mark the parser as invalid and, unless the
+// parser is silent, print
+//   "<kind> on line <line> byte <column + 1>: <formatted message>"
+// followed by "Got EOS." if the current position is at EOS, the run of
+// characters > ' ' found at the error marker, and the text of the line
+// that contains the marker.
+void JSON::error(JSON_ERROR e, const char* format, ...) {
+  _valid = false;
+
+  if (!silent) {
+    const char* line_start;
+    const char* tmp;
+    size_t line_length;
+    va_list args;
+    u_char c;
+
+    _st->print("%s on line %u byte %u: ", JSON::strerror(e), line, column + 1);
+    va_start(args, format);
+    _st->vprint(format, args);
+    _st->cr();
+    va_end(args);
+
+    // column counts the characters between the line start and the marker
+    line_start = mark - column;
+    assert(line_start >= start, "out of bounds");
+    assert(line_start <= mark, "out of bounds");
+    assert(line_start == start || line_start[-1] == '\n', "line counting error");
+
+    c = *pos;
+    if (c == 0) {
+      _st->print("  Got ");
+      _st->print_cr("EOS.");
+    }
+    tmp = mark;
+    c = *tmp;
+    if (c > ' ') {
+      _st->print("  At ");
+      _st->print("'");
+      while (c > ' ') {
+        _st->print("%c", c);
+        tmp++;
+        c = *tmp;
+      }
+      _st->print_cr("'.");
+    }
+
+    // Skip to newline or EOS
+    tmp = strchrnul_(mark, '\n');
+    line_length = tmp - line_start;
+
+    // Print the offending line only, not everything up to the end of the text
+    _st->print_cr("%.*s", (int)line_length, line_start);
+  }
+}
+