# HG changeset patch
# User mlarsson
# Date 1519634060 -3600
# Node ID 580bb0b85f63346f7aac3f4bc2621b90f50a88a9
# Parent c200b4700aeb99509aaf8e07a2345f7a410c54f4
8198554: Add fuzzy matching for log levels and tags when parsing -Xlog
Reviewed-by: hseigel, coleenp

diff -r c200b4700aeb -r 580bb0b85f63 src/hotspot/share/logging/logLevel.cpp
--- a/src/hotspot/share/logging/logLevel.cpp Sat Feb 24 22:42:37 2018 -0800
+++ b/src/hotspot/share/logging/logLevel.cpp Mon Feb 26 09:34:20 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 #include "precompiled.hpp"
 #include "logging/logLevel.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/stringUtils.hpp"
 
 const char* LogLevel::_name[] = {
   "off",
@@ -40,3 +41,28 @@
   }
   return Invalid;
 }
+
+// Returns the level whose name is most similar to 'level', or Invalid if no
+// level name reaches the required similarity. Lets a caller that rejected a
+// level string suggest the closest valid one.
+LogLevelType LogLevel::fuzzy_match(const char *level) {
+  size_t len = strlen(level);
+  if (len == 0) {
+    // An empty string cannot resemble any level name; bail out before scoring.
+    return LogLevel::Invalid;
+  }
+  LogLevelType match = LogLevel::Invalid;
+  double best = 0.4; // required similarity to be considered a match
+  // Candidates start at index 1; index 0 is never suggested.
+  for (uint i = 1; i < Count; i++) {
+    LogLevelType cur = static_cast<LogLevelType>(i);
+    const char* levelname = LogLevel::name(cur);
+    double score = StringUtils::similarity(level, len, levelname, strlen(levelname));
+    // '>=' lets a later level replace an earlier one with the same score.
+    if (score >= best) {
+      match = cur;
+      best = score;
+    }
+  }
+  return match;
+}
diff -r c200b4700aeb -r 580bb0b85f63 src/hotspot/share/logging/logLevel.hpp
--- a/src/hotspot/share/logging/logLevel.hpp Sat Feb 24 22:42:37 2018 -0800
+++ b/src/hotspot/share/logging/logLevel.hpp Mon Feb 26 09:34:20 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it
@@ -71,6 +71,7 @@
   }
 
   static LogLevel::type from_string(const char* str);
+  static LogLevel::type fuzzy_match(const char *level);
 
  private:
   static const char* _name[];
diff -r c200b4700aeb -r 580bb0b85f63 src/hotspot/share/logging/logSelection.cpp
--- a/src/hotspot/share/logging/logSelection.cpp Sat Feb 24 22:42:37 2018 -0800
+++ b/src/hotspot/share/logging/logSelection.cpp Mon Feb 26 09:34:20 2018 +0100
@@ -72,10 +72,16 @@
   LogLevelType level = LogLevel::Unspecified;
   char* equals = strchr(str, '=');
   if (equals != NULL) {
-    level = LogLevel::from_string(equals + 1);
+    const char* levelstr = equals + 1;
+    level = LogLevel::from_string(levelstr);
     if (level == LogLevel::Invalid) {
       if (errstream != NULL) {
-        errstream->print_cr("Invalid level '%s' in log selection.", equals + 1);
+        errstream->print("Invalid level '%s' in log selection.", levelstr);
+        LogLevelType match = LogLevel::fuzzy_match(levelstr);
+        if (match != LogLevel::Invalid) {
+          errstream->print(" Did you mean '%s'?", LogLevel::name(match));
+        }
+        errstream->cr();
       }
       return LogSelection::Invalid;
     }
@@ -109,7 +115,12 @@
     LogTagType tag = LogTag::from_string(cur_tag);
     if (tag == LogTag::__NO_TAG) {
       if (errstream != NULL) {
-        errstream->print_cr("Invalid tag '%s' in log selection.", cur_tag);
+        errstream->print("Invalid tag '%s' in log selection.", cur_tag);
+        LogTagType match = LogTag::fuzzy_match(cur_tag);
+        if (match != LogTag::__NO_TAG) {
+          errstream->print(" Did you mean '%s'?", LogTag::name(match));
+        }
+        errstream->cr();
       }
       return LogSelection::Invalid;
     }
diff -r c200b4700aeb -r 580bb0b85f63 src/hotspot/share/logging/logTag.cpp
--- a/src/hotspot/share/logging/logTag.cpp Sat Feb 24 22:42:37 2018 -0800
+++ b/src/hotspot/share/logging/logTag.cpp Mon Feb 26 09:34:20 2018 +0100
@@ -23,6 +23,7 @@
  */
 #include "precompiled.hpp"
 #include "logging/logTag.hpp"
+#include "utilities/stringUtils.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
 #include "utilities/quickSort.hpp"
@@ -43,6 +44,30 @@
   return __NO_TAG;
 }
 
+// Returns the tag whose name is most similar to 'str', or __NO_TAG if no tag
+// name reaches the required similarity.
+LogTagType LogTag::fuzzy_match(const char *str) {
+  size_t len = strlen(str);
+  if (len == 0) {
+    // An empty string cannot resemble any tag name; bail out before scoring.
+    return LogTag::__NO_TAG;
+  }
+  LogTagType match = LogTag::__NO_TAG;
+  double best = 0.5; // required similarity to be considered a match
+  // Candidates start at index 1; index 0 is never suggested.
+  for (size_t i = 1; i < LogTag::Count; i++) {
+    LogTagType tag = static_cast<LogTagType>(i);
+    const char* tagname = LogTag::name(tag);
+    double score = StringUtils::similarity(tagname, strlen(tagname), str, len);
+    // '>=' lets a later tag replace an earlier one with the same score.
+    if (score >= best) {
+      match = tag;
+      best = score;
+    }
+  }
+  return match;
+}
+
 static int cmp_logtag(LogTagType a, LogTagType b) {
   return strcmp(LogTag::name(a), LogTag::name(b));
 }
diff -r c200b4700aeb -r 580bb0b85f63 src/hotspot/share/logging/logTag.hpp
--- a/src/hotspot/share/logging/logTag.hpp Sat Feb 24 22:42:37 2018 -0800
+++ b/src/hotspot/share/logging/logTag.hpp Mon Feb 26 09:34:20 2018 +0100
@@ -192,6 +192,7 @@
   }
 
   static LogTag::type from_string(const char *str);
+  static LogTag::type fuzzy_match(const char *tag);
   static void list_tags(outputStream* out);
 
  private:
diff -r c200b4700aeb -r 580bb0b85f63 src/hotspot/share/runtime/globals.cpp
--- a/src/hotspot/share/runtime/globals.cpp Sat Feb 24 22:42:37 2018 -0800
+++ b/src/hotspot/share/runtime/globals.cpp Mon Feb 26 09:34:20 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,7 @@ #include "utilities/defaultStream.hpp" #include "utilities/macros.hpp" #include "utilities/ostream.hpp" +#include "utilities/stringUtils.hpp" #if INCLUDE_ALL_GCS #include "gc/g1/g1_globals.hpp" #endif // INCLUDE_ALL_GCS @@ -880,25 +881,6 @@ return _name_len; } -// Compute string similarity based on Dice's coefficient -static float str_similar(const char* str1, const char* str2, size_t len2) { - int len1 = (int) strlen(str1); - int total = len1 + (int) len2; - - int hit = 0; - - for (int i = 0; i < len1 -1; ++i) { - for (int j = 0; j < (int) len2 -1; ++j) { - if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) { - ++hit; - break; - } - } - } - - return 2.0f * (float) hit / (float) total; -} - Flag* Flag::fuzzy_match(const char* name, size_t length, bool allow_locked) { float VMOptionsFuzzyMatchSimilarity = 0.7f; Flag* match = NULL; @@ -906,7 +888,7 @@ float max_score = -1; for (Flag* current = &flagTable[0]; current->_name != NULL; current++) { - score = str_similar(current->_name, name, length); + score = StringUtils::similarity(current->_name, strlen(current->_name), name, length); if (score > max_score) { max_score = score; match = current; diff -r c200b4700aeb -r 580bb0b85f63 src/hotspot/share/utilities/stringUtils.cpp --- a/src/hotspot/share/utilities/stringUtils.cpp Sat Feb 24 22:42:37 2018 -0800 +++ b/src/hotspot/share/utilities/stringUtils.cpp Mon Feb 26 09:34:20 2018 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it
@@ -41,3 +41,30 @@
 
   return replace_count;
 }
+
+// Compute string similarity based on Dice's coefficient: twice the number of
+// adjacent character pairs of str1 that also occur in str2, divided by the
+// sum of the two lengths. Since the divisor is the sum of the lengths rather
+// than the number of pairs, identical strings score below 1.0.
+// Returns 0.0 if either string is empty.
+double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {
+  // Filter out zero-length strings: 'len - 1' below would wrap around for the
+  // unsigned lengths and the loops would read past the end of the strings.
+  if (len1 == 0 || len2 == 0) {
+    return 0.0;
+  }
+
+  size_t total = len1 + len2;
+
+  size_t hit = 0;
+  for (size_t i = 0; i < len1 - 1; i++) {
+    for (size_t j = 0; j < len2 - 1; j++) {
+      if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
+        ++hit;
+        break; // count each pair of str1 at most once
+      }
+    }
+  }
+
+  return 2.0 * (double) hit / (double) total;
+}
diff -r c200b4700aeb -r 580bb0b85f63 src/hotspot/share/utilities/stringUtils.hpp
--- a/src/hotspot/share/utilities/stringUtils.hpp Sat Feb 24 22:42:37 2018 -0800
+++ b/src/hotspot/share/utilities/stringUtils.hpp Mon Feb 26 09:34:20 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,6 +37,10 @@
   //
   // Returns the count of substrings that have been replaced.
   static int replace_no_expand(char* string, const char* from, const char* to);
+
+  // Compute string similarity based on Dice's coefficient.
+  // Returns 0.0 if either string is empty.
+  static double similarity(const char* str1, size_t len1, const char* str2, size_t len2);
 };
 
 #endif // SHARE_VM_UTILITIES_STRINGUTILS_HPP
diff -r c200b4700aeb -r 580bb0b85f63 test/hotspot/gtest/logging/test_logLevel.cpp
--- a/test/hotspot/gtest/logging/test_logLevel.cpp Sat Feb 24 22:42:37 2018 -0800
+++ b/test/hotspot/gtest/logging/test_logLevel.cpp Mon Feb 26 09:34:20 2018 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,6 +45,20 @@
   EXPECT_EQ(LogLevel::Invalid, LogLevel::from_string("infodebugwarning"));
 }
 
+TEST(LogLevel, fuzzy_match) {
+  for (size_t i = 1; i < LogLevel::Count; i++) {
+    LogLevelType level = static_cast<LogLevelType>(i);
+    ASSERT_EQ(level, LogLevel::fuzzy_match(LogLevel::name(level)));
+  }
+
+  ASSERT_EQ(LogLevel::Warning, LogLevel::fuzzy_match("warn"));
+  ASSERT_EQ(LogLevel::Error, LogLevel::fuzzy_match("err"));
+
+  ASSERT_EQ(LogLevel::Invalid, LogLevel::fuzzy_match("unknown"));
+  // An empty level string must not match anything.
+  ASSERT_EQ(LogLevel::Invalid, LogLevel::fuzzy_match(""));
+}
+
 TEST(LogLevel, name) {
   // Use names from macro as reference
 #define LOG_LEVEL(lname, lstring) \
diff -r c200b4700aeb -r 580bb0b85f63 test/hotspot/gtest/logging/test_logTag.cpp
--- a/test/hotspot/gtest/logging/test_logTag.cpp Sat Feb 24 22:42:37 2018 -0800
+++ b/test/hotspot/gtest/logging/test_logTag.cpp Mon Feb 26 09:34:20 2018 +0100
@@ -45,6 +45,20 @@
   }
 }
 
+TEST(LogTag, fuzzy_match) {
+  for (size_t i = 1; i < LogTag::Count; i++) {
+    LogTagType tag = static_cast<LogTagType>(i);
+    EXPECT_EQ(tag, LogTag::fuzzy_match(LogTag::name(tag)));
+  }
+
+  EXPECT_EQ(LogTag::_logging, LogTag::fuzzy_match("loggin"));
+  EXPECT_EQ(LogTag::_logging, LogTag::fuzzy_match("loging"));
+
+  EXPECT_EQ(LogTag::__NO_TAG, LogTag::fuzzy_match("unrecognizabletag"));
+  // An empty tag string must not match anything.
+  EXPECT_EQ(LogTag::__NO_TAG, LogTag::fuzzy_match(""));
+}
+
 TEST(LogTag, name) {
   // Verify for each tag from the macro
 #define LOG_TAG(tag) \
diff -r c200b4700aeb -r 580bb0b85f63 test/hotspot/gtest/utilities/test_stringUtils.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/gtest/utilities/test_stringUtils.cpp Mon Feb 26 09:34:20 2018 +0100
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "utilities/stringUtils.hpp"
+#include "unittest.hpp"
+
+TEST(StringUtils, similarity) {
+  const char* str1 = "the quick brown fox jumps over the lazy dog";
+  const char* str2 = "the quick brown fox jumps over the lazy doh";
+  EXPECT_NEAR(0.95349, StringUtils::similarity(str1, strlen(str1), str2, strlen(str2)), 1e-5);
+}
+
+// Zero-length input must yield 0.0 instead of reading past the end of the strings.
+TEST(StringUtils, similarity_empty) {
+  const char* str = "abc";
+  EXPECT_DOUBLE_EQ(0.0, StringUtils::similarity("", 0, str, strlen(str)));
+  EXPECT_DOUBLE_EQ(0.0, StringUtils::similarity(str, strlen(str), "", 0));
+  EXPECT_DOUBLE_EQ(0.0, StringUtils::similarity("", 0, "", 0));
+}