8232168: Fix non wide char canonicalization on Windows
authorrschmelter
Tue, 15 Oct 2019 17:08:09 +0200
changeset 58878 452df727bebb
parent 58877 aec7bf35d6f5
child 58879 717ebfbac29d
8232168: Fix non wide char canonicalization on Windows Reviewed-by: clanger, alanb, ccheung
src/java.base/windows/native/libjava/canonicalize_md.c
test/hotspot/jtreg/runtime/LoadClass/LongBCP.java
--- a/src/java.base/windows/native/libjava/canonicalize_md.c	Thu Oct 31 16:54:16 2019 -0700
+++ b/src/java.base/windows/native/libjava/canonicalize_md.c	Tue Oct 15 17:08:09 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,39 +41,9 @@
 
 #undef DEBUG_PATH        /* Define this to debug path code */
 
-#define isfilesep(c) ((c) == '/' || (c) == '\\')
-#define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
-#define islb(c)      (IsDBCSLeadByte((BYTE)(c)))
-
-
 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
    or NULL if dend would have been exceeded.  If first != '\0', copy that byte
    before copying bytes from src to send - 1. */
-
-static char *
-cp(char *dst, char *dend, char first, char *src, char *send)
-{
-    char *p = src, *q = dst;
-    if (first != '\0') {
-        if (q < dend) {
-            *q++ = first;
-        } else {
-            errno = ENAMETOOLONG;
-            return NULL;
-        }
-    }
-    if (send - p > dend - q) {
-        errno = ENAMETOOLONG;
-        return NULL;
-    }
-    while (p < send) {
-        *q++ = *p++;
-    }
-    return q;
-}
-
-/* Wide character version of cp */
-
 static WCHAR*
 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
 {
@@ -95,23 +65,8 @@
     return q;
 }
 
-
 /* Find first instance of '\\' at or following start.  Return the address of
    that byte or the address of the null terminator if '\\' is not found. */
-
-static char *
-nextsep(char *start)
-{
-    char *p = start;
-    int c;
-    while ((c = *p) && (c != '\\')) {
-        p += ((islb(c) && p[1]) ? 2 : 1);
-    }
-    return p;
-}
-
-/* Wide character version of nextsep */
-
 static WCHAR *
 wnextsep(WCHAR *start)
 {
@@ -123,21 +78,6 @@
 }
 
 /* Tell whether the given string contains any wildcard characters */
-
-static int
-wild(char *start)
-{
-    char *p = start;
-    int c;
-    while (c = *p) {
-        if ((c == '*') || (c == '?')) return 1;
-        p += ((islb(c) && p[1]) ? 2 : 1);
-    }
-    return 0;
-}
-
-/* Wide character version of wild */
-
 static int
 wwild(WCHAR *start)
 {
@@ -157,25 +97,6 @@
    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
 */
 static int
-dots(char *start)
-{
-    char *p = start;
-    while (*p) {
-        if ((p = strchr(p, '.')) == NULL) // find next occurrence of '.'
-            return 0; // no more dots
-        p++; // next char
-        while ((*p) == '.') // go to the end of dots
-            p++;
-        if (*p && (*p != '\\')) // path element does not end with a dot
-            p++; // go to the next char
-        else
-            return 1; // path element does end with a dot - prohibited
-    }
-    return 0; // no prohibited combinations of dots found
-}
-
-/* Wide character version of dots */
-static int
 wdots(WCHAR *start)
 {
     WCHAR *p = start;
@@ -203,7 +124,6 @@
    successfully after copying the rest of the original path to the result path.
    Other I/O errors cause an error return.
 */
-
 int
 lastErrorReportable()
 {
@@ -225,222 +145,10 @@
     return 1;
 }
 
-int wcanonicalize(WCHAR *orig_path, WCHAR *result, int size);
-
 /* Convert a pathname to canonical form.  The input orig_path is assumed to
    have been converted to native form already, via JVM_NativePath().  This is
    necessary because _fullpath() rejects duplicate separator characters on
    Win95, though it accepts them on NT. */
-
-int
-canonicalize(char *orig_path, char *result, int size)
-{
-    WIN32_FIND_DATA fd;
-    HANDLE h;
-    char path[1024];    /* Working copy of path */
-    char *src, *dst, *dend;
-    wchar_t *worig_path, *wresult;
-    size_t converted_chars = 0;
-
-    /* handle long path with length >= MAX_PATH */
-    if (strlen(orig_path) >= MAX_PATH) {
-        if ((worig_path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
-            return -1;
-
-        if (mbstowcs_s(&converted_chars, worig_path, (size_t)size, orig_path, (size_t)(size - 1)) != 0) {
-            free(worig_path);
-            return -1;
-        }
-
-        if ((wresult = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
-            return -1;
-
-        if (wcanonicalize(worig_path, wresult, size) != 0) {
-            free(worig_path);
-            free(wresult);
-            return -1;
-        }
-
-        if (wcstombs_s(&converted_chars, result, (size_t)size, wresult, (size_t)(size - 1)) != 0) {
-            free(worig_path);
-            free(wresult);
-            return -1;
-        }
-
-        free(worig_path);
-        free(wresult);
-        return 0;
-    }
-
-    /* Reject paths that contain wildcards */
-    if (wild(orig_path)) {
-        errno = EINVAL;
-        return -1;
-    }
-
-    /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
-      contrary to the documentation, the _fullpath procedure does not require
-      the drive to be available.  It also does not reliably change all
-      occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
-    if (!_fullpath(path, orig_path, sizeof(path))) {
-        return -1;
-    }
-
-    /* Correction for Win95: _fullpath may leave a trailing "\\"
-      on a UNC pathname */
-    if ((path[0] == '\\') && (path[1] == '\\')) {
-        char *p = path + strlen(path);
-        if ((p[-1] == '\\') && !islb(p[-2])) {
-            p[-1] = '\0';
-        }
-    }
-
-    if (dots(path)) /* Check for prohibited combinations of dots */
-        return -1;
-
-    src = path;            /* Start scanning here */
-    dst = result;        /* Place results here */
-    dend = dst + size;        /* Don't go to or past here */
-
-    /* Copy prefix, assuming path is absolute */
-    if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
-        /* Drive specifier */
-        *src = toupper(*src);    /* Canonicalize drive letter */
-        if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
-            return -1;
-        }
-        src += 2;
-    } else if ((src[0] == '\\') && (src[1] == '\\')) {
-        /* UNC pathname */
-        char *p;
-        p = nextsep(src + 2);    /* Skip past host name */
-        if (!*p) {
-            /* A UNC pathname must begin with "\\\\host\\share",
-            so reject this path as invalid if there is no share name */
-            errno = EINVAL;
-            return -1;
-        }
-        p = nextsep(p + 1);    /* Skip past share name */
-        if (!(dst = cp(dst, dend, '\0', src, p))) {
-            return -1;
-        }
-        src = p;
-    } else {
-        /* Invalid path */
-        errno = EINVAL;
-        return -1;
-    }
-
-    /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
-    /* for root pathes like "E:\" . If the path has this form, we should  */
-    /* simply return it, it is already canonicalized. */
-    if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
-        /* At this point we have already copied the drive specifier ("z:")*/
-        /* so we need to copy "\" and the null character. */
-        result[2] = '\\';
-        result[3] = '\0';
-        return 0;
-    }
-
-    /* At this point we have copied either a drive specifier ("z:") or a UNC
-    prefix ("\\\\host\\share") to the result buffer, and src points to the
-    first byte of the remainder of the path.  We now scan through the rest
-    of the path, looking up each prefix in order to find the true name of
-    the last element of each prefix, thereby computing the full true name of
-    the original path. */
-    while (*src) {
-        char *p = nextsep(src + 1);    /* Find next separator */
-        char c = *p;
-        assert(*src == '\\');        /* Invariant */
-        *p = '\0';            /* Temporarily clear separator */
-        h = FindFirstFile(path, &fd);    /* Look up prefix */
-        *p = c;                /* Restore separator */
-        if (h != INVALID_HANDLE_VALUE) {
-            /* Lookup succeeded; append true name to result and continue */
-            FindClose(h);
-            if (!(dst = cp(dst, dend, '\\',
-                fd.cFileName,
-                fd.cFileName + strlen(fd.cFileName)))) {
-                return -1;
-            }
-            src = p;
-            continue;
-        } else {
-            if (!lastErrorReportable()) {
-                if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
-                    return -1;
-                }
-                break;
-            } else {
-                return -1;
-            }
-        }
-    }
-
-    if (dst >= dend) {
-        errno = ENAMETOOLONG;
-        return -1;
-    }
-    *dst = '\0';
-    return 0;
-
-}
-
-
-/* Convert a pathname to canonical form.  The input prefix is assumed
-   to be in canonical form already, and the trailing filename must not
-   contain any wildcard, dot/double dot, or other "tricky" characters
-   that are rejected by the canonicalize() routine above.  This
-   routine is present to allow the canonicalization prefix cache to be
-   used while still returning canonical names with the correct
-   capitalization. */
-
-int
-canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
-{
-    WIN32_FIND_DATA fd;
-    HANDLE h;
-    char *src, *dst, *dend;
-
-    src = pathWithCanonicalPrefix;
-    dst = result;        /* Place results here */
-    dend = dst + size;   /* Don't go to or past here */
-
-    h = FindFirstFile(pathWithCanonicalPrefix, &fd);    /* Look up file */
-    if (h != INVALID_HANDLE_VALUE) {
-        /* Lookup succeeded; concatenate true name to prefix */
-        FindClose(h);
-        if (!(dst = cp(dst, dend, '\0',
-                       canonicalPrefix,
-                       canonicalPrefix + strlen(canonicalPrefix)))) {
-            return -1;
-        }
-        if (!(dst = cp(dst, dend, '\\',
-                       fd.cFileName,
-                       fd.cFileName + strlen(fd.cFileName)))) {
-            return -1;
-        }
-    } else {
-        if (!lastErrorReportable()) {
-            if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
-                return -1;
-            }
-        } else {
-            return -1;
-        }
-    }
-
-    if (dst >= dend) {
-        errno = ENAMETOOLONG;
-        return -1;
-    }
-    *dst = '\0';
-    return 0;
-}
-
-
-/* Wide character version of canonicalize. Size is a wide-character size. */
-
 int
 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
 {
@@ -559,9 +267,13 @@
     return -1;
 }
 
-
-/* Wide character version of canonicalizeWithPrefix. */
-
+/* Convert a pathname to canonical form.  The input prefix is assumed
+   to be in canonical form already, and the trailing filename must not
+   contain any wildcard, dot/double dot, or other "tricky" characters
+   that are rejected by the canonicalize() routine above.  This
+   routine is present to allow the canonicalization prefix cache to be
+   used while still returning canonical names with the correct
+   capitalization. */
 int
 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
 {
@@ -613,6 +325,46 @@
     return 0;
 }
 
+/* Non-Wide character version of canonicalize.
+   Converts to whchar and delegates to wcanonicalize. */
+int
+canonicalize(char* orig_path, char* result, int size) {
+    wchar_t* wpath = NULL;
+    wchar_t* wresult = NULL;
+    size_t conv;
+    size_t path_len = strlen(orig_path);
+    int ret = -1;
+
+    if ((wpath = (wchar_t*) malloc(sizeof(wchar_t) * (path_len + 1))) == NULL) {
+        goto finish;
+    }
+
+    if (mbstowcs_s(&conv, wpath, path_len + 1, orig_path, path_len) != 0) {
+        goto finish;
+    }
+
+    if ((wresult = (wchar_t*) malloc(sizeof(wchar_t) * size)) == NULL) {
+        goto finish;
+    }
+
+    if (wcanonicalize(wpath, wresult, size) != 0) {
+        goto finish;
+    }
+
+    if (wcstombs_s(&conv, result, (size_t) size, wresult, (size_t) (size - 1)) != 0) {
+        goto finish;
+    }
+
+    // Change return value to success.
+    ret = 0;
+
+finish:
+    free(wresult);
+    free(wpath);
+
+    return ret;
+}
+
 
 /* The appropriate location of getPrefixed() should be io_util_md.c, but
    java.lang.instrument package has hardwired canonicalize_md.c into their
--- a/test/hotspot/jtreg/runtime/LoadClass/LongBCP.java	Thu Oct 31 16:54:16 2019 -0700
+++ b/test/hotspot/jtreg/runtime/LoadClass/LongBCP.java	Tue Oct 15 17:08:09 2019 +0200
@@ -102,8 +102,6 @@
               .shouldHaveExitValue(0);
 
         // relative path tests
-        // We currently cannot handle relative path specified in the
-        // -Xbootclasspath/a on windows.
         //
         // relative path length within the file system limit
         int fn_max_length = 255;
@@ -128,6 +126,22 @@
         output.shouldContain("Hello World")
               .shouldHaveExitValue(0);
 
+        // Test a relative path for a jar file < MAX_PATH, but where the
+        // absolute path is > MAX_PATH.
+        Path jarDir = Paths.get(".");
+        for (int i = 0; i < 21; ++i) {
+            jarDir = jarDir.resolve("0123456789");
+        }
+        Files.createDirectories(jarDir);
+        Path jarPath = jarDir.resolve("hello.jar");
+        Files.copy(Paths.get(helloJar), jarPath);
+        bootCP = "-Xbootclasspath/a:" + jarPath.toString();
+        pb = ProcessTools.createJavaProcessBuilder(bootCP, "Hello");
+
+        output = new OutputAnalyzer(pb.start());
+        output.shouldContain("Hello World")
+              .shouldHaveExitValue(0);
+
         // total relative path length exceeds MAX_PATH
         destDir = Paths.get(destDir.toString(), "yyyyyyyy");