src/java.base/windows/native/libjava/canonicalize_md.c
changeset 58878 452df727bebb
parent 51750 9151fde080e6
equal deleted inserted replaced
58877:aec7bf35d6f5 58878:452df727bebb
     1 /*
     1 /*
     2  * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
     2  * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     7  * published by the Free Software Foundation.  Oracle designates this
    39 #include <errno.h>
    39 #include <errno.h>
    40 #include "io_util_md.h"
    40 #include "io_util_md.h"
    41 
    41 
    42 #undef DEBUG_PATH        /* Define this to debug path code */
    42 #undef DEBUG_PATH        /* Define this to debug path code */
    43 
    43 
    44 #define isfilesep(c) ((c) == '/' || (c) == '\\')
       
    45 #define wisfilesep(c) ((c) == L'/' || (c) == L'\\')
       
    46 #define islb(c)      (IsDBCSLeadByte((BYTE)(c)))
       
    47 
       
    48 
       
    49 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
    44 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
    50    or NULL if dend would have been exceeded.  If first != '\0', copy that byte
    45    or NULL if dend would have been exceeded.  If first != '\0', copy that byte
    51    before copying bytes from src to send - 1. */
    46    before copying bytes from src to send - 1. */
    52 
       
    53 static char *
       
    54 cp(char *dst, char *dend, char first, char *src, char *send)
       
    55 {
       
    56     char *p = src, *q = dst;
       
    57     if (first != '\0') {
       
    58         if (q < dend) {
       
    59             *q++ = first;
       
    60         } else {
       
    61             errno = ENAMETOOLONG;
       
    62             return NULL;
       
    63         }
       
    64     }
       
    65     if (send - p > dend - q) {
       
    66         errno = ENAMETOOLONG;
       
    67         return NULL;
       
    68     }
       
    69     while (p < send) {
       
    70         *q++ = *p++;
       
    71     }
       
    72     return q;
       
    73 }
       
    74 
       
    75 /* Wide character version of cp */
       
    76 
       
    77 static WCHAR*
    47 static WCHAR*
    78 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
    48 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
    79 {
    49 {
    80     WCHAR *p = src, *q = dst;
    50     WCHAR *p = src, *q = dst;
    81     if (first != L'\0') {
    51     if (first != L'\0') {
    93     while (p < send)
    63     while (p < send)
    94         *q++ = *p++;
    64         *q++ = *p++;
    95     return q;
    65     return q;
    96 }
    66 }
    97 
    67 
    98 
       
    99 /* Find first instance of '\\' at or following start.  Return the address of
    68 /* Find first instance of '\\' at or following start.  Return the address of
   100    that byte or the address of the null terminator if '\\' is not found. */
    69    that byte or the address of the null terminator if '\\' is not found. */
   101 
       
   102 static char *
       
   103 nextsep(char *start)
       
   104 {
       
   105     char *p = start;
       
   106     int c;
       
   107     while ((c = *p) && (c != '\\')) {
       
   108         p += ((islb(c) && p[1]) ? 2 : 1);
       
   109     }
       
   110     return p;
       
   111 }
       
   112 
       
   113 /* Wide character version of nextsep */
       
   114 
       
   115 static WCHAR *
    70 static WCHAR *
   116 wnextsep(WCHAR *start)
    71 wnextsep(WCHAR *start)
   117 {
    72 {
   118     WCHAR *p = start;
    73     WCHAR *p = start;
   119     int c;
    74     int c;
   121         p++;
    76         p++;
   122     return p;
    77     return p;
   123 }
    78 }
   124 
    79 
   125 /* Tell whether the given string contains any wildcard characters */
    80 /* Tell whether the given string contains any wildcard characters */
   126 
       
   127 static int
       
   128 wild(char *start)
       
   129 {
       
   130     char *p = start;
       
   131     int c;
       
   132     while (c = *p) {
       
   133         if ((c == '*') || (c == '?')) return 1;
       
   134         p += ((islb(c) && p[1]) ? 2 : 1);
       
   135     }
       
   136     return 0;
       
   137 }
       
   138 
       
   139 /* Wide character version of wild */
       
   140 
       
   141 static int
    81 static int
   142 wwild(WCHAR *start)
    82 wwild(WCHAR *start)
   143 {
    83 {
   144     WCHAR *p = start;
    84     WCHAR *p = start;
   145     int c;
    85     int c;
   154 /* Tell whether the given string contains prohibited combinations of dots.
    94 /* Tell whether the given string contains prohibited combinations of dots.
   155    In the canonicalized form no path element may have dots at its end.
    95    In the canonicalized form no path element may have dots at its end.
   156    Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
    96    Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
   157    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
    97    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
   158 */
    98 */
   159 static int
       
   160 dots(char *start)
       
   161 {
       
   162     char *p = start;
       
   163     while (*p) {
       
   164         if ((p = strchr(p, '.')) == NULL) // find next occurrence of '.'
       
   165             return 0; // no more dots
       
   166         p++; // next char
       
   167         while ((*p) == '.') // go to the end of dots
       
   168             p++;
       
   169         if (*p && (*p != '\\')) // path element does not end with a dot
       
   170             p++; // go to the next char
       
   171         else
       
   172             return 1; // path element does end with a dot - prohibited
       
   173     }
       
   174     return 0; // no prohibited combinations of dots found
       
   175 }
       
   176 
       
   177 /* Wide character version of dots */
       
   178 static int
    99 static int
   179 wdots(WCHAR *start)
   100 wdots(WCHAR *start)
   180 {
   101 {
   181     WCHAR *p = start;
   102     WCHAR *p = start;
   182     // Skip "\\.\" prefix
   103     // Skip "\\.\" prefix
   201    because it is of the wrong type, because access is denied, or because the
   122    because it is of the wrong type, because access is denied, or because the
   202    network is unreachable then canonicalization does not fail, it terminates
   123    network is unreachable then canonicalization does not fail, it terminates
   203    successfully after copying the rest of the original path to the result path.
   124    successfully after copying the rest of the original path to the result path.
   204    Other I/O errors cause an error return.
   125    Other I/O errors cause an error return.
   205 */
   126 */
   206 
       
   207 int
   127 int
   208 lastErrorReportable()
   128 lastErrorReportable()
   209 {
   129 {
   210     DWORD errval = GetLastError();
   130     DWORD errval = GetLastError();
   211     if ((errval == ERROR_FILE_NOT_FOUND)
   131     if ((errval == ERROR_FILE_NOT_FOUND)
   223     jio_fprintf(stderr, "canonicalize: errval %d\n", errval);
   143     jio_fprintf(stderr, "canonicalize: errval %d\n", errval);
   224 #endif
   144 #endif
   225     return 1;
   145     return 1;
   226 }
   146 }
   227 
   147 
   228 int wcanonicalize(WCHAR *orig_path, WCHAR *result, int size);
       
   229 
       
   230 /* Convert a pathname to canonical form.  The input orig_path is assumed to
   148 /* Convert a pathname to canonical form.  The input orig_path is assumed to
   231    have been converted to native form already, via JVM_NativePath().  This is
   149    have been converted to native form already, via JVM_NativePath().  This is
   232    necessary because _fullpath() rejects duplicate separator characters on
   150    necessary because _fullpath() rejects duplicate separator characters on
   233    Win95, though it accepts them on NT. */
   151    Win95, though it accepts them on NT. */
   234 
       
   235 int
       
   236 canonicalize(char *orig_path, char *result, int size)
       
   237 {
       
   238     WIN32_FIND_DATA fd;
       
   239     HANDLE h;
       
   240     char path[1024];    /* Working copy of path */
       
   241     char *src, *dst, *dend;
       
   242     wchar_t *worig_path, *wresult;
       
   243     size_t converted_chars = 0;
       
   244 
       
   245     /* handle long path with length >= MAX_PATH */
       
   246     if (strlen(orig_path) >= MAX_PATH) {
       
   247         if ((worig_path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
       
   248             return -1;
       
   249 
       
   250         if (mbstowcs_s(&converted_chars, worig_path, (size_t)size, orig_path, (size_t)(size - 1)) != 0) {
       
   251             free(worig_path);
       
   252             return -1;
       
   253         }
       
   254 
       
   255         if ((wresult = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
       
   256             return -1;
       
   257 
       
   258         if (wcanonicalize(worig_path, wresult, size) != 0) {
       
   259             free(worig_path);
       
   260             free(wresult);
       
   261             return -1;
       
   262         }
       
   263 
       
   264         if (wcstombs_s(&converted_chars, result, (size_t)size, wresult, (size_t)(size - 1)) != 0) {
       
   265             free(worig_path);
       
   266             free(wresult);
       
   267             return -1;
       
   268         }
       
   269 
       
   270         free(worig_path);
       
   271         free(wresult);
       
   272         return 0;
       
   273     }
       
   274 
       
   275     /* Reject paths that contain wildcards */
       
   276     if (wild(orig_path)) {
       
   277         errno = EINVAL;
       
   278         return -1;
       
   279     }
       
   280 
       
   281     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
       
   282       contrary to the documentation, the _fullpath procedure does not require
       
   283       the drive to be available.  It also does not reliably change all
       
   284       occurrences of '/' to '\\' on Win95, so now JVM_NativePath does that. */
       
   285     if (!_fullpath(path, orig_path, sizeof(path))) {
       
   286         return -1;
       
   287     }
       
   288 
       
   289     /* Correction for Win95: _fullpath may leave a trailing "\\"
       
   290       on a UNC pathname */
       
   291     if ((path[0] == '\\') && (path[1] == '\\')) {
       
   292         char *p = path + strlen(path);
       
   293         if ((p[-1] == '\\') && !islb(p[-2])) {
       
   294             p[-1] = '\0';
       
   295         }
       
   296     }
       
   297 
       
   298     if (dots(path)) /* Check for prohibited combinations of dots */
       
   299         return -1;
       
   300 
       
   301     src = path;            /* Start scanning here */
       
   302     dst = result;        /* Place results here */
       
   303     dend = dst + size;        /* Don't go to or past here */
       
   304 
       
   305     /* Copy prefix, assuming path is absolute */
       
   306     if (isalpha(src[0]) && (src[1] == ':') && (src[2] == '\\')) {
       
   307         /* Drive specifier */
       
   308         *src = toupper(*src);    /* Canonicalize drive letter */
       
   309         if (!(dst = cp(dst, dend, '\0', src, src + 2))) {
       
   310             return -1;
       
   311         }
       
   312         src += 2;
       
   313     } else if ((src[0] == '\\') && (src[1] == '\\')) {
       
   314         /* UNC pathname */
       
   315         char *p;
       
   316         p = nextsep(src + 2);    /* Skip past host name */
       
   317         if (!*p) {
       
   318             /* A UNC pathname must begin with "\\\\host\\share",
       
   319             so reject this path as invalid if there is no share name */
       
   320             errno = EINVAL;
       
   321             return -1;
       
   322         }
       
   323         p = nextsep(p + 1);    /* Skip past share name */
       
   324         if (!(dst = cp(dst, dend, '\0', src, p))) {
       
   325             return -1;
       
   326         }
       
   327         src = p;
       
   328     } else {
       
   329         /* Invalid path */
       
   330         errno = EINVAL;
       
   331         return -1;
       
   332     }
       
   333 
       
   334     /* Windows 95/98/Me bug - FindFirstFile fails on network mounted drives */
       
   335     /* for root pathes like "E:\" . If the path has this form, we should  */
       
   336     /* simply return it, it is already canonicalized. */
       
   337     if (strlen(path) == 3 && path[1] == ':' && path[2] == '\\') {
       
   338         /* At this point we have already copied the drive specifier ("z:")*/
       
   339         /* so we need to copy "\" and the null character. */
       
   340         result[2] = '\\';
       
   341         result[3] = '\0';
       
   342         return 0;
       
   343     }
       
   344 
       
   345     /* At this point we have copied either a drive specifier ("z:") or a UNC
       
   346     prefix ("\\\\host\\share") to the result buffer, and src points to the
       
   347     first byte of the remainder of the path.  We now scan through the rest
       
   348     of the path, looking up each prefix in order to find the true name of
       
   349     the last element of each prefix, thereby computing the full true name of
       
   350     the original path. */
       
   351     while (*src) {
       
   352         char *p = nextsep(src + 1);    /* Find next separator */
       
   353         char c = *p;
       
   354         assert(*src == '\\');        /* Invariant */
       
   355         *p = '\0';            /* Temporarily clear separator */
       
   356         h = FindFirstFile(path, &fd);    /* Look up prefix */
       
   357         *p = c;                /* Restore separator */
       
   358         if (h != INVALID_HANDLE_VALUE) {
       
   359             /* Lookup succeeded; append true name to result and continue */
       
   360             FindClose(h);
       
   361             if (!(dst = cp(dst, dend, '\\',
       
   362                 fd.cFileName,
       
   363                 fd.cFileName + strlen(fd.cFileName)))) {
       
   364                 return -1;
       
   365             }
       
   366             src = p;
       
   367             continue;
       
   368         } else {
       
   369             if (!lastErrorReportable()) {
       
   370                 if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
       
   371                     return -1;
       
   372                 }
       
   373                 break;
       
   374             } else {
       
   375                 return -1;
       
   376             }
       
   377         }
       
   378     }
       
   379 
       
   380     if (dst >= dend) {
       
   381         errno = ENAMETOOLONG;
       
   382         return -1;
       
   383     }
       
   384     *dst = '\0';
       
   385     return 0;
       
   386 
       
   387 }
       
   388 
       
   389 
       
   390 /* Convert a pathname to canonical form.  The input prefix is assumed
       
   391    to be in canonical form already, and the trailing filename must not
       
   392    contain any wildcard, dot/double dot, or other "tricky" characters
       
   393    that are rejected by the canonicalize() routine above.  This
       
   394    routine is present to allow the canonicalization prefix cache to be
       
   395    used while still returning canonical names with the correct
       
   396    capitalization. */
       
   397 
       
   398 int
       
   399 canonicalizeWithPrefix(char* canonicalPrefix, char* pathWithCanonicalPrefix, char *result, int size)
       
   400 {
       
   401     WIN32_FIND_DATA fd;
       
   402     HANDLE h;
       
   403     char *src, *dst, *dend;
       
   404 
       
   405     src = pathWithCanonicalPrefix;
       
   406     dst = result;        /* Place results here */
       
   407     dend = dst + size;   /* Don't go to or past here */
       
   408 
       
   409     h = FindFirstFile(pathWithCanonicalPrefix, &fd);    /* Look up file */
       
   410     if (h != INVALID_HANDLE_VALUE) {
       
   411         /* Lookup succeeded; concatenate true name to prefix */
       
   412         FindClose(h);
       
   413         if (!(dst = cp(dst, dend, '\0',
       
   414                        canonicalPrefix,
       
   415                        canonicalPrefix + strlen(canonicalPrefix)))) {
       
   416             return -1;
       
   417         }
       
   418         if (!(dst = cp(dst, dend, '\\',
       
   419                        fd.cFileName,
       
   420                        fd.cFileName + strlen(fd.cFileName)))) {
       
   421             return -1;
       
   422         }
       
   423     } else {
       
   424         if (!lastErrorReportable()) {
       
   425             if (!(dst = cp(dst, dend, '\0', src, src + strlen(src)))) {
       
   426                 return -1;
       
   427             }
       
   428         } else {
       
   429             return -1;
       
   430         }
       
   431     }
       
   432 
       
   433     if (dst >= dend) {
       
   434         errno = ENAMETOOLONG;
       
   435         return -1;
       
   436     }
       
   437     *dst = '\0';
       
   438     return 0;
       
   439 }
       
   440 
       
   441 
       
   442 /* Wide character version of canonicalize. Size is a wide-character size. */
       
   443 
       
   444 int
   152 int
   445 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
   153 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
   446 {
   154 {
   447     WIN32_FIND_DATAW fd;
   155     WIN32_FIND_DATAW fd;
   448     HANDLE h;
   156     HANDLE h;
   557  err:
   265  err:
   558     free(path);
   266     free(path);
   559     return -1;
   267     return -1;
   560 }
   268 }
   561 
   269 
   562 
   270 /* Convert a pathname to canonical form.  The input prefix is assumed
   563 /* Wide character version of canonicalizeWithPrefix. */
   271    to be in canonical form already, and the trailing filename must not
   564 
   272    contain any wildcard, dot/double dot, or other "tricky" characters
       
   273    that are rejected by the canonicalize() routine above.  This
       
   274    routine is present to allow the canonicalization prefix cache to be
       
   275    used while still returning canonical names with the correct
       
   276    capitalization. */
   565 int
   277 int
   566 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
   278 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
   567 {
   279 {
   568     WIN32_FIND_DATAW fd;
   280     WIN32_FIND_DATAW fd;
   569     HANDLE h;
   281     HANDLE h;
   609         errno = ENAMETOOLONG;
   321         errno = ENAMETOOLONG;
   610         return -1;
   322         return -1;
   611     }
   323     }
   612     *dst = L'\0';
   324     *dst = L'\0';
   613     return 0;
   325     return 0;
       
   326 }
       
   327 
       
   328 /* Non-Wide character version of canonicalize.
       
   329    Converts to whchar and delegates to wcanonicalize. */
       
   330 int
       
   331 canonicalize(char* orig_path, char* result, int size) {
       
   332     wchar_t* wpath = NULL;
       
   333     wchar_t* wresult = NULL;
       
   334     size_t conv;
       
   335     size_t path_len = strlen(orig_path);
       
   336     int ret = -1;
       
   337 
       
   338     if ((wpath = (wchar_t*) malloc(sizeof(wchar_t) * (path_len + 1))) == NULL) {
       
   339         goto finish;
       
   340     }
       
   341 
       
   342     if (mbstowcs_s(&conv, wpath, path_len + 1, orig_path, path_len) != 0) {
       
   343         goto finish;
       
   344     }
       
   345 
       
   346     if ((wresult = (wchar_t*) malloc(sizeof(wchar_t) * size)) == NULL) {
       
   347         goto finish;
       
   348     }
       
   349 
       
   350     if (wcanonicalize(wpath, wresult, size) != 0) {
       
   351         goto finish;
       
   352     }
       
   353 
       
   354     if (wcstombs_s(&conv, result, (size_t) size, wresult, (size_t) (size - 1)) != 0) {
       
   355         goto finish;
       
   356     }
       
   357 
       
   358     // Change return value to success.
       
   359     ret = 0;
       
   360 
       
   361 finish:
       
   362     free(wresult);
       
   363     free(wpath);
       
   364 
       
   365     return ret;
   614 }
   366 }
   615 
   367 
   616 
   368 
   617 /* The appropriate location of getPrefixed() should be io_util_md.c, but
   369 /* The appropriate location of getPrefixed() should be io_util_md.c, but
   618    java.lang.instrument package has hardwired canonicalize_md.c into their
   370    java.lang.instrument package has hardwired canonicalize_md.c into their