195 lexError(reader.bp, Errors.IllegalEscChar); |
221 lexError(reader.bp, Errors.IllegalEscChar); |
196 } |
222 } |
197 } |
223 } |
198 } else if (reader.bp != reader.buflen) { |
224 } else if (reader.bp != reader.buflen) { |
199 reader.putChar(true); |
225 reader.putChar(true); |
|
226 } |
|
227 } |
|
228 |
|
229 /** Read next character in character or string literal and copy into sbuf |
|
230 * without translating escapes. Used by text blocks to preflight verify |
|
231 * escapes sequences. |
|
232 */ |
|
233 private void scanLitCharRaw(int pos) { |
|
234 if (reader.ch == '\\') { |
|
235 if (reader.peekChar() == '\\' && !reader.isUnicode()) { |
|
236 reader.skipChar(); |
|
237 reader.putChar('\\', false); |
|
238 reader.putChar('\\', true); |
|
239 } else { |
|
240 reader.putChar('\\', true); |
|
241 switch (reader.ch) { |
|
242 case '0': case '1': case '2': case '3': |
|
243 case '4': case '5': case '6': case '7': |
|
244 char leadch = reader.ch; |
|
245 reader.putChar(true); |
|
246 if ('0' <= reader.ch && reader.ch <= '7') { |
|
247 reader.putChar(true); |
|
248 if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') { |
|
249 reader.putChar(true); |
|
250 } |
|
251 } |
|
252 break; |
|
253 // Effectively list of valid escape sequences. |
|
254 case 'b': |
|
255 case 't': |
|
256 case 'n': |
|
257 case 'f': |
|
258 case 'r': |
|
259 case '\'': |
|
260 case '\"': |
|
261 case '\\': |
|
262 reader.putChar(true); break; |
|
263 default: |
|
264 lexError(reader.bp, Errors.IllegalEscChar); |
|
265 } |
|
266 } |
|
267 } else if (reader.bp != reader.buflen) { |
|
268 reader.putChar(true); |
|
269 } |
|
270 } |
|
271 |
|
272 /** Interim access to String methods used to support text blocks. |
|
273 * Required to handle bootstrapping with pre-text block jdks. |
|
274 * Could be reworked in the 'next' jdk. |
|
275 */ |
|
276 static class TextBlockSupport { |
|
277 /** Reflection method to remove incidental indentation. |
|
278 */ |
|
279 private static final Method stripIndent; |
|
280 |
|
281 /** Reflection method to translate escape sequences. |
|
282 */ |
|
283 private static final Method translateEscapes; |
|
284 |
|
285 /** true if stripIndent and translateEscapes are available in the bootstrap jdk. |
|
286 */ |
|
287 private static final boolean hasSupport; |
|
288 |
|
289 /** Get a string method via refection or null if not available. |
|
290 */ |
|
291 private static Method getStringMethodOrNull(String name) { |
|
292 try { |
|
293 return String.class.getMethod(name); |
|
294 } catch (Exception ex) { |
|
295 // Method not available, return null. |
|
296 } |
|
297 return null; |
|
298 } |
|
299 |
|
300 static { |
|
301 // Get text block string methods. |
|
302 stripIndent = getStringMethodOrNull("stripIndent"); |
|
303 translateEscapes = getStringMethodOrNull("translateEscapes"); |
|
304 // true if stripIndent and translateEscapes are available in the bootstrap jdk. |
|
305 hasSupport = stripIndent != null && translateEscapes != null; |
|
306 } |
|
307 |
|
308 /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk. |
|
309 */ |
|
310 static boolean hasSupport() { |
|
311 return hasSupport; |
|
312 } |
|
313 |
|
314 /** Return the leading whitespace count (indentation) of the line. |
|
315 */ |
|
316 private static int indent(String line) { |
|
317 return line.length() - line.stripLeading().length(); |
|
318 } |
|
319 |
|
320 enum WhitespaceChecks { |
|
321 INCONSISTENT, |
|
322 TRAILING |
|
323 }; |
|
324 |
|
325 /** Check that the use of white space in content is not problematic. |
|
326 */ |
|
327 static Set<WhitespaceChecks> checkWhitespace(String string) { |
|
328 // Start with empty result set. |
|
329 Set<WhitespaceChecks> checks = new HashSet<>(); |
|
330 // No need to check empty strings. |
|
331 if (string.isEmpty()) { |
|
332 return checks; |
|
333 } |
|
334 // Maximum common indentation. |
|
335 int outdent = 0; |
|
336 // No need to check indentation if opting out (last line is empty.) |
|
337 char lastChar = string.charAt(string.length() - 1); |
|
338 boolean optOut = lastChar == '\n' || lastChar == '\r'; |
|
339 // Split string based at line terminators. |
|
340 String[] lines = string.split("\\R"); |
|
341 int length = lines.length; |
|
342 // Extract last line. |
|
343 String lastLine = lines[length - 1]; |
|
344 if (!optOut) { |
|
345 // Prime with the last line indentation (may be blank.) |
|
346 outdent = indent(lastLine); |
|
347 for (String line : lines) { |
|
348 // Blanks lines have no influence (last line accounted for.) |
|
349 if (!line.isBlank()) { |
|
350 outdent = Integer.min(outdent, indent(line)); |
|
351 if (outdent == 0) { |
|
352 break; |
|
353 } |
|
354 } |
|
355 } |
|
356 } |
|
357 // Last line is representative. |
|
358 String start = lastLine.substring(0, outdent); |
|
359 for (String line : lines) { |
|
360 // Fail if a line does not have the same indentation. |
|
361 if (!line.isBlank() && !line.startsWith(start)) { |
|
362 // Mix of different white space |
|
363 checks.add(WhitespaceChecks.INCONSISTENT); |
|
364 } |
|
365 // Line has content even after indent is removed. |
|
366 if (outdent < line.length()) { |
|
367 // Is the last character a white space. |
|
368 lastChar = line.charAt(line.length() - 1); |
|
369 if (Character.isWhitespace(lastChar)) { |
|
370 // Has trailing white space. |
|
371 checks.add(WhitespaceChecks.TRAILING); |
|
372 } |
|
373 } |
|
374 } |
|
375 return checks; |
|
376 } |
|
377 |
|
378 /** Invoke String::stripIndent through reflection. |
|
379 */ |
|
380 static String stripIndent(String string) { |
|
381 try { |
|
382 string = (String)stripIndent.invoke(string); |
|
383 } catch (InvocationTargetException | IllegalAccessException ex) { |
|
384 throw new RuntimeException(ex); |
|
385 } |
|
386 return string; |
|
387 } |
|
388 |
|
389 /** Invoke String::translateEscapes through reflection. |
|
390 */ |
|
391 static String translateEscapes(String string) { |
|
392 try { |
|
393 string = (String)translateEscapes.invoke(string); |
|
394 } catch (InvocationTargetException | IllegalAccessException ex) { |
|
395 throw new RuntimeException(ex); |
|
396 } |
|
397 return string; |
|
398 } |
|
399 } |
|
400 |
|
401 /** Test for EOLN. |
|
402 */ |
|
403 private boolean isEOLN() { |
|
404 return reader.ch == LF || reader.ch == CR; |
|
405 } |
|
406 |
|
407 /** Test for CRLF. |
|
408 */ |
|
409 private boolean isCRLF() { |
|
410 return reader.ch == CR && reader.peekChar() == LF; |
|
411 } |
|
412 |
|
413 /** Count and skip repeated occurances of the specified character. |
|
414 */ |
|
415 private int countChar(char ch, int max) { |
|
416 int count = 0; |
|
417 for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) { |
|
418 reader.scanChar(); |
|
419 } |
|
420 return count; |
|
421 } |
|
422 |
|
423 /** Scan a string literal or text block. |
|
424 */ |
|
425 private void scanString(int pos) { |
|
426 // Clear flags. |
|
427 shouldStripIndent = false; |
|
428 shouldTranslateEscapes = false; |
|
429 // Check if text block string methods are present. |
|
430 boolean hasTextBlockSupport = TextBlockSupport.hasSupport(); |
|
431 // Track the end of first line for error recovery. |
|
432 int firstEOLN = -1; |
|
433 // Attempt to scan for up to 3 double quotes. |
|
434 int openCount = countChar('\"', 3); |
|
435 switch (openCount) { |
|
436 case 1: // Starting a string literal. |
|
437 break; |
|
438 case 2: // Starting an empty string literal. |
|
439 // Start again but only consume one quote. |
|
440 reader.reset(pos); |
|
441 openCount = countChar('\"', 1); |
|
442 break; |
|
443 case 3: // Starting a text block. |
|
444 // Check if preview feature is enabled for text blocks. |
|
445 checkSourceLevel(pos, Feature.TEXT_BLOCKS); |
|
446 // Only proceed if text block string methods are present. |
|
447 if (hasTextBlockSupport) { |
|
448 // Indicate that the final string should have incidental indentation removed. |
|
449 shouldStripIndent = true; |
|
450 // Verify the open delimiter sequence. |
|
451 boolean hasOpenEOLN = false; |
|
452 while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) { |
|
453 hasOpenEOLN = isEOLN(); |
|
454 if (hasOpenEOLN) { |
|
455 break; |
|
456 } |
|
457 reader.scanChar(); |
|
458 } |
|
459 // Error if the open delimiter sequence not is """<Whitespace>*<LineTerminator>. |
|
460 if (!hasOpenEOLN) { |
|
461 lexError(reader.bp, Errors.IllegalTextBlockOpen); |
|
462 return; |
|
463 } |
|
464 // Skip line terminator. |
|
465 int start = reader.bp; |
|
466 if (isCRLF()) { |
|
467 reader.scanChar(); |
|
468 } |
|
469 reader.scanChar(); |
|
470 processLineTerminator(start, reader.bp); |
|
471 } else { |
|
472 // No text block string methods are present, so reset and treat like string literal. |
|
473 reader.reset(pos); |
|
474 openCount = countChar('\"', 1); |
|
475 } |
|
476 break; |
|
477 } |
|
478 // While characters are available. |
|
479 while (reader.bp < reader.buflen) { |
|
480 // If possible close delimiter sequence. |
|
481 if (reader.ch == '\"') { |
|
482 // Check to see if enough double quotes are present. |
|
483 int closeCount = countChar('\"', openCount); |
|
484 if (openCount == closeCount) { |
|
485 // Good result. |
|
486 tk = Tokens.TokenKind.STRINGLITERAL; |
|
487 return; |
|
488 } |
|
489 // False alarm, add double quotes to string buffer. |
|
490 reader.repeat('\"', closeCount); |
|
491 } else if (isEOLN()) { |
|
492 // Line terminator in string literal is an error. |
|
493 // Fall out to unclosed string literal error. |
|
494 if (openCount == 1) { |
|
495 break; |
|
496 } |
|
497 // Add line terminator to string buffer. |
|
498 int start = reader.bp; |
|
499 if (isCRLF()) { |
|
500 reader.scanChar(); |
|
501 } |
|
502 reader.putChar('\n', true); |
|
503 processLineTerminator(start, reader.bp); |
|
504 // Record first line terminator for error recovery. |
|
505 if (firstEOLN == -1) { |
|
506 firstEOLN = reader.bp; |
|
507 } |
|
508 } else if (reader.ch == '\\') { |
|
509 // Handle escape sequences. |
|
510 if (hasTextBlockSupport) { |
|
511 // Indicate that the final string should have escapes translated. |
|
512 shouldTranslateEscapes = true; |
|
513 // Validate escape sequence and add to string buffer. |
|
514 scanLitCharRaw(pos); |
|
515 } else { |
|
516 // Translate escape sequence and add result to string buffer. |
|
517 scanLitChar(pos); |
|
518 } |
|
519 } else { |
|
520 // Add character to string buffer. |
|
521 reader.putChar(true); |
|
522 } |
|
523 } |
|
524 // String ended without close delimiter sequence. |
|
525 lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock); |
|
526 if (firstEOLN != -1) { |
|
527 // Reset recovery position to point after open delimiter sequence. |
|
528 reader.reset(firstEOLN); |
200 } |
529 } |
201 } |
530 } |
202 |
531 |
203 private void scanDigits(int pos, int digitRadix) { |
532 private void scanDigits(int pos, int digitRadix) { |
204 char saveCh; |
533 char saveCh; |
693 } |
1014 } |
694 endPos = reader.bp; |
1015 endPos = reader.bp; |
695 switch (tk.tag) { |
1016 switch (tk.tag) { |
696 case DEFAULT: return new Token(tk, pos, endPos, comments); |
1017 case DEFAULT: return new Token(tk, pos, endPos, comments); |
697 case NAMED: return new NamedToken(tk, pos, endPos, name, comments); |
1018 case NAMED: return new NamedToken(tk, pos, endPos, name, comments); |
698 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments); |
1019 case STRING: { |
|
1020 // Get characters from string buffer. |
|
1021 String string = reader.chars(); |
|
1022 // If a text block. |
|
1023 if (shouldStripIndent) { |
|
1024 // Verify that the incidental indentation is consistent. |
|
1025 if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) { |
|
1026 Set<TextBlockSupport.WhitespaceChecks> checks = |
|
1027 TextBlockSupport.checkWhitespace(string); |
|
1028 if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) { |
|
1029 lexWarning(LintCategory.TEXT_BLOCKS, pos, |
|
1030 Warnings.InconsistentWhiteSpaceIndentation); |
|
1031 } |
|
1032 if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) { |
|
1033 lexWarning(LintCategory.TEXT_BLOCKS, pos, |
|
1034 Warnings.TrailingWhiteSpaceWillBeRemoved); |
|
1035 } |
|
1036 } |
|
1037 // Remove incidental indentation. |
|
1038 string = TextBlockSupport.stripIndent(string); |
|
1039 } |
|
1040 // Translate escape sequences if present. |
|
1041 if (shouldTranslateEscapes) { |
|
1042 string = TextBlockSupport.translateEscapes(string); |
|
1043 } |
|
1044 // Build string token. |
|
1045 return new StringToken(tk, pos, endPos, string, comments); |
|
1046 } |
699 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments); |
1047 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments); |
700 default: throw new AssertionError(); |
1048 default: throw new AssertionError(); |
701 } |
1049 } |
702 } |
1050 } |
703 finally { |
1051 finally { |