author | mikejwre |
Thu, 25 Mar 2010 15:05:15 -0700 | |
changeset 5074 | 9c9bfe8f3a47 |
parent 3432 | 8acd97c69118 |
child 5506 | 202f599c92aa |
permissions | -rw-r--r-- |
2 | 1 |
/* |
2 |
* Copyright 2004-2007 Sun Microsystems, Inc. All Rights Reserved. |
|
3 |
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 |
* |
|
5 |
* This code is free software; you can redistribute it and/or modify it |
|
6 |
* under the terms of the GNU General Public License version 2 only, as |
|
7 |
* published by the Free Software Foundation. |
|
8 |
* |
|
9 |
* This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 |
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 |
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 |
* version 2 for more details (a copy is included in the LICENSE file that |
|
13 |
* accompanied this code). |
|
14 |
* |
|
15 |
* You should have received a copy of the GNU General Public License version |
|
16 |
* 2 along with this work; if not, write to the Free Software Foundation, |
|
17 |
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
* |
|
19 |
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
|
20 |
* CA 95054 USA or visit www.sun.com if you need additional information or |
|
21 |
* have any questions. |
|
22 |
*/ |
|
23 |
||
24 |
/** |
|
25 |
* @test |
|
26 |
* @bug 5033550 |
|
27 |
* @summary JDWP back end uses modified UTF-8 |
|
28 |
* |
|
29 |
* @author jjh |
|
30 |
* |
|
31 |
* @run build TestScaffold VMConnection TargetListener TargetAdapter |
|
3432
8acd97c69118
6868533: 3/4 JDI: remove '-source 1.5' and '-target 1.5' options from com.sun.jdi tests
dcubed
parents:
2
diff
changeset
|
32 |
* @run compile -g UTF8Test.java |
2 | 33 |
* @run main UTF8Test |
34 |
*/ |
|
35 |
||
36 |
/* |
|
37 |
There is UTF-8 and there is modified UTF-8, which I will call M-UTF-8. |
|
38 |
The two differ in the representation of binary 0, and |
|
39 |
in some other more esoteric representations. |
|
40 |
See |
|
41 |
http://java.sun.com/developer/technicalArticles/Intl/Supplementary/#Modified_UTF-8 |
|
42 |
http://java.sun.com/javase/6/docs/technotes/guides/jni/spec/types.html#wp16542 |
|
43 |
||
44 |
All the following are observations of the treatment |
|
45 |
of binary 0. In UTF-8, this is represented as one byte: |
|
46 |
0x00 |
|
47 |
||
48 |
while in modified UTF-8, it is represented as two bytes |
|
49 |
0xc0 0x80 |
|
50 |
||
51 |
** I haven't investigated if the other differences between UTF-8 and |
|
52 |
M-UTF-8 are handled in the same way. |
|
53 |
||
54 |
Here is how these are handled in our BE, JDWP, and FE: |
|
55 |
||
56 |
- Strings in .class files are M-UTF-8. |
|
57 |
||
58 |
- To get the value of a string object from the VM, our BE calls |
|
59 |
char * utf = JNI_FUNC_PTR(env,GetStringUTFChars)(env, string, NULL); |
|
60 |
which returns M-UTF-8. |
|
61 |
||
62 |
- To create a string object in the VM, our BE VirtualMachine.createString() calls |
|
63 |
string = JNI_FUNC_PTR(env,NewStringUTF)(env, cstring); |
|
64 |
This function expects the string to be M-UTF-8 |
|
65 |
BUG: If the string came from JDWP, then it is actually UTF-8 |
|
66 |
||
67 |
- I haven't investigated strings in JVMTI. |
|
68 |
||
69 |
- The JDWP spec says that strings are UTF-8. The intro |
|
70 |
says this for all strings, and the createString command and |
|
71 |
the StringReference.value command say it explicitly. |
|
72 |
||
73 |
- Our FE java writes strings to JDWP as UTF-8. |
|
74 |
||
75 |
- BE function outStream_writeString uses strlen meaning |
|
76 |
it expects no 0 bytes, meaning that it expects M-UTF-8 |
|
77 |
This function writes the byte length and then calls |
|
78 |
outStream.c::writeBytes which just writes the bytes to JDWP as is. |
|
79 |
||
80 |
BUG: If such a string came from the VM via JNI, it is actually |
|
81 |
M-UTF-8 |
|
82 |
FIX: - scan string to see if contains an M-UTF-8 char. |
|
83 |
if yes, |
|
84 |
- call String(bytes, 0, len, "UTF8") |
|
85 |
to get a java string. Will this work -ie, the |
|
86 |
input is M-UTF-8 instead of real UTF-8 |
|
87 |
- call some java method (NOT JNI which |
|
88 |
would just come back with M-UTF-8) |
|
89 |
on the String to get real UTF-8 |
|
90 |
||
91 |
||
92 |
- The JDWP StringReference.value command reads a string |
|
93 |
from the BE out of the JDWP stream and does this to |
|
94 |
create a Java String for it (see PacketStream.readString): |
|
95 |
String readString() { |
|
96 |
String ret; |
|
97 |
int len = readInt(); |
|
98 |
||
99 |
try { |
|
100 |
ret = new String(pkt.data, inCursor, len, "UTF8"); |
|
101 |
} catch(java.io.UnsupportedEncodingException e) { |
|
102 |
||
103 |
This String ctor converts _both_ the M-UTF-8 0xc0 0x80 |
|
104 |
and UTF-8 0x00 into a Java char containing 0x0000 |
|
105 |
||
106 |
Does it do this for the other differences too? |
|
107 |
||
108 |
Summary: |
|
109 |
1. JDWP says strings are UTF-8. |
|
110 |
We interpret this to mean standard UTF-8. |
|
111 |
||
112 |
2. JVMTI will be changed to match JNI saying that strings |
|
113 |
are M-UTF-8. |
|
114 |
||
115 |
3. The BE gets UTF-8 strings off JDWP and must convert them to |
|
116 |
M-UTF-8 before giving it to JVMTI or JNI. |
|
117 |
||
118 |
4. The BE gets M-UTF-8 strings from JNI and JVMTI and |
|
119 |
must convert them to UTF-8 when writing to JDWP. |
|
120 |
||
121 |
||
122 |
Here is how the supplementals are represented in java Strings. |
|
123 |
This from java.lang.Character doc: |
|
124 |
The Java 2 platform uses the UTF-16 representation in char arrays and |
|
125 |
in the String and StringBuffer classes. In this representation, |
|
126 |
supplementary characters are represented as a pair of char values, |
|
127 |
the first from the high-surrogates range, (\uD800-\uDBFF), the second |
|
128 |
from the low-surrogates range (\uDC00-\uDFFF). |
|
129 |
See utf8.txt |
|
130 |
||
131 |
||
132 |
---- |
|
133 |
||
134 |
NSK Packet.java in the nsk/share/jdwp framework does this to write |
|
135 |
a string to JDWP: |
|
136 |
public void addString(String value) { |
|
137 |
final int count = JDWP.TypeSize.INT + value.length(); |
|
138 |
addInt(value.length()); |
|
139 |
try { |
|
140 |
addBytes(value.getBytes("UTF-8"), 0, value.length()); |
|
141 |
} catch (UnsupportedEncodingException e) { |
|
142 |
throw new Failure("Unsupported UTF-8 ecnoding while adding string value to JDWP packet:\n\t" |
|
143 |
+ e); |
|
144 |
} |
|
145 |
} |
|
146 |
?? Does this get the standard UTF-8? I would expect so. |
|
147 |
||
148 |
and the readString method does this: |
|
149 |
for (int i = 0; i < len; i++) |
|
150 |
s[i] = getByte(); |
|
151 |
||
152 |
try { |
|
153 |
return new String(s, "UTF-8"); |
|
154 |
} catch (UnsupportedEncodingException e) { |
|
155 |
throw new Failure("Unsupported UTF-8 ecnoding while extracting string value from JDWP packet:\n\t" |
|
156 |
+ e); |
|
157 |
} |
|
158 |
Thus, this won't notice the modified UTF-8 coming in from JDWP . |
|
159 |
||
160 |
||
161 |
*/ |
|
162 |
||
163 |
import com.sun.jdi.*; |
|
164 |
import com.sun.jdi.event.*; |
|
165 |
import com.sun.jdi.request.*; |
|
166 |
import java.io.UnsupportedEncodingException; |
|
167 |
import java.util.*; |
|
168 |
||
169 |
/********** target program **********/ |
|
170 |
||
171 |
/* |
|
172 |
* The debuggee has a few Strings the debugger reads via JDI |
|
173 |
*/ |
|
174 |
class UTF8Targ {
    /*
     * Strings the debugger reads back through JDI.  Bit patterns of the
     * surrogate pairs used below:
     *   d800 = 1101 1000 0000 0000    dc00 = 1101 1100 0000 0000
     *   dbff = 1101 1011 1111 1111    dfff = 1101 1111 1111 1111
     */
    static String[] vals = {
        "xx\u0000yy",          // embedded binary 0
        "xx\ud800\udc00yy",    // first supplementary code point (U+10000)
        "xx\udbff\udfffyy"     // last supplementary code point (U+10FFFF)
    };

    /* Scratch field that the debugger sets and reads back via JDI. */
    static String aField;

    public static void main(String[] args) {
        System.out.println("Howdy!");
        gus();
        System.out.println("Goodbye from UTF8Targ!");
    }

    /* Intentionally empty: the debugger resumes the debuggee to this method. */
    static void gus() {
    }
}
|
192 |
||
193 |
/********** test program **********/ |
|
194 |
||
195 |
public class UTF8Test extends TestScaffold { |
|
196 |
ClassType targetClass; |
|
197 |
ThreadReference mainThread; |
|
198 |
Field targetField; |
|
199 |
UTF8Test (String args[]) { |
|
200 |
super(args); |
|
201 |
} |
|
202 |
||
203 |
public static void main(String[] args) throws Exception { |
|
204 |
new UTF8Test(args).startTests(); |
|
205 |
} |
|
206 |
||
207 |
/********** test core **********/ |
|
208 |
||
209 |
protected void runTests() throws Exception { |
|
210 |
/* |
|
211 |
* Get to the top of main() |
|
212 |
* to determine targetClass and mainThread |
|
213 |
*/ |
|
214 |
BreakpointEvent bpe = startToMain("UTF8Targ"); |
|
215 |
targetClass = (ClassType)bpe.location().declaringType(); |
|
216 |
targetField = targetClass.fieldByName("aField"); |
|
217 |
||
218 |
ArrayReference targetVals = (ArrayReference)targetClass.getValue(targetClass.fieldByName("vals")); |
|
219 |
||
220 |
/* For each string in the debuggee's 'val' array, verify that we can |
|
221 |
* read that value via JDI. |
|
222 |
*/ |
|
223 |
||
224 |
for (int ii = 0; ii < UTF8Targ.vals.length; ii++) { |
|
225 |
StringReference val = (StringReference)targetVals.getValue(ii); |
|
226 |
String valStr = val.value(); |
|
227 |
||
228 |
/* |
|
229 |
* Verify that we can read a value correctly. |
|
230 |
* We read it via JDI, and access it directly from the static |
|
231 |
* var in the debuggee class. |
|
232 |
*/ |
|
233 |
if (!valStr.equals(UTF8Targ.vals[ii]) || |
|
234 |
valStr.length() != UTF8Targ.vals[ii].length()) { |
|
235 |
failure(" FAILED: Expected /" + printIt(UTF8Targ.vals[ii]) + |
|
236 |
"/, but got /" + printIt(valStr) + "/, length = " + valStr.length()); |
|
237 |
} |
|
238 |
} |
|
239 |
||
240 |
/* Test 'all' unicode chars - send them to the debuggee via JDI |
|
241 |
* and then read them back. |
|
242 |
*/ |
|
243 |
doFancyVersion(); |
|
244 |
||
245 |
resumeTo("UTF8Targ", "gus", "()V"); |
|
246 |
try { |
|
247 |
Thread.sleep(1000); |
|
248 |
} catch (InterruptedException ee) { |
|
249 |
} |
|
250 |
||
251 |
||
252 |
/* |
|
253 |
* resume the target listening for events |
|
254 |
*/ |
|
255 |
||
256 |
listenUntilVMDisconnect(); |
|
257 |
||
258 |
/* |
|
259 |
* deal with results of test |
|
260 |
* if anything has called failure("foo") testFailed will be true |
|
261 |
*/ |
|
262 |
if (!testFailed) { |
|
263 |
println("UTF8Test: passed"); |
|
264 |
} else { |
|
265 |
throw new Exception("UTF8Test: failed"); |
|
266 |
} |
|
267 |
} |
|
268 |
||
269 |
/** |
|
270 |
* For each unicode value, send a string containing |
|
271 |
* it to the debuggee via JDI, read it back via JDI, and see if |
|
272 |
* we get the same value. |
|
273 |
*/ |
|
274 |
void doFancyVersion() throws Exception { |
|
275 |
// This does 4 chars at a time just to save time. |
|
276 |
for (int ii = Character.MIN_CODE_POINT; |
|
277 |
ii < Character.MIN_SUPPLEMENTARY_CODE_POINT; |
|
278 |
ii += 4) { |
|
279 |
// Skip the surrogates |
|
280 |
if (ii == Character.MIN_SURROGATE) { |
|
281 |
ii = Character.MAX_SURROGATE - 3; |
|
282 |
break; |
|
283 |
} |
|
284 |
doFancyTest(ii, ii + 1, ii + 2, ii + 3); |
|
285 |
} |
|
286 |
||
287 |
// Do the supplemental chars. |
|
288 |
for (int ii = Character.MIN_SUPPLEMENTARY_CODE_POINT; |
|
289 |
ii <= Character.MAX_CODE_POINT; |
|
290 |
ii += 2000) { |
|
291 |
// Too many of these so just do a few |
|
292 |
doFancyTest(ii, ii + 1, ii + 2, ii + 3); |
|
293 |
} |
|
294 |
||
295 |
} |
|
296 |
||
297 |
void doFancyTest(int ... args) throws Exception { |
|
298 |
String ss = new String(args, 0, 4); |
|
299 |
targetClass.setValue(targetField, vm().mirrorOf(ss)); |
|
300 |
||
301 |
StringReference returnedVal = (StringReference)targetClass.getValue(targetField); |
|
302 |
String returnedStr = returnedVal.value(); |
|
303 |
||
304 |
if (!ss.equals(returnedStr)) { |
|
305 |
failure("Set: FAILED: Expected /" + printIt(ss) + |
|
306 |
"/, but got /" + printIt(returnedStr) + "/, length = " + returnedStr.length()); |
|
307 |
} |
|
308 |
} |
|
309 |
||
310 |
/** |
|
311 |
* Return a String containing binary representations of |
|
312 |
* the chars in a String. |
|
313 |
*/ |
|
314 |
String printIt(String arg) { |
|
315 |
char[] carray = arg.toCharArray(); |
|
316 |
StringBuffer bb = new StringBuffer(arg.length() * 5); |
|
317 |
for (int ii = 0; ii < arg.length(); ii++) { |
|
318 |
int ccc = arg.charAt(ii); |
|
319 |
bb.append(String.format("%1$04x ", ccc)); |
|
320 |
} |
|
321 |
return bb.toString(); |
|
322 |
} |
|
323 |
||
324 |
String printIt1(String arg) { |
|
325 |
byte[] barray = null; |
|
326 |
try { |
|
327 |
barray = arg.getBytes("UTF-8"); |
|
328 |
} catch (UnsupportedEncodingException ee) { |
|
329 |
} |
|
330 |
StringBuffer bb = new StringBuffer(barray.length * 3); |
|
331 |
for (int ii = 0; ii < barray.length; ii++) { |
|
332 |
bb.append(String.format("%1$02x ", barray[ii])); |
|
333 |
} |
|
334 |
return bb.toString(); |
|
335 |
} |
|
336 |
||
337 |
} |