jdk/src/share/classes/sun/text/resources/CollationData_th.java
changeset 10836 d9ac699afc2a
parent 5506 202f599c92aa
equal deleted inserted replaced
10294:8fcdae2a7ec7 10836:d9ac699afc2a
     1 /*
     1 /*
     2  * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved.
     2  * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     7  * published by the Free Software Foundation.  Oracle designates this
   101                 + "< \u0E2E "                   //  HO NOKHUK
   101                 + "< \u0E2E "                   //  HO NOKHUK
   102 
   102 
   103                 //
   103                 //
   104                 // Normal vowels
   104                 // Normal vowels
   105                 //
   105                 //
       
   106                 + "< \u0E4D "                   //  NIKHAHIT
   106                 + "< \u0E30 "                   //  SARA A
   107                 + "< \u0E30 "                   //  SARA A
   107                 + "< \u0E31 "                   //  MAI HAN-AKAT
   108                 + "< \u0E31 "                   //  MAI HAN-AKAT
   108                 + "< \u0E32 "                   //  SARA AA
   109                 + "< \u0E32 "                   //  SARA AA
   109 
   110 
   110                 // Normalizer will decompose this character to \u0e4d\u0e32.  This is
   111                 // Normalizer will decompose this character to \u0e4d\u0e32.
   111                 // a Bad Thing, because we want the separate characters to sort
   112                 + "< \u0E33 = \u0E4D\u0E32 "                   //  SARA AM
   112                 // differently than this individual one.  Since there's no public way to
       
   113                 // set the decomposition to be used when creating a collator, there's
       
   114                 // no way around this right now.
       
   115                 // It's best to go ahead and leave the character in, because it occurs
       
   116                 // this way a lot more often than it occurs as separate characters.
       
   117                 + "< \u0E33 "                   //  SARA AM
       
   118 
   113 
   119                 + "< \u0E34 "                   //  SARA I
   114                 + "< \u0E34 "                   //  SARA I
   120 
   115 
   121                 + "< \u0E35 "                   //  SARA II
   116                 + "< \u0E35 "                   //  SARA II
   122                 + "< \u0E36 "                   //  SARA UE
   117                 + "< \u0E36 "                   //  SARA UE
   131                 + "< \u0E41 "                   //  SARA AE
   126                 + "< \u0E41 "                   //  SARA AE
   132                 + "< \u0E42 "                   //  SARA O
   127                 + "< \u0E42 "                   //  SARA O
   133                 + "< \u0E43 "                   //  SARA AI MAIMUAN
   128                 + "< \u0E43 "                   //  SARA AI MAIMUAN
   134                 + "< \u0E44 "                   //  SARA AI MAIMALAI
   129                 + "< \u0E44 "                   //  SARA AI MAIMALAI
   135 
   130 
   136                 //
       
   137                 // Digits
       
   138                 //
       
   139                 + "< \u0E50 "                   //  DIGIT ZERO
       
   140                 + "< \u0E51 "                   //  DIGIT ONE
       
   141                 + "< \u0E52 "                   //  DIGIT TWO
       
   142                 + "< \u0E53 "                   //  DIGIT THREE
       
   143                 + "< \u0E54 "                   //  DIGIT FOUR
       
   144                 + "< \u0E55 "                   //  DIGIT FIVE
       
   145                 + "< \u0E56 "                   //  DIGIT SIX
       
   146                 + "< \u0E57 "                   //  DIGIT SEVEN
       
   147                 + "< \u0E58 "                   //  DIGIT EIGHT
       
   148                 + "< \u0E59 "                   //  DIGIT NINE
       
   149 
   131 
   150                 // Sorta tonal marks, but maybe not really
   132                 // According to CLDR, PHINTHU (0E3A) sorts after 0E44.
   151                 + "< \u0E4D "                   //  NIKHAHIT
   133                 + "< \u0E3A "                   //  PHINTHU
   152 
   134 
   153                 //
       
   154                 // Thai symbols are supposed to sort "after white space".
       
   155                 // I'm treating this as making them sort just after the normal Latin-1
       
   156                 // symbols, which are in turn after the white space.
       
   157                 //
       
   158                 + "&'\u007d'"  //  right-brace
       
   159                 + "< \u0E2F "                   //  PAIYANNOI      (ellipsis, abbreviation)
       
   160                 + "< \u0E46 "                   //  MAIYAMOK
       
   161                 + "< \u0E4F "                   //  FONGMAN
       
   162                 + "< \u0E5A "                   //  ANGKHANKHU
       
   163                 + "< \u0E5B "                   //  KHOMUT
       
   164                 + "< \u0E3F "                   //  CURRENCY SYMBOL BAHT
       
   165 
   135 
   166                 // These symbols are supposed to be "after all characters"
       
   167                 + "< \u0E4E "                   //  YAMAKKAN
       
   168 
   136 
   169                 // This rare symbol also comes after all characters.  But when it is
   137                 // This rare symbol comes after all characters.
   170                 // used in combination with RU and LU, the combination is treated as
       
   171                 // a separate letter, ala "CH" sorting after "C" in traditional Spanish.
       
   172                 + "< \u0E45 "                   //  LAKKHANGYAO
   138                 + "< \u0E45 "                   //  LAKKHANGYAO
   173                 + "& \u0E24 < \u0E24\u0E45 "
   139                 + "& \u0E32 , \u0E45 "          // According to CLDR, 0E45 is after 0E32 in tertiary level
   174                 + "& \u0E26 < \u0E26\u0E45 "
       
   175 
   140 
   176                 // Tonal marks are primary ignorables but are treated as secondary
   141 
   177                 // differences
   142 
       
   143 
       
   144                 // Below are Thai punctuation marks and tonal (accent) marks. According to CLDR 1.9 and
       
   145                 // ISO/IEC 14651, Annex C, C.2.1 Thai ordering principles, 0E2F to 0E5B are punctuation marks that need to be ignored
       
   146                 // in the first three levels.  0E4E to 0E4B are tonal marks to be compared at the secondary level.
       
   147                 // In the real implementation, set punctuation marks at the tertiary level, as there is no fourth level in Java.
       
   148                 // Set all these special marks after \u0301, the acute accent.
   178                 + "& \u0301 "   // acute accent
   149                 + "& \u0301 "   // acute accent
       
   150 
       
   151                 // punctuation marks
       
   152                 + ", \u0E2F "                   //  PAIYANNOI      (ellipsis, abbreviation)
       
   153                 + ", \u0E46 "                   //  MAIYAMOK
       
   154                 + ", \u0E4F "                   //  FONGMAN
       
   155                 + ", \u0E5A "                   //  ANGKHANKHU
       
   156                 + ", \u0E5B "                   //  KHOMUT
       
   157 
       
   158                 //tonal marks
       
   159                 + "; \u0E4E "                   //  YAMAKKAN
       
   160                 + "; \u0E4C "                   //  THANTHAKHAT
   179                 + "; \u0E47 "                   //  MAITAIKHU
   161                 + "; \u0E47 "                   //  MAITAIKHU
   180                 + "; \u0E48 "                   //  MAI EK
   162                 + "; \u0E48 "                   //  MAI EK
   181                 + "; \u0E49 "                   //  MAI THO
   163                 + "; \u0E49 "                   //  MAI THO
   182                 + "; \u0E4A "                   //  MAI TRI
   164                 + "; \u0E4A "                   //  MAI TRI
   183                 + "; \u0E4B "                   //  MAI CHATTAWA
   165                 + "; \u0E4B "                   //  MAI CHATTAWA
   184                 + "; \u0E4C "                   //  THANTHAKHAT
   166 
       
   167                 //
       
   168                 // Digits are equal to their corresponding Arabic digits in the first level
       
   169                 //
       
   170                 + "& 0 = \u0E50 "                   //  DIGIT ZERO
       
   171                 + "& 1 = \u0E51 "                   //  DIGIT ONE
       
   172                 + "& 2 = \u0E52 "                   //  DIGIT TWO
       
   173                 + "& 3 = \u0E53 "                   //  DIGIT THREE
       
   174                 + "& 4 = \u0E54 "                   //  DIGIT FOUR
       
   175                 + "& 5 = \u0E55 "                   //  DIGIT FIVE
       
   176                 + "& 6 = \u0E56 "                   //  DIGIT SIX
       
   177                 + "& 7 = \u0E57 "                   //  DIGIT SEVEN
       
   178                 + "& 8 = \u0E58 "                   //  DIGIT EIGHT
       
   179                 + "& 9 = \u0E59 "                   //  DIGIT NINE
   185 
   180 
   186 
   181 
   187                 // These are supposed to be ignored, so I'm treating them as controls
   182             }
   188                 + "& \u0001 "
       
   189                 + "= \u0E3A "                   //  PHINTHU
       
   190                 + "= '.' "                      //  period
       
   191                 }
       
   192         };
   183         };
   193     }
   184     }
   194 }
   185 }