jdk-sandbox: jdk/src/share/native/sun/font/layout/KhmerReordering.cpp@90ce3da70b43 (annotated)

2 90ce3da70b43 Initial load duke parents: diff changeset	1	/*
90ce3da70b43 Initial load duke parents: diff changeset	2	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
90ce3da70b43 Initial load duke parents: diff changeset	3	*
90ce3da70b43 Initial load duke parents: diff changeset	4	* This code is free software; you can redistribute it and/or modify it
90ce3da70b43 Initial load duke parents: diff changeset	5	* under the terms of the GNU General Public License version 2 only, as
90ce3da70b43 Initial load duke parents: diff changeset	6	* published by the Free Software Foundation. Sun designates this
90ce3da70b43 Initial load duke parents: diff changeset	7	* particular file as subject to the "Classpath" exception as provided
90ce3da70b43 Initial load duke parents: diff changeset	8	* by Sun in the LICENSE file that accompanied this code.
90ce3da70b43 Initial load duke parents: diff changeset	9	*
90ce3da70b43 Initial load duke parents: diff changeset	10	* This code is distributed in the hope that it will be useful, but WITHOUT
90ce3da70b43 Initial load duke parents: diff changeset	11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
90ce3da70b43 Initial load duke parents: diff changeset	12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
90ce3da70b43 Initial load duke parents: diff changeset	13	* version 2 for more details (a copy is included in the LICENSE file that
90ce3da70b43 Initial load duke parents: diff changeset	14	* accompanied this code).
90ce3da70b43 Initial load duke parents: diff changeset	15	*
90ce3da70b43 Initial load duke parents: diff changeset	16	* You should have received a copy of the GNU General Public License version
90ce3da70b43 Initial load duke parents: diff changeset	17	* 2 along with this work; if not, write to the Free Software Foundation,
90ce3da70b43 Initial load duke parents: diff changeset	18	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
90ce3da70b43 Initial load duke parents: diff changeset	19	*
90ce3da70b43 Initial load duke parents: diff changeset	20	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
90ce3da70b43 Initial load duke parents: diff changeset	21	* CA 95054 USA or visit www.sun.com if you need additional information or
90ce3da70b43 Initial load duke parents: diff changeset	22	* have any questions.
90ce3da70b43 Initial load duke parents: diff changeset	23	*
90ce3da70b43 Initial load duke parents: diff changeset	24	*/
90ce3da70b43 Initial load duke parents: diff changeset	25
90ce3da70b43 Initial load duke parents: diff changeset	26	/*
90ce3da70b43 Initial load duke parents: diff changeset	27	*
90ce3da70b43 Initial load duke parents: diff changeset	28	* (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
90ce3da70b43 Initial load duke parents: diff changeset	29	*
90ce3da70b43 Initial load duke parents: diff changeset	30	* This file is a modification of the ICU file IndicReordering.cpp
90ce3da70b43 Initial load duke parents: diff changeset	31	* by Jens Herden and Javier Sola for Khmer language
90ce3da70b43 Initial load duke parents: diff changeset	32	*
90ce3da70b43 Initial load duke parents: diff changeset	33	*/
90ce3da70b43 Initial load duke parents: diff changeset	34
90ce3da70b43 Initial load duke parents: diff changeset	35	#include "LETypes.h"
90ce3da70b43 Initial load duke parents: diff changeset	36	#include "OpenTypeTables.h"
90ce3da70b43 Initial load duke parents: diff changeset	37	#include "KhmerReordering.h"
90ce3da70b43 Initial load duke parents: diff changeset	38	#include "LEGlyphStorage.h"
90ce3da70b43 Initial load duke parents: diff changeset	39
90ce3da70b43 Initial load duke parents: diff changeset	40	// Characters that get referred to by name...
90ce3da70b43 Initial load duke parents: diff changeset	41	enum
90ce3da70b43 Initial load duke parents: diff changeset	42	{
90ce3da70b43 Initial load duke parents: diff changeset	43	C_SIGN_ZWNJ = 0x200C, // U+200C ZERO WIDTH NON-JOINER
90ce3da70b43 Initial load duke parents: diff changeset	44	C_SIGN_ZWJ = 0x200D, // U+200D ZERO WIDTH JOINER
90ce3da70b43 Initial load duke parents: diff changeset	45	C_DOTTED_CIRCLE = 0x25CC, // U+25CC DOTTED CIRCLE
90ce3da70b43 Initial load duke parents: diff changeset	46	C_RO = 0x179A, // U+179A KHMER LETTER RO
90ce3da70b43 Initial load duke parents: diff changeset	47	C_VOWEL_AA = 0x17B6, // U+17B6 KHMER VOWEL SIGN AA
90ce3da70b43 Initial load duke parents: diff changeset	48	C_SIGN_NIKAHIT = 0x17C6, // U+17C6 KHMER SIGN NIKAHIT
90ce3da70b43 Initial load duke parents: diff changeset	49	C_VOWEL_E = 0x17C1, // U+17C1 KHMER VOWEL SIGN E
90ce3da70b43 Initial load duke parents: diff changeset	50	C_COENG = 0x17D2 // U+17D2 KHMER SIGN COENG
90ce3da70b43 Initial load duke parents: diff changeset	51	};
90ce3da70b43 Initial load duke parents: diff changeset	52
90ce3da70b43 Initial load duke parents: diff changeset	53
90ce3da70b43 Initial load duke parents: diff changeset	54	enum
90ce3da70b43 Initial load duke parents: diff changeset	55	{
90ce3da70b43 Initial load duke parents: diff changeset	56	// simple classes: they are used in the state table (in this file)
90ce3da70b43 Initial load duke parents: diff changeset	57	// to control the length of a syllable; they are also used to know
90ce3da70b43 Initial load duke parents: diff changeset	58	// where a character should be placed (location in reference to
90ce3da70b43 Initial load duke parents: diff changeset	59	// the base character) and also to know if a character, when
90ce3da70b43 Initial load duke parents: diff changeset	60	// independently displayed, should be displayed with a dotted-circle
90ce3da70b43 Initial load duke parents: diff changeset	61	// to indicate an error in syllable construction
90ce3da70b43 Initial load duke parents: diff changeset	62
90ce3da70b43 Initial load duke parents: diff changeset	63	_xx = KhmerClassTable::CC_RESERVED,
90ce3da70b43 Initial load duke parents: diff changeset	64	_sa = KhmerClassTable::CC_SIGN_ABOVE \| KhmerClassTable::CF_DOTTED_CIRCLE
90ce3da70b43 Initial load duke parents: diff changeset	65	\| KhmerClassTable::CF_POS_ABOVE,
90ce3da70b43 Initial load duke parents: diff changeset	66	_sp = KhmerClassTable::CC_SIGN_AFTER \| KhmerClassTable::CF_DOTTED_CIRCLE
90ce3da70b43 Initial load duke parents: diff changeset	67	\| KhmerClassTable::CF_POS_AFTER,
90ce3da70b43 Initial load duke parents: diff changeset	68	_c1 = KhmerClassTable::CC_CONSONANT \| KhmerClassTable::CF_CONSONANT,
90ce3da70b43 Initial load duke parents: diff changeset	69	_c2 = KhmerClassTable::CC_CONSONANT2 \| KhmerClassTable::CF_CONSONANT,
90ce3da70b43 Initial load duke parents: diff changeset	70	_c3 = KhmerClassTable::CC_CONSONANT3 \| KhmerClassTable::CF_CONSONANT,
90ce3da70b43 Initial load duke parents: diff changeset	71	_rb = KhmerClassTable::CC_ROBAT \| KhmerClassTable::CF_POS_ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	72	\| KhmerClassTable::CF_DOTTED_CIRCLE,
90ce3da70b43 Initial load duke parents: diff changeset	73	_cs = KhmerClassTable::CC_CONSONANT_SHIFTER \| KhmerClassTable::CF_DOTTED_CIRCLE
90ce3da70b43 Initial load duke parents: diff changeset	74	\| KhmerClassTable::CF_SHIFTER,
90ce3da70b43 Initial load duke parents: diff changeset	75	_dl = KhmerClassTable::CC_DEPENDENT_VOWEL \| KhmerClassTable::CF_POS_BEFORE
90ce3da70b43 Initial load duke parents: diff changeset	76	\| KhmerClassTable::CF_DOTTED_CIRCLE,
90ce3da70b43 Initial load duke parents: diff changeset	77	_db = KhmerClassTable::CC_DEPENDENT_VOWEL \| KhmerClassTable::CF_POS_BELOW
90ce3da70b43 Initial load duke parents: diff changeset	78	\| KhmerClassTable::CF_DOTTED_CIRCLE,
90ce3da70b43 Initial load duke parents: diff changeset	79	_da = KhmerClassTable::CC_DEPENDENT_VOWEL \| KhmerClassTable::CF_POS_ABOVE
90ce3da70b43 Initial load duke parents: diff changeset	80	\| KhmerClassTable::CF_DOTTED_CIRCLE \| KhmerClassTable::CF_ABOVE_VOWEL,
90ce3da70b43 Initial load duke parents: diff changeset	81	_dr = KhmerClassTable::CC_DEPENDENT_VOWEL \| KhmerClassTable::CF_POS_AFTER
90ce3da70b43 Initial load duke parents: diff changeset	82	\| KhmerClassTable::CF_DOTTED_CIRCLE,
90ce3da70b43 Initial load duke parents: diff changeset	83	_co = KhmerClassTable::CC_COENG \| KhmerClassTable::CF_COENG
90ce3da70b43 Initial load duke parents: diff changeset	84	\| KhmerClassTable::CF_DOTTED_CIRCLE,
90ce3da70b43 Initial load duke parents: diff changeset	85
90ce3da70b43 Initial load duke parents: diff changeset	86	// split vowels: same as _da / _dr with CF_SPLIT_VOWEL added
90ce3da70b43 Initial load duke parents: diff changeset	87	_va = _da \| KhmerClassTable::CF_SPLIT_VOWEL,
90ce3da70b43 Initial load duke parents: diff changeset	88	_vr = _dr \| KhmerClassTable::CF_SPLIT_VOWEL
90ce3da70b43 Initial load duke parents: diff changeset	89	};
90ce3da70b43 Initial load duke parents: diff changeset	90
90ce3da70b43 Initial load duke parents: diff changeset	91
90ce3da70b43 Initial load duke parents: diff changeset	92	// Character class tables
90ce3da70b43 Initial load duke parents: diff changeset	93
90ce3da70b43 Initial load duke parents: diff changeset	94	// _xx character does not combine into syllable, such as numbers,
90ce3da70b43 Initial load duke parents: diff changeset	95	// punctuation marks, non-Khmer signs...
90ce3da70b43 Initial load duke parents: diff changeset	96	// _sa Sign placed above the base
90ce3da70b43 Initial load duke parents: diff changeset	97	// _sp Sign placed after the base
90ce3da70b43 Initial load duke parents: diff changeset	98	// _c1 Consonant of type 1 or independent vowel (independent vowels
90ce3da70b43 Initial load duke parents: diff changeset	99	// behave as type 1 consonants)
90ce3da70b43 Initial load duke parents: diff changeset	100	// _c2 Consonant of type 2 (only RO)
90ce3da70b43 Initial load duke parents: diff changeset	101	// _c3 Consonant of type 3
90ce3da70b43 Initial load duke parents: diff changeset	102	// _rb Khmer sign robat u17CC. combining mark for subscript consonants
90ce3da70b43 Initial load duke parents: diff changeset	103	// _cs Consonant-shifter
90ce3da70b43 Initial load duke parents: diff changeset	104	// _dl Dependent vowel placed before the base (left of the base)
90ce3da70b43 Initial load duke parents: diff changeset	105	// _db Dependent vowel placed below the base
90ce3da70b43 Initial load duke parents: diff changeset	106	// _da Dependent vowel placed above the base
90ce3da70b43 Initial load duke parents: diff changeset	107	// _dr Dependent vowel placed behind the base (right of the base)
90ce3da70b43 Initial load duke parents: diff changeset	108	// _co Khmer combining mark COENG u17D2, combines with the consonant
90ce3da70b43 Initial load duke parents: diff changeset	109	// or independent vowel following it to create a subscript consonant
90ce3da70b43 Initial load duke parents: diff changeset	110	// or independent vowel
90ce3da70b43 Initial load duke parents: diff changeset	111	// _va Khmer split vowel in which the first part is before the base and
90ce3da70b43 Initial load duke parents: diff changeset	112	// the second one above the base
90ce3da70b43 Initial load duke parents: diff changeset	113	// _vr Khmer split vowel in which the first part is before the base and
90ce3da70b43 Initial load duke parents: diff changeset	114	// the second one behind (right of) the base
90ce3da70b43 Initial load duke parents: diff changeset	115
90ce3da70b43 Initial load duke parents: diff changeset	116	static const KhmerClassTable::CharClass khmerCharClasses[] = // one class per code point, 16 per row; getCharClass indexes it with (ch - firstChar)
90ce3da70b43 Initial load duke parents: diff changeset	117	{
90ce3da70b43 Initial load duke parents: diff changeset	118	_c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, // 1780 - 178F
90ce3da70b43 Initial load duke parents: diff changeset	119	_c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, // 1790 - 179F
90ce3da70b43 Initial load duke parents: diff changeset	120	_c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, // 17A0 - 17AF
90ce3da70b43 Initial load duke parents: diff changeset	121	_c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, // 17B0 - 17BF
90ce3da70b43 Initial load duke parents: diff changeset	122	_vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, // 17C0 - 17CF
90ce3da70b43 Initial load duke parents: diff changeset	123	_sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx, // 17D0 - 17DF
90ce3da70b43 Initial load duke parents: diff changeset	124	};
90ce3da70b43 Initial load duke parents: diff changeset	125
90ce3da70b43 Initial load duke parents: diff changeset	126
90ce3da70b43 Initial load duke parents: diff changeset	127	//
90ce3da70b43 Initial load duke parents: diff changeset	128	// Khmer Class Tables
90ce3da70b43 Initial load duke parents: diff changeset	129	//
90ce3da70b43 Initial load duke parents: diff changeset	130
90ce3da70b43 Initial load duke parents: diff changeset	131	//
90ce3da70b43 Initial load duke parents: diff changeset	132	// The range of characters defined in the above table is defined
90ce3da70b43 Initial load duke parents: diff changeset	133	// here. For Khmer: 1780 to 17DF. Even if the Khmer range is bigger, all
90ce3da70b43 Initial load duke parents: diff changeset	134	// other characters are not combinable, and therefore treated as _xx
90ce3da70b43 Initial load duke parents: diff changeset	135	static const KhmerClassTable khmerClassTable = {0x1780, 0x17df, khmerCharClasses}; // presumably {firstChar, lastChar, classTable} -- confirm member order in KhmerReordering.h
90ce3da70b43 Initial load duke parents: diff changeset	136
90ce3da70b43 Initial load duke parents: diff changeset	137
90ce3da70b43 Initial load duke parents: diff changeset	138	// Classify a character of the input string. ZWJ and ZWNJ (two
90ce3da70b43 Initial load duke parents: diff changeset	139	// characters that may appear within the syllable, but are not in the
90ce3da70b43 Initial load duke parents: diff changeset	140	// khmerCharClasses table) get their own classes back; a character
90ce3da70b43 Initial load duke parents: diff changeset	141	// inside the table's range gets the class stored for it; any other
90ce3da70b43 Initial load duke parents: diff changeset	142	// character is unknown, in which case we get _xx (CC_RESERVED) back
90ce3da70b43 Initial load duke parents: diff changeset	143	KhmerClassTable::CharClass KhmerClassTable::getCharClass(LEUnicode ch) const
90ce3da70b43 Initial load duke parents: diff changeset	144	{
90ce3da70b43 Initial load duke parents: diff changeset	145	if (ch == C_SIGN_ZWJ) { // checked first: ZWJ lies outside [firstChar, lastChar]
90ce3da70b43 Initial load duke parents: diff changeset	146	return CC_ZERO_WIDTH_J_MARK;
90ce3da70b43 Initial load duke parents: diff changeset	147	}
90ce3da70b43 Initial load duke parents: diff changeset	148
90ce3da70b43 Initial load duke parents: diff changeset	149	if (ch == C_SIGN_ZWNJ) { // likewise for ZWNJ
90ce3da70b43 Initial load duke parents: diff changeset	150	return CC_ZERO_WIDTH_NJ_MARK;
90ce3da70b43 Initial load duke parents: diff changeset	151	}
90ce3da70b43 Initial load duke parents: diff changeset	152
90ce3da70b43 Initial load duke parents: diff changeset	153	if (ch < firstChar \|\| ch > lastChar) { // outside the table: not combinable
90ce3da70b43 Initial load duke parents: diff changeset	154	return CC_RESERVED;
90ce3da70b43 Initial load duke parents: diff changeset	155	}
90ce3da70b43 Initial load duke parents: diff changeset	156
90ce3da70b43 Initial load duke parents: diff changeset	157	return classTable[ch - firstChar]; // in range: look up by offset from firstChar
90ce3da70b43 Initial load duke parents: diff changeset	158	}
90ce3da70b43 Initial load duke parents: diff changeset	159
90ce3da70b43 Initial load duke parents: diff changeset	160	const KhmerClassTable *KhmerClassTable::getKhmerClassTable() // accessor for the file-static class table
90ce3da70b43 Initial load duke parents: diff changeset	161	{
90ce3da70b43 Initial load duke parents: diff changeset	162	return &khmerClassTable; // address of a static const object, so the caller must not free it
90ce3da70b43 Initial load duke parents: diff changeset	163	}
90ce3da70b43 Initial load duke parents: diff changeset	164
90ce3da70b43 Initial load duke parents: diff changeset	165
90ce3da70b43 Initial load duke parents: diff changeset	166
90ce3da70b43 Initial load duke parents: diff changeset	167	class ReorderingOutput { // sequential writer: stores output characters and mirrors their char index / feature data into the glyph storage
90ce3da70b43 Initial load duke parents: diff changeset	168	private:
90ce3da70b43 Initial load duke parents: diff changeset	169	le_int32 fOutIndex; // next output position; starts at 0 and grows by one per writeChar
90ce3da70b43 Initial load duke parents: diff changeset	170	LEUnicode *fOutChars; // output buffer handed to the constructor; never freed by this class
90ce3da70b43 Initial load duke parents: diff changeset	171
90ce3da70b43 Initial load duke parents: diff changeset	172	LEGlyphStorage &fGlyphStorage; // receives a char index and aux data for every character written
90ce3da70b43 Initial load duke parents: diff changeset	173
90ce3da70b43 Initial load duke parents: diff changeset	174	public:
90ce3da70b43 Initial load duke parents: diff changeset	175	ReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage)
90ce3da70b43 Initial load duke parents: diff changeset	176	: fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage)
90ce3da70b43 Initial load duke parents: diff changeset	177	{
90ce3da70b43 Initial load duke parents: diff changeset	178	// nothing else to do: all members are set in the initializer list
90ce3da70b43 Initial load duke parents: diff changeset	179	}
90ce3da70b43 Initial load duke parents: diff changeset	180
90ce3da70b43 Initial load duke parents: diff changeset	181	~ReorderingOutput()
90ce3da70b43 Initial load duke parents: diff changeset	182	{
90ce3da70b43 Initial load duke parents: diff changeset	183	// nothing to do here: the buffer and glyph storage are not released by this class
90ce3da70b43 Initial load duke parents: diff changeset	184	}
90ce3da70b43 Initial load duke parents: diff changeset	185
90ce3da70b43 Initial load duke parents: diff changeset	186	void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures) // append ch; record charIndex and charFeatures at the same output position
90ce3da70b43 Initial load duke parents: diff changeset	187	{
90ce3da70b43 Initial load duke parents: diff changeset	188	LEErrorCode success = LE_NO_ERROR; // NOTE(review): never inspected after the two calls below
90ce3da70b43 Initial load duke parents: diff changeset	189
90ce3da70b43 Initial load duke parents: diff changeset	190	fOutChars[fOutIndex] = ch; // NOTE(review): no bounds check here; buffer capacity is not known to this class
90ce3da70b43 Initial load duke parents: diff changeset	191
90ce3da70b43 Initial load duke parents: diff changeset	192	fGlyphStorage.setCharIndex(fOutIndex, charIndex, success);
90ce3da70b43 Initial load duke parents: diff changeset	193	fGlyphStorage.setAuxData(fOutIndex, charFeatures, success);
90ce3da70b43 Initial load duke parents: diff changeset	194
90ce3da70b43 Initial load duke parents: diff changeset	195	fOutIndex += 1;
90ce3da70b43 Initial load duke parents: diff changeset	196	}
90ce3da70b43 Initial load duke parents: diff changeset	197
90ce3da70b43 Initial load duke parents: diff changeset	198	le_int32 getOutputIndex() // equals the number of writeChar calls made so far
90ce3da70b43 Initial load duke parents: diff changeset	199	{
90ce3da70b43 Initial load duke parents: diff changeset	200	return fOutIndex;
90ce3da70b43 Initial load duke parents: diff changeset	201	}
90ce3da70b43 Initial load duke parents: diff changeset	202	};
90ce3da70b43 Initial load duke parents: diff changeset	203
90ce3da70b43 Initial load duke parents: diff changeset	204
90ce3da70b43 Initial load duke parents: diff changeset	205	#define blwfFeatureTag LE_BLWF_FEATURE_TAG // short local aliases for the LE_*_FEATURE_TAG constants
90ce3da70b43 Initial load duke parents: diff changeset	206	#define pstfFeatureTag LE_PSTF_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	207	#define presFeatureTag LE_PRES_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	208	#define blwsFeatureTag LE_BLWS_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	209	#define abvsFeatureTag LE_ABVS_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	210	#define pstsFeatureTag LE_PSTS_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	211
90ce3da70b43 Initial load duke parents: diff changeset	212	#define blwmFeatureTag LE_BLWM_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	213	#define abvmFeatureTag LE_ABVM_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	214	#define distFeatureTag LE_DIST_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	215
90ce3da70b43 Initial load duke parents: diff changeset	216	#define prefFeatureTag LE_PREF_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	217	#define abvfFeatureTag LE_ABVF_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	218	#define cligFeatureTag LE_CLIG_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	219	#define mkmkFeatureTag LE_MKMK_FEATURE_TAG
90ce3da70b43 Initial load duke parents: diff changeset	220
90ce3da70b43 Initial load duke parents: diff changeset	221	#define prefFeatureMask 0x80000000UL // one distinct bit per feature, highest bit first, in the same order as featureMap
90ce3da70b43 Initial load duke parents: diff changeset	222	#define blwfFeatureMask 0x40000000UL
90ce3da70b43 Initial load duke parents: diff changeset	223	#define abvfFeatureMask 0x20000000UL
90ce3da70b43 Initial load duke parents: diff changeset	224	#define pstfFeatureMask 0x10000000UL
90ce3da70b43 Initial load duke parents: diff changeset	225	#define presFeatureMask 0x08000000UL
90ce3da70b43 Initial load duke parents: diff changeset	226	#define blwsFeatureMask 0x04000000UL
90ce3da70b43 Initial load duke parents: diff changeset	227	#define abvsFeatureMask 0x02000000UL
90ce3da70b43 Initial load duke parents: diff changeset	228	#define pstsFeatureMask 0x01000000UL
90ce3da70b43 Initial load duke parents: diff changeset	229	#define cligFeatureMask 0x00800000UL
90ce3da70b43 Initial load duke parents: diff changeset	230	#define distFeatureMask 0x00400000UL
90ce3da70b43 Initial load duke parents: diff changeset	231	#define blwmFeatureMask 0x00200000UL
90ce3da70b43 Initial load duke parents: diff changeset	232	#define abvmFeatureMask 0x00100000UL
90ce3da70b43 Initial load duke parents: diff changeset	233	#define mkmkFeatureMask 0x00080000UL
90ce3da70b43 Initial load duke parents: diff changeset	234
90ce3da70b43 Initial load duke parents: diff changeset	235	#define tagPref (prefFeatureMask \| presFeatureMask \| \
90ce3da70b43 Initial load duke parents: diff changeset	236	cligFeatureMask \| distFeatureMask) // every tag* set defined here includes cligFeatureMask and distFeatureMask
90ce3da70b43 Initial load duke parents: diff changeset	237	#define tagAbvf (abvfFeatureMask \| abvsFeatureMask \| \
90ce3da70b43 Initial load duke parents: diff changeset	238	cligFeatureMask \| distFeatureMask \| abvmFeatureMask \| mkmkFeatureMask)
90ce3da70b43 Initial load duke parents: diff changeset	239	#define tagPstf (blwfFeatureMask \| blwsFeatureMask \| prefFeatureMask \| \
90ce3da70b43 Initial load duke parents: diff changeset	240	presFeatureMask \| pstfFeatureMask \| pstsFeatureMask \| cligFeatureMask \| \
90ce3da70b43 Initial load duke parents: diff changeset	241	distFeatureMask \| blwmFeatureMask)
90ce3da70b43 Initial load duke parents: diff changeset	242	#define tagBlwf (blwfFeatureMask \| blwsFeatureMask \| cligFeatureMask \| \
90ce3da70b43 Initial load duke parents: diff changeset	243	distFeatureMask \| blwmFeatureMask \| mkmkFeatureMask)
90ce3da70b43 Initial load duke parents: diff changeset	244	#define tagDefault (prefFeatureMask \| blwfFeatureMask \| presFeatureMask \| \
90ce3da70b43 Initial load duke parents: diff changeset	245	blwsFeatureMask \| cligFeatureMask \| distFeatureMask \| abvmFeatureMask \| \
90ce3da70b43 Initial load duke parents: diff changeset	246	blwmFeatureMask \| mkmkFeatureMask)
90ce3da70b43 Initial load duke parents: diff changeset	247
90ce3da70b43 Initial load duke parents: diff changeset	248
90ce3da70b43 Initial load duke parents: diff changeset	249
90ce3da70b43 Initial load duke parents: diff changeset	250	// Pairs each feature tag with its mask bit. The entries are in the
90ce3da70b43 Initial load duke parents: diff changeset	251	// order in which the features need to be applied for correct processing
90ce3da70b43 Initial load duke parents: diff changeset	252	static const FeatureMap featureMap[] =
90ce3da70b43 Initial load duke parents: diff changeset	253	{
90ce3da70b43 Initial load duke parents: diff changeset	254	// Shaping features
90ce3da70b43 Initial load duke parents: diff changeset	255	{prefFeatureTag, prefFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	256	{blwfFeatureTag, blwfFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	257	{abvfFeatureTag, abvfFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	258	{pstfFeatureTag, pstfFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	259	{presFeatureTag, presFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	260	{blwsFeatureTag, blwsFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	261	{abvsFeatureTag, abvsFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	262	{pstsFeatureTag, pstsFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	263	{cligFeatureTag, cligFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	264
90ce3da70b43 Initial load duke parents: diff changeset	265	// Positioning features
90ce3da70b43 Initial load duke parents: diff changeset	266	{distFeatureTag, distFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	267	{blwmFeatureTag, blwmFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	268	{abvmFeatureTag, abvmFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	269	{mkmkFeatureTag, mkmkFeatureMask},
90ce3da70b43 Initial load duke parents: diff changeset	270	};
90ce3da70b43 Initial load duke parents: diff changeset	271
90ce3da70b43 Initial load duke parents: diff changeset	272	static const le_int32 featureMapCount = LE_ARRAY_SIZE(featureMap); // presumably the number of entries in featureMap (LE_ARRAY_SIZE is defined elsewhere)
90ce3da70b43 Initial load duke parents: diff changeset	273
90ce3da70b43 Initial load duke parents: diff changeset	274	// The stateTable is used to calculate the end (the length) of a well
90ce3da70b43 Initial load duke parents: diff changeset	275	// formed Khmer Syllable.
90ce3da70b43 Initial load duke parents: diff changeset	276	//
90ce3da70b43 Initial load duke parents: diff changeset	277	// Each horizontal line is ordered exactly the same way as the values
90ce3da70b43 Initial load duke parents: diff changeset	278	// in KhmerClassTable CharClassValues in KhmerReordering.h This
90ce3da70b43 Initial load duke parents: diff changeset	279	// coincidence of values allows the follow up of the table.
90ce3da70b43 Initial load duke parents: diff changeset	280	//
90ce3da70b43 Initial load duke parents: diff changeset	281	// Each line corresponds to a state, which does not necessarily need
90ce3da70b43 Initial load duke parents: diff changeset	282	// to be a type of component... for example, state 2 is a base, which
90ce3da70b43 Initial load duke parents: diff changeset	283	// is always a first character in the syllable, but the state could be
90ce3da70b43 Initial load duke parents: diff changeset	284	// produced by a consonant of any type when it is the first character
90ce3da70b43 Initial load duke parents: diff changeset	285	// that is analysed (in ground state).
90ce3da70b43 Initial load duke parents: diff changeset	286	//
90ce3da70b43 Initial load duke parents: diff changeset	287	// Differentiating 3 types of consonants is necessary in order to
90ce3da70b43 Initial load duke parents: diff changeset	288	// forbid the use of certain combinations, such as having a second
90ce3da70b43 Initial load duke parents: diff changeset	289	// coeng after a coeng RO.
90ce3da70b43 Initial load duke parents: diff changeset	290	// The non-existent combination of a type 3 after another type 3
90ce3da70b43 Initial load duke parents: diff changeset	291	// is permitted; eliminating it would very much complicate the table,
90ce3da70b43 Initial load duke parents: diff changeset	292	// and it does not create typing problems, as the case above does.
90ce3da70b43 Initial load duke parents: diff changeset	293	//
90ce3da70b43 Initial load duke parents: diff changeset	294	// The table is quite complex, in order to limit the number of coeng
90ce3da70b43 Initial load duke parents: diff changeset	295	// consonants to 2 (by means of the table).
90ce3da70b43 Initial load duke parents: diff changeset	296	//
90ce3da70b43 Initial load duke parents: diff changeset	297	// There is a peculiarity, as far as Unicode is concerned:
90ce3da70b43 Initial load duke parents: diff changeset	298	// - The consonant-shifter is considered in two possible different
90ce3da70b43 Initial load duke parents: diff changeset	299	// locations, the one considered in Unicode 3.0 and the one considered
90ce3da70b43 Initial load duke parents: diff changeset	300	// in Unicode 4.0. (there is a backwards compatibility problem in this
90ce3da70b43 Initial load duke parents: diff changeset	301	// standard).
90ce3da70b43 Initial load duke parents: diff changeset	302
90ce3da70b43 Initial load duke parents: diff changeset	303
90ce3da70b43 Initial load duke parents: diff changeset	304	// xx independent character, such as a number, punctuation sign or
90ce3da70b43 Initial load duke parents: diff changeset	305	// non-khmer char
90ce3da70b43 Initial load duke parents: diff changeset	306	//
90ce3da70b43 Initial load duke parents: diff changeset	307	// c1 Khmer consonant of type 1 or an independent vowel
90ce3da70b43 Initial load duke parents: diff changeset	308	// that is, a letter in which the subscript form is only under the
90ce3da70b43 Initial load duke parents: diff changeset	309	// base, not taking any space to the right or to the left
90ce3da70b43 Initial load duke parents: diff changeset	310	//
90ce3da70b43 Initial load duke parents: diff changeset	311	// c2 Khmer consonant of type 2, the coeng form takes space under
90ce3da70b43 Initial load duke parents: diff changeset	312	// and to the left of the base (only RO is of this type)
90ce3da70b43 Initial load duke parents: diff changeset	313	//
90ce3da70b43 Initial load duke parents: diff changeset	314	// c3 Khmer consonant of type 3. Its subscript form takes space under
90ce3da70b43 Initial load duke parents: diff changeset	315	// and to the right of the base.
90ce3da70b43 Initial load duke parents: diff changeset	316	//
90ce3da70b43 Initial load duke parents: diff changeset	317	// cs Khmer consonant shifter
90ce3da70b43 Initial load duke parents: diff changeset	318	//
90ce3da70b43 Initial load duke parents: diff changeset	319	// rb Khmer robat
90ce3da70b43 Initial load duke parents: diff changeset	320	//
90ce3da70b43 Initial load duke parents: diff changeset	321	// co coeng character (u17D2)
90ce3da70b43 Initial load duke parents: diff changeset	322	//
90ce3da70b43 Initial load duke parents: diff changeset	323	// dv dependent vowel (including split vowels, they are treated in the
90ce3da70b43 Initial load duke parents: diff changeset	324	// same way). even if dv is not defined above, the component that is
90ce3da70b43 Initial load duke parents: diff changeset	325	// really tested for is KhmerClassTable::CC_DEPENDENT_VOWEL, which is
90ce3da70b43 Initial load duke parents: diff changeset	326	// common to all dependent vowels
90ce3da70b43 Initial load duke parents: diff changeset	327	//
90ce3da70b43 Initial load duke parents: diff changeset	328	// zwj Zero Width joiner
90ce3da70b43 Initial load duke parents: diff changeset	329	//
90ce3da70b43 Initial load duke parents: diff changeset	330	// zwnj Zero width non joiner
90ce3da70b43 Initial load duke parents: diff changeset	331	//
90ce3da70b43 Initial load duke parents: diff changeset	332	// sa above sign
90ce3da70b43 Initial load duke parents: diff changeset	333	//
90ce3da70b43 Initial load duke parents: diff changeset	334	// sp post sign
90ce3da70b43 Initial load duke parents: diff changeset	335	//
90ce3da70b43 Initial load duke parents: diff changeset	336	// there are lines with equal content but for an easier understanding
90ce3da70b43 Initial load duke parents: diff changeset	337	// (and maybe change in the future) we did not join them
90ce3da70b43 Initial load duke parents: diff changeset	338	//
90ce3da70b43 Initial load duke parents: diff changeset	339	static const le_int8 khmerStateTable[][KhmerClassTable::CC_COUNT] =
90ce3da70b43 Initial load duke parents: diff changeset	340	{
90ce3da70b43 Initial load duke parents: diff changeset	341
90ce3da70b43 Initial load duke parents: diff changeset	342	// xx c1 c2 c3 zwnj cs rb co dv sa sp zwj
90ce3da70b43 Initial load duke parents: diff changeset	343	{ 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, // 0 - ground state
90ce3da70b43 Initial load duke parents: diff changeset	344	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state (or sign to the right of the syllable)
90ce3da70b43 Initial load duke parents: diff changeset	345	{-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, // 2 - Base consonant
90ce3da70b43 Initial load duke parents: diff changeset	346	{-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, // 3 - First ZWNJ before a register shifter
90ce3da70b43 Initial load duke parents: diff changeset	347	// It can only be followed by a shifter or a vowel
90ce3da70b43 Initial load duke parents: diff changeset	348	{-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, // 4 - First register shifter
90ce3da70b43 Initial load duke parents: diff changeset	349	{-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, // 5 - Robat
90ce3da70b43 Initial load duke parents: diff changeset	350	{-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - First Coeng
90ce3da70b43 Initial load duke parents: diff changeset	351	{-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, // 7 - First consonant of type 1 after coeng
90ce3da70b43 Initial load duke parents: diff changeset	352	{-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, // 8 - First consonant of type 2 after coeng
90ce3da70b43 Initial load duke parents: diff changeset	353	{-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, // 9 - First consonant or type 3 after ceong
90ce3da70b43 Initial load duke parents: diff changeset	354	{-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 10 - Second Coeng (no register shifter before)
90ce3da70b43 Initial load duke parents: diff changeset	355	{-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, // 11 - Second coeng consonant
90ce3da70b43 Initial load duke parents: diff changeset	356	// (or ind. vowel) no register shifter before
90ce3da70b43 Initial load duke parents: diff changeset	357	{-1, -1, 1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, // 12 - Second ZWNJ before a register shifter
90ce3da70b43 Initial load duke parents: diff changeset	358	{-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, // 13 - Second register shifter
90ce3da70b43 Initial load duke parents: diff changeset	359	{-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, // 14 - ZWJ before vowel
90ce3da70b43 Initial load duke parents: diff changeset	360	{-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, // 15 - ZWNJ before vowel
90ce3da70b43 Initial load duke parents: diff changeset	361	{-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, // 16 - dependent vowel
90ce3da70b43 Initial load duke parents: diff changeset	362	{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, // 17 - sign above
90ce3da70b43 Initial load duke parents: diff changeset	363	{-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, // 18 - ZWJ after vowel
90ce3da70b43 Initial load duke parents: diff changeset	364	{-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, // 19 - Third coeng
90ce3da70b43 Initial load duke parents: diff changeset	365	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, // 20 - dependent vowel after a Robat
90ce3da70b43 Initial load duke parents: diff changeset	366	};
90ce3da70b43 Initial load duke parents: diff changeset	367
90ce3da70b43 Initial load duke parents: diff changeset	368
90ce3da70b43 Initial load duke parents: diff changeset	369	const FeatureMap *KhmerReordering::getFeatureMap(le_int32 &count)
90ce3da70b43 Initial load duke parents: diff changeset	370	{
90ce3da70b43 Initial load duke parents: diff changeset	371	count = featureMapCount;
90ce3da70b43 Initial load duke parents: diff changeset	372
90ce3da70b43 Initial load duke parents: diff changeset	373	return featureMap;
90ce3da70b43 Initial load duke parents: diff changeset	374	}
90ce3da70b43 Initial load duke parents: diff changeset	375
90ce3da70b43 Initial load duke parents: diff changeset	376
90ce3da70b43 Initial load duke parents: diff changeset	377	// Given an input string of characters and a location in which to start looking
90ce3da70b43 Initial load duke parents: diff changeset	378	// calculate, using the state table, which one is the last character of the syllable
90ce3da70b43 Initial load duke parents: diff changeset	379	// that starts in the starting position.
90ce3da70b43 Initial load duke parents: diff changeset	380	le_int32 KhmerReordering::findSyllable(const KhmerClassTable *classTable,
90ce3da70b43 Initial load duke parents: diff changeset	381	const LEUnicode *chars, le_int32 prev, le_int32 charCount)
90ce3da70b43 Initial load duke parents: diff changeset	382	{
90ce3da70b43 Initial load duke parents: diff changeset	383	le_int32 cursor = prev;
90ce3da70b43 Initial load duke parents: diff changeset	384	le_int8 state = 0;
90ce3da70b43 Initial load duke parents: diff changeset	385
90ce3da70b43 Initial load duke parents: diff changeset	386	while (cursor < charCount) {
90ce3da70b43 Initial load duke parents: diff changeset	387	KhmerClassTable::CharClass charClass = (classTable->getCharClass(chars[cursor])
90ce3da70b43 Initial load duke parents: diff changeset	388	& KhmerClassTable::CF_CLASS_MASK);
90ce3da70b43 Initial load duke parents: diff changeset	389
90ce3da70b43 Initial load duke parents: diff changeset	390	state = khmerStateTable[state][charClass];
90ce3da70b43 Initial load duke parents: diff changeset	391
90ce3da70b43 Initial load duke parents: diff changeset	392	if (state < 0) {
90ce3da70b43 Initial load duke parents: diff changeset	393	break;
90ce3da70b43 Initial load duke parents: diff changeset	394	}
90ce3da70b43 Initial load duke parents: diff changeset	395
90ce3da70b43 Initial load duke parents: diff changeset	396	cursor += 1;
90ce3da70b43 Initial load duke parents: diff changeset	397	}
90ce3da70b43 Initial load duke parents: diff changeset	398
90ce3da70b43 Initial load duke parents: diff changeset	399	return cursor;
90ce3da70b43 Initial load duke parents: diff changeset	400	}
90ce3da70b43 Initial load duke parents: diff changeset	401
90ce3da70b43 Initial load duke parents: diff changeset	402
90ce3da70b43 Initial load duke parents: diff changeset	403	// This is the real reordering function as applied to the Khmer language
90ce3da70b43 Initial load duke parents: diff changeset	404
90ce3da70b43 Initial load duke parents: diff changeset	405	le_int32 KhmerReordering::reorder(const LEUnicode *chars, le_int32 charCount,
90ce3da70b43 Initial load duke parents: diff changeset	406	le_int32 /scriptCode/, LEUnicode *outChars, LEGlyphStorage &glyphStorage)
90ce3da70b43 Initial load duke parents: diff changeset	407	{
90ce3da70b43 Initial load duke parents: diff changeset	408	const KhmerClassTable *classTable = KhmerClassTable::getKhmerClassTable();
90ce3da70b43 Initial load duke parents: diff changeset	409
90ce3da70b43 Initial load duke parents: diff changeset	410	ReorderingOutput output(outChars, glyphStorage);
90ce3da70b43 Initial load duke parents: diff changeset	411	KhmerClassTable::CharClass charClass;
90ce3da70b43 Initial load duke parents: diff changeset	412	le_int32 i, prev = 0, coengRo;
90ce3da70b43 Initial load duke parents: diff changeset	413
90ce3da70b43 Initial load duke parents: diff changeset	414
90ce3da70b43 Initial load duke parents: diff changeset	415	// This loop only exits when we reach the end of a run, which may contain
90ce3da70b43 Initial load duke parents: diff changeset	416	// several syllables.
90ce3da70b43 Initial load duke parents: diff changeset	417	while (prev < charCount) {
90ce3da70b43 Initial load duke parents: diff changeset	418	le_int32 syllable = findSyllable(classTable, chars, prev, charCount);
90ce3da70b43 Initial load duke parents: diff changeset	419
90ce3da70b43 Initial load duke parents: diff changeset	420	// write a pre vowel or the pre part of a split vowel first
90ce3da70b43 Initial load duke parents: diff changeset	421	// and look out for coeng + ro. RO is the only vowel of type 2, and
90ce3da70b43 Initial load duke parents: diff changeset	422	// therefore the only one that requires saving space before the base.
90ce3da70b43 Initial load duke parents: diff changeset	423	coengRo = -1; // There is no Coeng Ro, if found this value will change
90ce3da70b43 Initial load duke parents: diff changeset	424	for (i = prev; i < syllable; i += 1) {
90ce3da70b43 Initial load duke parents: diff changeset	425	charClass = classTable->getCharClass(chars[i]);
90ce3da70b43 Initial load duke parents: diff changeset	426
90ce3da70b43 Initial load duke parents: diff changeset	427	// if a split vowel, write the pre part. In Khmer the pre part
90ce3da70b43 Initial load duke parents: diff changeset	428	// is the same for all split vowels, same glyph as pre vowel C_VOWEL_E
90ce3da70b43 Initial load duke parents: diff changeset	429	if (charClass & KhmerClassTable::CF_SPLIT_VOWEL) {
90ce3da70b43 Initial load duke parents: diff changeset	430	output.writeChar(C_VOWEL_E, i, tagPref);
90ce3da70b43 Initial load duke parents: diff changeset	431	break; // there can be only one vowel
90ce3da70b43 Initial load duke parents: diff changeset	432	}
90ce3da70b43 Initial load duke parents: diff changeset	433
90ce3da70b43 Initial load duke parents: diff changeset	434	// if a vowel with pos before write it out
90ce3da70b43 Initial load duke parents: diff changeset	435	if (charClass & KhmerClassTable::CF_POS_BEFORE) {
90ce3da70b43 Initial load duke parents: diff changeset	436	output.writeChar(chars[i], i, tagPref);
90ce3da70b43 Initial load duke parents: diff changeset	437	break; // there can be only one vowel
90ce3da70b43 Initial load duke parents: diff changeset	438	}
90ce3da70b43 Initial load duke parents: diff changeset	439
90ce3da70b43 Initial load duke parents: diff changeset	440	// look for coeng + ro and remember position
90ce3da70b43 Initial load duke parents: diff changeset	441	// works because coeng + ro is always in front of a vowel (if there is a vowel)
90ce3da70b43 Initial load duke parents: diff changeset	442	// and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
90ce3da70b43 Initial load duke parents: diff changeset	443	// with this flag
90ce3da70b43 Initial load duke parents: diff changeset	444	if ( (charClass & KhmerClassTable::CF_COENG) && (i + 1 < syllable) &&
90ce3da70b43 Initial load duke parents: diff changeset	445	( (classTable->getCharClass(chars[i + 1]) &
90ce3da70b43 Initial load duke parents: diff changeset	446	KhmerClassTable::CF_CLASS_MASK) == KhmerClassTable::CC_CONSONANT2) )
90ce3da70b43 Initial load duke parents: diff changeset	447	{
90ce3da70b43 Initial load duke parents: diff changeset	448	coengRo = i;
90ce3da70b43 Initial load duke parents: diff changeset	449	}
90ce3da70b43 Initial load duke parents: diff changeset	450	}
90ce3da70b43 Initial load duke parents: diff changeset	451
90ce3da70b43 Initial load duke parents: diff changeset	452	// write coeng + ro if found
90ce3da70b43 Initial load duke parents: diff changeset	453	if (coengRo > -1) {
90ce3da70b43 Initial load duke parents: diff changeset	454	output.writeChar(C_COENG, coengRo, tagPref);
90ce3da70b43 Initial load duke parents: diff changeset	455	output.writeChar(C_RO, coengRo + 1, tagPref);
90ce3da70b43 Initial load duke parents: diff changeset	456	}
90ce3da70b43 Initial load duke parents: diff changeset	457
90ce3da70b43 Initial load duke parents: diff changeset	458	// shall we add a dotted circle? If in the position in which
90ce3da70b43 Initial load duke parents: diff changeset	459	// the base should be (first char in the string) there is a
90ce3da70b43 Initial load duke parents: diff changeset	460	// character that has the Dotted circle flag (a character that
90ce3da70b43 Initial load duke parents: diff changeset	461	// cannot be a base) then write a dotted circle
90ce3da70b43 Initial load duke parents: diff changeset	462	if (classTable->getCharClass(chars[prev]) & KhmerClassTable::CF_DOTTED_CIRCLE) {
90ce3da70b43 Initial load duke parents: diff changeset	463	output.writeChar(C_DOTTED_CIRCLE, prev, tagDefault);
90ce3da70b43 Initial load duke parents: diff changeset	464	}
90ce3da70b43 Initial load duke parents: diff changeset	465
90ce3da70b43 Initial load duke parents: diff changeset	466	// copy what is left to the output, skipping before vowels and
90ce3da70b43 Initial load duke parents: diff changeset	467	// coeng Ro if they are present
90ce3da70b43 Initial load duke parents: diff changeset	468	for (i = prev; i < syllable; i += 1) {
90ce3da70b43 Initial load duke parents: diff changeset	469	charClass = classTable->getCharClass(chars[i]);
90ce3da70b43 Initial load duke parents: diff changeset	470
90ce3da70b43 Initial load duke parents: diff changeset	471	// skip a before vowel, it was already processed
90ce3da70b43 Initial load duke parents: diff changeset	472	if (charClass & KhmerClassTable::CF_POS_BEFORE) {
90ce3da70b43 Initial load duke parents: diff changeset	473	continue;
90ce3da70b43 Initial load duke parents: diff changeset	474	}
90ce3da70b43 Initial load duke parents: diff changeset	475
90ce3da70b43 Initial load duke parents: diff changeset	476	// skip coeng + ro, it was already processed
90ce3da70b43 Initial load duke parents: diff changeset	477	if (i == coengRo) {
90ce3da70b43 Initial load duke parents: diff changeset	478	i += 1;
90ce3da70b43 Initial load duke parents: diff changeset	479	continue;
90ce3da70b43 Initial load duke parents: diff changeset	480	}
90ce3da70b43 Initial load duke parents: diff changeset	481
90ce3da70b43 Initial load duke parents: diff changeset	482	switch (charClass & KhmerClassTable::CF_POS_MASK) {
90ce3da70b43 Initial load duke parents: diff changeset	483	case KhmerClassTable::CF_POS_ABOVE :
90ce3da70b43 Initial load duke parents: diff changeset	484	output.writeChar(chars[i], i, tagAbvf);
90ce3da70b43 Initial load duke parents: diff changeset	485	break;
90ce3da70b43 Initial load duke parents: diff changeset	486
90ce3da70b43 Initial load duke parents: diff changeset	487	case KhmerClassTable::CF_POS_AFTER :
90ce3da70b43 Initial load duke parents: diff changeset	488	output.writeChar(chars[i], i, tagPstf);
90ce3da70b43 Initial load duke parents: diff changeset	489	break;
90ce3da70b43 Initial load duke parents: diff changeset	490
90ce3da70b43 Initial load duke parents: diff changeset	491	case KhmerClassTable::CF_POS_BELOW :
90ce3da70b43 Initial load duke parents: diff changeset	492	output.writeChar(chars[i], i, tagBlwf);
90ce3da70b43 Initial load duke parents: diff changeset	493	break;
90ce3da70b43 Initial load duke parents: diff changeset	494
90ce3da70b43 Initial load duke parents: diff changeset	495	default:
90ce3da70b43 Initial load duke parents: diff changeset	496	// assign the correct flags to a coeng consonant
90ce3da70b43 Initial load duke parents: diff changeset	497	// Consonants of type 3 are taged as Post forms and those type 1 as below forms
90ce3da70b43 Initial load duke parents: diff changeset	498	if ( (charClass & KhmerClassTable::CF_COENG) && i + 1 < syllable ) {
90ce3da70b43 Initial load duke parents: diff changeset	499	if ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK)
90ce3da70b43 Initial load duke parents: diff changeset	500	== KhmerClassTable::CC_CONSONANT3) {
90ce3da70b43 Initial load duke parents: diff changeset	501	output.writeChar(chars[i], i, tagPstf);
90ce3da70b43 Initial load duke parents: diff changeset	502	i += 1;
90ce3da70b43 Initial load duke parents: diff changeset	503	output.writeChar(chars[i], i, tagPstf);
90ce3da70b43 Initial load duke parents: diff changeset	504	}
90ce3da70b43 Initial load duke parents: diff changeset	505	else {
90ce3da70b43 Initial load duke parents: diff changeset	506	output.writeChar(chars[i], i, tagBlwf);
90ce3da70b43 Initial load duke parents: diff changeset	507	i += 1;
90ce3da70b43 Initial load duke parents: diff changeset	508	output.writeChar(chars[i], i, tagBlwf);
90ce3da70b43 Initial load duke parents: diff changeset	509	}
90ce3da70b43 Initial load duke parents: diff changeset	510	break;
90ce3da70b43 Initial load duke parents: diff changeset	511	}
90ce3da70b43 Initial load duke parents: diff changeset	512	// if a shifter is followed by an above vowel change the shifter to below form,
90ce3da70b43 Initial load duke parents: diff changeset	513	// an above vowel can have two possible positions i + 1 or i + 3
90ce3da70b43 Initial load duke parents: diff changeset	514	// (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
90ce3da70b43 Initial load duke parents: diff changeset	515	// and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
90ce3da70b43 Initial load duke parents: diff changeset	516	// different positions, right after the shifter or after a vowel (Unicode 4)
90ce3da70b43 Initial load duke parents: diff changeset	517	if ( (charClass & KhmerClassTable::CF_SHIFTER) && (i + 1 < syllable) ) {
90ce3da70b43 Initial load duke parents: diff changeset	518	if (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_ABOVE_VOWEL ) {
90ce3da70b43 Initial load duke parents: diff changeset	519	output.writeChar(chars[i], i, tagBlwf);
90ce3da70b43 Initial load duke parents: diff changeset	520	break;
90ce3da70b43 Initial load duke parents: diff changeset	521	}
90ce3da70b43 Initial load duke parents: diff changeset	522	if (i + 2 < syllable &&
90ce3da70b43 Initial load duke parents: diff changeset	523	( (classTable->getCharClass(chars[i + 1]) &
90ce3da70b43 Initial load duke parents: diff changeset	524	KhmerClassTable::CF_CLASS_MASK) == C_VOWEL_AA) &&
90ce3da70b43 Initial load duke parents: diff changeset	525	( (classTable->getCharClass(chars[i + 2]) &
90ce3da70b43 Initial load duke parents: diff changeset	526	KhmerClassTable::CF_CLASS_MASK) == C_SIGN_NIKAHIT) )
90ce3da70b43 Initial load duke parents: diff changeset	527	{
90ce3da70b43 Initial load duke parents: diff changeset	528	output.writeChar(chars[i], i, tagBlwf);
90ce3da70b43 Initial load duke parents: diff changeset	529	break;
90ce3da70b43 Initial load duke parents: diff changeset	530	}
90ce3da70b43 Initial load duke parents: diff changeset	531	if (i + 3 < syllable && (classTable->getCharClass(chars[i + 3]) &
90ce3da70b43 Initial load duke parents: diff changeset	532	KhmerClassTable::CF_ABOVE_VOWEL) )
90ce3da70b43 Initial load duke parents: diff changeset	533	{
90ce3da70b43 Initial load duke parents: diff changeset	534	output.writeChar(chars[i], i, tagBlwf);
90ce3da70b43 Initial load duke parents: diff changeset	535	break;
90ce3da70b43 Initial load duke parents: diff changeset	536	}
90ce3da70b43 Initial load duke parents: diff changeset	537	if (i + 4 < syllable &&
90ce3da70b43 Initial load duke parents: diff changeset	538	( (classTable->getCharClass(chars[i + 3]) &
90ce3da70b43 Initial load duke parents: diff changeset	539	KhmerClassTable::CF_CLASS_MASK) == C_VOWEL_AA) &&
90ce3da70b43 Initial load duke parents: diff changeset	540	( (classTable->getCharClass(chars[i + 4]) &
90ce3da70b43 Initial load duke parents: diff changeset	541	KhmerClassTable::CF_CLASS_MASK) == C_SIGN_NIKAHIT) )
90ce3da70b43 Initial load duke parents: diff changeset	542	{
90ce3da70b43 Initial load duke parents: diff changeset	543	output.writeChar(chars[i], i, tagBlwf);
90ce3da70b43 Initial load duke parents: diff changeset	544	break;
90ce3da70b43 Initial load duke parents: diff changeset	545	}
90ce3da70b43 Initial load duke parents: diff changeset	546
90ce3da70b43 Initial load duke parents: diff changeset	547	}
90ce3da70b43 Initial load duke parents: diff changeset	548	// default - any other characters
90ce3da70b43 Initial load duke parents: diff changeset	549	output.writeChar(chars[i], i, tagDefault);
90ce3da70b43 Initial load duke parents: diff changeset	550	break;
90ce3da70b43 Initial load duke parents: diff changeset	551	} // switch
90ce3da70b43 Initial load duke parents: diff changeset	552	} // for
90ce3da70b43 Initial load duke parents: diff changeset	553
90ce3da70b43 Initial load duke parents: diff changeset	554	prev = syllable; // move the pointer to the start of next syllable
90ce3da70b43 Initial load duke parents: diff changeset	555	}
90ce3da70b43 Initial load duke parents: diff changeset	556
90ce3da70b43 Initial load duke parents: diff changeset	557	return output.getOutputIndex();
90ce3da70b43 Initial load duke parents: diff changeset	558	}

author	duke
	Sat, 01 Dec 2007 00:00:00 +0000
changeset 2	90ce3da70b43
child 3935	afcdb712a9c5
permissions	-rw-r--r--