48274
|
1 |
/*
|
|
2 |
* Copyright © 2012 Google, Inc.
|
|
3 |
*
|
|
4 |
* This is part of HarfBuzz, a text shaping library.
|
|
5 |
*
|
|
6 |
* Permission is hereby granted, without written agreement and without
|
|
7 |
* license or royalty fees, to use, copy, modify, and distribute this
|
|
8 |
* software and its documentation for any purpose, provided that the
|
|
9 |
* above copyright notice and the following two paragraphs appear in
|
|
10 |
* all copies of this software.
|
|
11 |
*
|
|
12 |
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
|
13 |
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
|
14 |
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
|
15 |
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
|
16 |
* DAMAGE.
|
|
17 |
*
|
|
18 |
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
|
19 |
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
20 |
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
|
21 |
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
|
22 |
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
|
23 |
*
|
|
24 |
* Google Author(s): Behdad Esfahbod
|
|
25 |
*/
|
|
26 |
|
54232
|
27 |
#ifndef HB_SET_DIGEST_HH
|
|
28 |
#define HB_SET_DIGEST_HH
|
48274
|
29 |
|
54232
|
30 |
#include "hb.hh"
|
48274
|
31 |
|
|
32 |
/*
|
|
33 |
* The set digests here implement various "filters" that support
|
|
34 |
* "approximate member query". Conceptually these are like Bloom
|
|
35 |
* Filter and Quotient Filter, however, much smaller, faster, and
|
|
36 |
* designed to fit the requirements of our uses for glyph coverage
|
|
37 |
* queries.
|
|
38 |
*
|
|
39 |
* Our filters are highly accurate if the lookup covers fairly local
|
|
40 |
* set of glyphs, but fully flooded and ineffective if coverage is
|
|
41 |
* all over the place.
|
|
42 |
*
|
|
43 |
* The frozen-set can be used instead of a digest, to trade more
|
|
44 |
* memory for 100% accuracy, but in practice, that doesn't look like
|
|
45 |
* an attractive trade-off.
|
|
46 |
*/
|
|
47 |
|
|
48 |
template <typename mask_t, unsigned int shift>
|
|
49 |
struct hb_set_digest_lowest_bits_t
|
|
50 |
{
|
54232
|
51 |
static constexpr unsigned mask_bytes = sizeof (mask_t);
|
|
52 |
static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
|
|
53 |
static constexpr unsigned num_bits = 0
|
48274
|
54 |
+ (mask_bytes >= 1 ? 3 : 0)
|
|
55 |
+ (mask_bytes >= 2 ? 1 : 0)
|
|
56 |
+ (mask_bytes >= 4 ? 1 : 0)
|
|
57 |
+ (mask_bytes >= 8 ? 1 : 0)
|
|
58 |
+ (mask_bytes >= 16? 1 : 0)
|
|
59 |
+ 0;
|
|
60 |
|
|
61 |
static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
|
|
62 |
static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
|
|
63 |
|
54232
|
64 |
void init () { mask = 0; }
|
48274
|
65 |
|
54232
|
66 |
void add (hb_codepoint_t g) { mask |= mask_for (g); }
|
48274
|
67 |
|
54232
|
68 |
bool add_range (hb_codepoint_t a, hb_codepoint_t b)
|
|
69 |
{
|
48274
|
70 |
if ((b >> shift) - (a >> shift) >= mask_bits - 1)
|
|
71 |
mask = (mask_t) -1;
|
|
72 |
else {
|
|
73 |
mask_t ma = mask_for (a);
|
|
74 |
mask_t mb = mask_for (b);
|
|
75 |
mask |= mb + (mb - ma) - (mb < ma);
|
|
76 |
}
|
50352
|
77 |
return true;
|
|
78 |
}
|
|
79 |
|
|
80 |
template <typename T>
|
54232
|
81 |
void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
|
50352
|
82 |
{
|
|
83 |
for (unsigned int i = 0; i < count; i++)
|
|
84 |
{
|
|
85 |
add (*array);
|
|
86 |
array = (const T *) (stride + (const char *) array);
|
|
87 |
}
|
|
88 |
}
|
|
89 |
template <typename T>
|
54232
|
90 |
bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
|
50352
|
91 |
{
|
|
92 |
for (unsigned int i = 0; i < count; i++)
|
|
93 |
{
|
|
94 |
add (*array);
|
|
95 |
array = (const T *) (stride + (const char *) array);
|
|
96 |
}
|
|
97 |
return true;
|
48274
|
98 |
}
|
|
99 |
|
54232
|
100 |
bool may_have (hb_codepoint_t g) const
|
|
101 |
{ return !!(mask & mask_for (g)); }
|
48274
|
102 |
|
|
103 |
private:
|
|
104 |
|
54232
|
105 |
static mask_t mask_for (hb_codepoint_t g)
|
|
106 |
{ return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
|
48274
|
107 |
mask_t mask;
|
|
108 |
};
|
|
109 |
|
|
110 |
template <typename head_t, typename tail_t>
|
|
111 |
struct hb_set_digest_combiner_t
|
|
112 |
{
|
54232
|
113 |
void init ()
|
|
114 |
{
|
48274
|
115 |
head.init ();
|
|
116 |
tail.init ();
|
|
117 |
}
|
|
118 |
|
54232
|
119 |
void add (hb_codepoint_t g)
|
|
120 |
{
|
48274
|
121 |
head.add (g);
|
|
122 |
tail.add (g);
|
|
123 |
}
|
|
124 |
|
54232
|
125 |
bool add_range (hb_codepoint_t a, hb_codepoint_t b)
|
|
126 |
{
|
48274
|
127 |
head.add_range (a, b);
|
|
128 |
tail.add_range (a, b);
|
50352
|
129 |
return true;
|
|
130 |
}
|
|
131 |
template <typename T>
|
54232
|
132 |
void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
|
50352
|
133 |
{
|
|
134 |
head.add_array (array, count, stride);
|
|
135 |
tail.add_array (array, count, stride);
|
|
136 |
}
|
|
137 |
template <typename T>
|
54232
|
138 |
bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
|
50352
|
139 |
{
|
|
140 |
head.add_sorted_array (array, count, stride);
|
|
141 |
tail.add_sorted_array (array, count, stride);
|
|
142 |
return true;
|
48274
|
143 |
}
|
|
144 |
|
54232
|
145 |
bool may_have (hb_codepoint_t g) const
|
|
146 |
{
|
48274
|
147 |
return head.may_have (g) && tail.may_have (g);
|
|
148 |
}
|
|
149 |
|
|
150 |
private:
|
|
151 |
head_t head;
|
|
152 |
tail_t tail;
|
|
153 |
};
|
|
154 |
|
|
155 |
|
|
156 |
/*
|
|
157 |
* hb_set_digest_t
|
|
158 |
*
|
|
159 |
* This is a combination of digests that performs "best".
|
|
160 |
* There is not much science to this: it's a result of intuition
|
|
161 |
* and testing.
|
|
162 |
*/
|
|
163 |
typedef hb_set_digest_combiner_t
|
|
164 |
<
|
|
165 |
hb_set_digest_lowest_bits_t<unsigned long, 4>,
|
|
166 |
hb_set_digest_combiner_t
|
|
167 |
<
|
|
168 |
hb_set_digest_lowest_bits_t<unsigned long, 0>,
|
|
169 |
hb_set_digest_lowest_bits_t<unsigned long, 9>
|
|
170 |
>
|
|
171 |
> hb_set_digest_t;
|
|
172 |
|
|
173 |
|
54232
|
174 |
#endif /* HB_SET_DIGEST_HH */
|