14 |
|
|
15 |
class ClassGenerator: |
class ClassGenerator: |
16 |
|
|
17 |
def GenSingletonsFromList(self, list, numGroups, ramp): |
def GenSingletonsFromList(self, _list, numGroups, ramp): |
18 |
"""Generate a new classification consisting solely of singletons. |
"""Generate a new classification consisting solely of singletons. |
19 |
|
|
20 |
The resulting classification will consist of at most 'numGroups' |
The resulting classification will consist of at most 'numGroups' |
21 |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
22 |
could be fewer groups if 'list' contains fewer that 'numGroups' items. |
could be fewer groups if '_list' contains fewer that 'numGroups' items. |
23 |
|
|
24 |
list -- any object that implements the iterator interface |
_list -- any object that implements the iterator interface |
25 |
|
|
26 |
numGroups -- how many groups to generate. This can not be |
numGroups -- how many groups to generate. This can not be |
27 |
determined while the classification is being |
determined while the classification is being |
28 |
generated because the stepping values must |
generated because the stepping values must |
29 |
be precalculated to ramp between prop1 and prop2. |
be precalculated to ramp between prop1 and prop2. |
30 |
|
|
31 |
prop1 -- initial group property values |
ramp -- an object which implements the CustomRamp interface |
|
|
|
|
prop2 -- final group property values |
|
32 |
""" |
""" |
33 |
|
|
34 |
clazz = Classification() |
clazz = Classification() |
36 |
|
|
37 |
ramp.SetNumGroups(numGroups) |
ramp.SetNumGroups(numGroups) |
38 |
|
|
39 |
for value, prop in zip(list, ramp): |
for value, prop in zip(_list, ramp): |
40 |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
41 |
|
|
42 |
return clazz |
return clazz |
60 |
|
|
61 |
return clazz |
return clazz |
62 |
|
|
63 |
def GenUnifromDistribution(self, min, max, numGroups, |
def GenUniformDistribution(self, min, max, numGroups, |
64 |
ramp, intStep = False): |
ramp, intStep = False): |
65 |
"""Generate a classification with numGroups range groups |
"""Generate a classification with numGroups range groups |
66 |
each with the same interval. |
each with the same interval. |
106 |
return clazz |
return clazz |
107 |
|
|
108 |
|
|
109 |
def GenQuantiles(self, list, percents, ramp, _range): |
def GenQuantiles(self, _list, percents, ramp, _range): |
110 |
|
"""Generates a Classification which has groups of ranges that |
111 |
|
represent quantiles of _list at the percentages given in percents. |
112 |
|
Only the values that fall within _range are considered. |
113 |
|
|
114 |
|
Returns a tuple (adjusted, Classification) where adjusted is |
115 |
|
True if the Classification does not exactly represent the given |
116 |
|
range, or if the Classification is empty. |
117 |
|
|
118 |
|
_list -- a sort list of values |
119 |
|
|
120 |
|
percents -- a sorted list of floats in the range 0.0-1.0 which |
121 |
|
represent the upper bound of each quantile |
122 |
|
|
123 |
|
ramp -- an object which implements the CustomRamp interface |
124 |
|
|
125 |
|
_range -- a Range object |
126 |
|
""" |
127 |
|
|
128 |
clazz = Classification() |
clazz = Classification() |
129 |
quantiles = self.CalculateQuantiles(list, percents, _range) |
quantiles = self.CalculateQuantiles(_list, percents, _range) |
130 |
numGroups = len(quantiles[1]) |
adjusted = True |
|
if numGroups == 0: return clazz |
|
131 |
|
|
132 |
ramp.SetNumGroups(numGroups) |
if quantiles is not None: |
133 |
|
|
134 |
|
numGroups = len(quantiles[3]) |
135 |
|
|
136 |
|
if numGroups != 0: |
137 |
|
|
138 |
|
adjusted = quantiles[0] |
139 |
|
|
140 |
|
ramp.SetNumGroups(numGroups) |
141 |
|
|
142 |
left, min, max, right = _range.GetRange() |
start, min, endMax, right = _range.GetRange() |
143 |
|
|
144 |
start = "[" |
oldp = 0 |
145 |
oldp = 0 |
i = 1 |
146 |
for (q, p), prop in zip(quantiles[1], ramp): |
end = "]" |
147 |
max = list[q] |
|
148 |
group = ClassGroupRange(Range(start + str(min) + ";" + |
for (q, p), prop in zip(quantiles[3], ramp): |
149 |
str(max) + "]"), |
if i == numGroups: |
150 |
None, prop) |
max = endMax |
151 |
|
end = right |
152 |
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
else: |
153 |
round(p*100, 2))) |
max = _list[q] |
154 |
oldp = p |
|
155 |
start = "]" |
group = ClassGroupRange(Range((start, min, max, end)), |
156 |
min = max |
None, prop) |
157 |
clazz.AppendGroup(group) |
|
158 |
|
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
159 |
|
round(p*100, 2))) |
160 |
|
oldp = p |
161 |
|
start = "]" |
162 |
|
min = max |
163 |
|
clazz.AppendGroup(group) |
164 |
|
i += 1 |
165 |
|
|
166 |
return (quantiles[0], clazz) |
return (adjusted, clazz) |
167 |
|
|
168 |
def CalculateQuantiles(self, list, percents, _range): |
def CalculateQuantiles(self, _list, percents, _range): |
169 |
"""Calculate quantiles for the given list of percents from the |
"""Calculate quantiles for the given _list of percents from the |
170 |
sorted list of values that are in range. |
sorted list of values that are in range. |
171 |
|
|
172 |
percents is a sorted list of floats in the range 0.0-1.0 |
This may not actually generate len(percents) quantiles if |
|
|
|
|
This may not actually generate numGroups quantiles if |
|
173 |
many of the values that fall on quantile borders are the same. |
many of the values that fall on quantile borders are the same. |
174 |
|
|
175 |
Returns a tuple of the form: (adjusted, [quantile_list]) |
Returns a tuple of the form: |
176 |
|
(adjusted, minIndex, maxIndex, [quantile_list]) |
177 |
|
|
178 |
|
where adjusted is True if the the quantile percentages differ from |
179 |
|
those supplied, minIndex is the index into _list where the |
180 |
|
minimum value used is located, maxIndex is the index into _list |
181 |
|
where the maximum value used is located, and quantile_list is a |
182 |
|
list of tuples of the form: (list_index, quantile_percentage) |
183 |
|
|
184 |
|
Returns None, if no quantiles could be generated based on the |
185 |
|
given range or input list. |
186 |
|
|
187 |
|
_list -- a sort list of values |
188 |
|
|
189 |
|
percents -- a sorted list of floats in the range 0.0-1.0 which |
190 |
|
represent the upper bound of each quantile |
191 |
|
|
192 |
where adjusted is true if the the quantile percentages differ from |
_range -- a Range object |
|
those supplied, and quantile_list is a list of tuples of the form: |
|
|
(list_index, quantile_percentage) |
|
193 |
""" |
""" |
194 |
|
|
195 |
quantiles = [] |
quantiles = [] |
|
|
|
196 |
adjusted = False |
adjusted = False |
197 |
|
|
198 |
if len(percents) != 0: |
if len(percents) != 0: |
199 |
|
|
|
#print "list:", list |
|
|
#print "percents:", percents |
|
|
#print "numGroups:", numGroups |
|
|
|
|
200 |
# |
# |
201 |
# find what part of the list range covers |
# find what part of the _list range covers |
202 |
# |
# |
203 |
minIndex = -1 |
minIndex = -1 |
204 |
maxIndex = -2 |
maxIndex = -2 |
205 |
for i in xrange(0, len(list), 1): |
for i in xrange(0, len(_list), 1): |
206 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
207 |
minIndex = i |
minIndex = i |
208 |
break |
break |
209 |
|
|
210 |
for i in xrange(len(list)-1, -1, -1): |
for i in xrange(len(_list)-1, -1, -1): |
211 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
212 |
maxIndex = i |
maxIndex = i |
213 |
break; |
break |
214 |
|
|
215 |
numValues = maxIndex - minIndex + 1 |
numValues = maxIndex - minIndex + 1 |
216 |
#print "minIndex:", minIndex, "maxIndex:", maxIndex |
|
217 |
if minIndex <= maxIndex: |
if numValues > 0: |
218 |
|
|
219 |
# |
# |
220 |
# build a list of unique indices into list of where each |
# build a list of unique indices into list of where each |
221 |
# quantile should be |
# quantile *should* be. set adjusted if the resulting |
222 |
|
# indices are different |
223 |
# |
# |
224 |
quantiles = {} |
quantiles = {} |
225 |
for p in percents: |
for p in percents: |
227 |
|
|
228 |
adjusted = adjusted \ |
adjusted = adjusted \ |
229 |
or quantiles.has_key(index) \ |
or quantiles.has_key(index) \ |
230 |
or ((index+1) / float(numValues)) != p |
or ((index - minIndex + 1) / float(numValues)) != p |
231 |
|
|
232 |
quantiles[index] = 0 |
quantiles[index] = 0 |
233 |
|
|
234 |
quantiles = quantiles.keys() |
quantiles = quantiles.keys() |
235 |
quantiles.sort() |
quantiles.sort() |
236 |
|
|
|
#print "quantiles:", quantiles |
|
|
|
|
237 |
# |
# |
238 |
# the current quantile index must be strictly greater than |
# the current quantile index must be strictly greater than |
239 |
# the lowerBound |
# the lowerBound |
240 |
# |
# |
241 |
lowerBound = minIndex - 1 |
lowerBound = minIndex - 1 |
242 |
|
|
243 |
for qindex in range(len(quantiles)): |
for qindex in xrange(len(quantiles)): |
244 |
if lowerBound >= maxIndex: |
if lowerBound >= maxIndex: |
245 |
# discard higher quantiles |
# discard higher quantiles |
246 |
quantiles = quantiles[:qindex] |
quantiles = quantiles[:qindex] |
248 |
|
|
249 |
# lowerBound + 1 is always a valid index |
# lowerBound + 1 is always a valid index |
250 |
|
|
|
|
|
251 |
# |
# |
252 |
# bump up the current quantile index to be a usable index |
# bump up the current quantile index to be a usable index |
253 |
# if it currently falls below the lowerBound |
# if it currently falls below the lowerBound |
254 |
# |
# |
255 |
if quantiles[qindex] <= lowerBound: |
if quantiles[qindex] <= lowerBound: |
256 |
quantiles[qindex] = min(lowerBound + 1, maxIndex) |
quantiles[qindex] = lowerBound + 1 |
257 |
|
|
258 |
listIndex = quantiles[qindex] |
listIndex = quantiles[qindex] |
259 |
value = list[quantiles[qindex]] |
value = _list[listIndex] |
|
|
|
|
#print "-----------------------------------" |
|
|
#print "listIndex:", listIndex |
|
|
#print "value:", value |
|
260 |
|
|
261 |
# |
# |
262 |
# look for similar values around the quantile index |
# look for similar values around the quantile index |
263 |
# |
# |
|
|
|
|
#print "lowerBound:", lowerBound |
|
264 |
lindex = listIndex - 1 |
lindex = listIndex - 1 |
265 |
lcount = 0 |
while lindex > lowerBound and value == _list[lindex]: |
|
while lindex > lowerBound: |
|
|
if value != list[lindex]: break |
|
|
lcount += 1 |
|
266 |
lindex -= 1 |
lindex -= 1 |
267 |
|
lcount = (listIndex - 1) - lindex |
268 |
|
|
269 |
rindex = listIndex + 1 |
rindex = listIndex + 1 |
270 |
rcount = 0 |
while rindex < maxIndex + 1 and value == _list[rindex]: |
|
while rindex < maxIndex + 1: |
|
|
if value != list[rindex]: break |
|
|
rcount += 1 |
|
271 |
rindex += 1 |
rindex += 1 |
272 |
|
rcount = (listIndex + 1) - rindex |
|
#print lcount, "(lcount)", rcount, "(rcount)" |
|
|
#print lindex, "(lindex)", rindex, "(rindex)" |
|
273 |
|
|
274 |
# |
# |
275 |
# adjust the current quantile index based on how many |
# adjust the current quantile index based on how many |
276 |
# numbers in the list are the same as the current value |
# numbers in the _list are the same as the current value |
277 |
# |
# |
278 |
newIndex = listIndex |
newIndex = listIndex |
279 |
if lcount == rcount: |
if lcount == rcount: |
285 |
# |
# |
286 |
if lindex != lowerBound: |
if lindex != lowerBound: |
287 |
newIndex = lindex |
newIndex = lindex |
|
#quantiles[qindex] = lindex |
|
288 |
else: |
else: |
289 |
newIndex = rindex - 1 |
newIndex = rindex - 1 |
|
#quantiles[qindex] = rindex - 1 |
|
290 |
|
|
291 |
elif lcount < rcount: |
elif lcount < rcount: |
292 |
# there are fewer items to the left, so |
# there are fewer items to the left, so |
294 |
# doing so creates an empty quantile. |
# doing so creates an empty quantile. |
295 |
if lindex != lowerBound: |
if lindex != lowerBound: |
296 |
newIndex = lindex |
newIndex = lindex |
|
#quantiles[qindex] = lindex |
|
297 |
else: |
else: |
298 |
newIndex = rindex - 1 |
newIndex = rindex - 1 |
|
#quantiles[qindex] = rindex - 1 |
|
299 |
|
|
300 |
elif rcount < lcount: |
elif rcount < lcount: |
301 |
# there are fewer items to the right, so go to the right |
# there are fewer items to the right, so go to the right |
|
#quantiles[qindex] = rindex - 1 |
|
302 |
newIndex = rindex - 1 |
newIndex = rindex - 1 |
303 |
|
|
304 |
|
adjusted = adjusted or newIndex != listIndex |
305 |
|
|
306 |
quantiles[qindex] = newIndex |
quantiles[qindex] = newIndex |
307 |
lowerBound = quantiles[qindex] |
lowerBound = quantiles[qindex] |
308 |
|
|
309 |
#print "quantiles:", quantiles |
# |
310 |
#print "lowerBound:", lowerBound |
# since quantiles is only set if the code is at least a little |
311 |
|
# successful, an empty list will be generated in the case that |
312 |
#print "=================================" |
# we fail to get to the real body of the algorithm |
313 |
#print "quantiles:", quantiles |
# |
314 |
#print "qindex:", qindex |
if len(quantiles) == 0: |
315 |
|
return None |
316 |
return (adjusted, [(q, (q+1) / float(numValues)) for q in quantiles]) |
else: |
317 |
|
return (adjusted, minIndex, maxIndex, |
318 |
|
[(q, (q - minIndex+1) / float(numValues)) \ |
319 |
|
for q in quantiles]) |
320 |
|
|
321 |
CLR = 0 |
CLR = 0 |
322 |
STEP = 1 |
STEP = 1 |