14 |
|
|
15 |
class ClassGenerator: |
class ClassGenerator: |
16 |
|
|
17 |
def GenSingletonsFromList(self, list, numGroups, ramp): |
def GenSingletonsFromList(self, _list, numGroups, ramp): |
18 |
"""Generate a new classification consisting solely of singletons. |
"""Generate a new classification consisting solely of singletons. |
19 |
|
|
20 |
The resulting classification will consist of at most 'numGroups' |
The resulting classification will consist of at most 'numGroups' |
21 |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
22 |
could be fewer groups if 'list' contains fewer than 'numGroups' items. |
could be fewer groups if '_list' contains fewer than 'numGroups' items. |
23 |
|
|
24 |
list -- any object that implements the iterator interface |
_list -- any object that implements the iterator interface |
25 |
|
|
26 |
numGroups -- how many groups to generate. This can not be |
numGroups -- how many groups to generate. This can not be |
27 |
determined while the classification is being |
determined while the classification is being |
28 |
generated because the stepping values must |
generated because the stepping values must |
29 |
be precalculated to ramp between prop1 and prop2. |
be precalculated to ramp between prop1 and prop2. |
30 |
|
|
31 |
prop1 -- initial group property values |
ramp -- an object which implements the CustomRamp interface |
|
|
|
|
prop2 -- final group property values |
|
32 |
""" |
""" |
33 |
|
|
34 |
clazz = Classification() |
clazz = Classification() |
36 |
|
|
37 |
ramp.SetNumGroups(numGroups) |
ramp.SetNumGroups(numGroups) |
38 |
|
|
39 |
for value, prop in zip(list, ramp): |
for value, prop in zip(_list, ramp): |
40 |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
41 |
|
|
42 |
return clazz |
return clazz |
60 |
|
|
61 |
return clazz |
return clazz |
62 |
|
|
63 |
def GenUnifromDistribution(self, min, max, numGroups, |
def GenUniformDistribution(self, min, max, numGroups, |
64 |
ramp, intStep = False): |
ramp, intStep = False): |
65 |
"""Generate a classification with numGroups range groups |
"""Generate a classification with numGroups range groups |
66 |
each with the same interval. |
each with the same interval. |
106 |
return clazz |
return clazz |
107 |
|
|
108 |
|
|
109 |
def GenQuantiles(self, list, percents, ramp, _range): |
def GenQuantiles(self, _list, percents, ramp, _range): |
110 |
|
"""Generates a Classification which has groups of ranges that |
111 |
|
represent quantiles of _list at the percentages given in percents. |
112 |
|
Only the values that fall within _range are considered. |
113 |
|
|
114 |
|
Returns a tuple (adjusted, Classification) where adjusted is |
115 |
|
True if the Classification does not exactly represent the given |
116 |
|
range, or if the Classification is empty. |
117 |
|
|
118 |
|
_list -- a sorted list of values |
119 |
|
|
120 |
|
percents -- a sorted list of floats in the range 0.0-1.0 which |
121 |
|
represent the upper bound of each quantile |
122 |
|
|
123 |
|
ramp -- an object which implements the CustomRamp interface |
124 |
|
|
125 |
|
_range -- a Range object |
126 |
|
""" |
127 |
|
|
128 |
clazz = Classification() |
clazz = Classification() |
129 |
quantiles = self.CalculateQuantiles(list, percents, _range) |
quantiles = self.CalculateQuantiles(_list, percents, _range) |
130 |
numGroups = len(quantiles[1]) |
adjusted = True |
|
if numGroups == 0: return clazz |
|
131 |
|
|
132 |
ramp.SetNumGroups(numGroups) |
if quantiles is not None: |
133 |
|
|
134 |
|
numGroups = len(quantiles[3]) |
135 |
|
|
136 |
|
if numGroups != 0: |
137 |
|
|
138 |
left, min, max, right = _range.GetRange() |
adjusted = quantiles[0] |
139 |
|
|
140 |
start = "[" |
ramp.SetNumGroups(numGroups) |
|
oldp = 0 |
|
|
for (q, p), prop in zip(quantiles[1], ramp): |
|
|
max = list[q] |
|
|
group = ClassGroupRange(Range(start + str(min) + ";" + |
|
|
str(max) + "]"), |
|
|
None, prop) |
|
|
|
|
|
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
|
|
round(p*100, 2))) |
|
|
oldp = p |
|
|
start = "]" |
|
|
min = max |
|
|
clazz.AppendGroup(group) |
|
141 |
|
|
142 |
return (quantiles[0], clazz) |
start, min, endMax, right = _range.GetRange() |
143 |
|
|
144 |
def CalculateQuantiles(self, list, percents, _range): |
if str(min) == '-inf': |
145 |
"""Calculate quantiles for the given list of percents from the |
min = "-oo" |
146 |
|
elif str(min) == 'inf': |
147 |
|
min = "oo" |
148 |
|
|
149 |
|
if str(endMax) == '-inf': |
150 |
|
endMax = "-oo" |
151 |
|
elif str(endMax) == 'inf': |
152 |
|
endMax = "oo" |
153 |
|
|
154 |
|
oldp = 0 |
155 |
|
i = 1 |
156 |
|
end = "]" |
157 |
|
|
158 |
|
for (q, p), prop in zip(quantiles[3], ramp): |
159 |
|
if i == numGroups: |
160 |
|
max = endMax |
161 |
|
end = right |
162 |
|
else: |
163 |
|
max = _list[q] |
164 |
|
|
165 |
|
group = ClassGroupRange(Range(start + str(min) + ";" + |
166 |
|
str(max) + end), |
167 |
|
None, prop) |
168 |
|
|
169 |
|
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
170 |
|
round(p*100, 2))) |
171 |
|
oldp = p |
172 |
|
start = "]" |
173 |
|
min = max |
174 |
|
clazz.AppendGroup(group) |
175 |
|
i += 1 |
176 |
|
|
177 |
|
return (adjusted, clazz) |
178 |
|
|
179 |
|
def CalculateQuantiles(self, _list, percents, _range): |
180 |
|
"""Calculate quantiles for the given list of percents from the |
181 |
sorted list of values that are in range. |
sorted list of values that are in range. |
182 |
|
|
183 |
percents is a sorted list of floats in the range 0.0-1.0 |
This may not actually generate len(percents) quantiles if |
|
|
|
|
This may not actually generate numGroups quantiles if |
|
184 |
many of the values that fall on quantile borders are the same. |
many of the values that fall on quantile borders are the same. |
185 |
|
|
186 |
Returns a tuple of the form: (adjusted, [quantile_list]) |
Returns a tuple of the form: |
187 |
|
(adjusted, minIndex, maxIndex, [quantile_list]) |
188 |
|
|
189 |
|
where adjusted is True if the quantile percentages differ from |
190 |
|
those supplied, minIndex is the index into _list where the |
191 |
|
minimum value used is located, maxIndex is the index into _list |
192 |
|
where the maximum value used is located, and quantile_list is a |
193 |
|
list of tuples of the form: (list_index, quantile_percentage) |
194 |
|
|
195 |
|
Returns None, if no quantiles could be generated based on the |
196 |
|
given range or input list. |
197 |
|
|
198 |
where adjusted is true if the quantile percentages differ from |
_list -- a sorted list of values |
199 |
those supplied, and quantile_list is a list of tuples of the form: |
|
200 |
(list_index, quantile_percentage) |
percents -- a sorted list of floats in the range 0.0-1.0 which |
201 |
|
represent the upper bound of each quantile |
202 |
|
|
203 |
|
_range -- a Range object |
204 |
""" |
""" |
205 |
|
|
206 |
quantiles = [] |
quantiles = [] |
|
|
|
207 |
adjusted = False |
adjusted = False |
208 |
|
|
209 |
if len(percents) != 0: |
if len(percents) != 0: |
210 |
|
|
211 |
# |
# |
212 |
# find what part of the list range covers |
# find what part of the _list range covers |
213 |
# |
# |
214 |
minIndex = -1 |
minIndex = -1 |
215 |
maxIndex = -2 |
maxIndex = -2 |
216 |
for i in xrange(0, len(list), 1): |
for i in xrange(0, len(_list), 1): |
217 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
218 |
minIndex = i |
minIndex = i |
219 |
break |
break |
220 |
|
|
221 |
for i in xrange(len(list)-1, -1, -1): |
for i in xrange(len(_list)-1, -1, -1): |
222 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
223 |
maxIndex = i |
maxIndex = i |
224 |
break; |
break |
225 |
|
|
226 |
numValues = maxIndex - minIndex + 1 |
numValues = maxIndex - minIndex + 1 |
227 |
if minIndex <= maxIndex: |
|
228 |
|
if numValues > 0: |
229 |
|
|
230 |
# |
# |
231 |
# build a list of unique indices into list of where each |
# build a list of unique indices into list of where each |
251 |
# |
# |
252 |
lowerBound = minIndex - 1 |
lowerBound = minIndex - 1 |
253 |
|
|
254 |
for qindex in range(len(quantiles)): |
for qindex in xrange(len(quantiles)): |
255 |
if lowerBound >= maxIndex: |
if lowerBound >= maxIndex: |
256 |
# discard higher quantiles |
# discard higher quantiles |
257 |
quantiles = quantiles[:qindex] |
quantiles = quantiles[:qindex] |
264 |
# if it currently falls below the lowerBound |
# if it currently falls below the lowerBound |
265 |
# |
# |
266 |
if quantiles[qindex] <= lowerBound: |
if quantiles[qindex] <= lowerBound: |
267 |
quantiles[qindex] = min(lowerBound + 1, maxIndex) |
quantiles[qindex] = lowerBound + 1 |
268 |
|
|
269 |
listIndex = quantiles[qindex] |
listIndex = quantiles[qindex] |
270 |
value = list[quantiles[qindex]] |
value = _list[listIndex] |
271 |
|
|
272 |
# |
# |
273 |
# look for similar values around the quantile index |
# look for similar values around the quantile index |
274 |
# |
# |
275 |
lindex = listIndex - 1 |
lindex = listIndex - 1 |
276 |
lcount = 0 |
while lindex > lowerBound and value == _list[lindex]: |
|
while lindex > lowerBound: |
|
|
if value != list[lindex]: break |
|
|
lcount += 1 |
|
277 |
lindex -= 1 |
lindex -= 1 |
278 |
|
lcount = (listIndex - 1) - lindex |
279 |
|
|
280 |
rindex = listIndex + 1 |
rindex = listIndex + 1 |
281 |
rcount = 0 |
while rindex < maxIndex + 1 and value == _list[rindex]: |
|
while rindex < maxIndex + 1: |
|
|
if value != list[rindex]: break |
|
|
rcount += 1 |
|
282 |
rindex += 1 |
rindex += 1 |
283 |
|
rcount = (listIndex + 1) - rindex |
284 |
|
|
285 |
# |
# |
286 |
# adjust the current quantile index based on how many |
# adjust the current quantile index based on how many |
287 |
# numbers in the list are the same as the current value |
# numbers in the _list are the same as the current value |
288 |
# |
# |
289 |
newIndex = listIndex |
newIndex = listIndex |
290 |
if lcount == rcount: |
if lcount == rcount: |
312 |
# there are fewer items to the right, so go to the right |
# there are fewer items to the right, so go to the right |
313 |
newIndex = rindex - 1 |
newIndex = rindex - 1 |
314 |
|
|
315 |
|
adjusted = adjusted or newIndex != listIndex |
316 |
|
|
317 |
quantiles[qindex] = newIndex |
quantiles[qindex] = newIndex |
318 |
lowerBound = quantiles[qindex] |
lowerBound = quantiles[qindex] |
319 |
|
|
322 |
# successful, an empty list will be generated in the case that |
# successful, an empty list will be generated in the case that |
323 |
# we fail to get to the real body of the algorithm |
# we fail to get to the real body of the algorithm |
324 |
# |
# |
325 |
return (adjusted, |
if len(quantiles) == 0: |
326 |
[(q, (q - minIndex+1) / float(numValues)) for q in quantiles]) |
return None |
327 |
|
else: |
328 |
|
return (adjusted, minIndex, maxIndex, |
329 |
|
[(q, (q - minIndex+1) / float(numValues)) \ |
330 |
|
for q in quantiles]) |
331 |
|
|
332 |
CLR = 0 |
CLR = 0 |
333 |
STEP = 1 |
STEP = 1 |