14 |
|
|
15 |
class ClassGenerator: |
class ClassGenerator: |
16 |
|
|
17 |
def GenSingletonsFromList(self, list, numGroups, ramp): |
def GenSingletonsFromList(self, _list, numGroups, ramp): |
18 |
"""Generate a new classification consisting solely of singletons. |
"""Generate a new classification consisting solely of singletons. |
19 |
|
|
20 |
The resulting classification will consist of at most 'numGroups' |
The resulting classification will consist of at most 'numGroups' |
21 |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
22 |
could be fewer groups if 'list' contains fewer that 'numGroups' items. |
could be fewer groups if '_list' contains fewer that 'numGroups' items. |
23 |
|
|
24 |
list -- any object that implements the iterator interface |
_list -- any object that implements the iterator interface |
25 |
|
|
26 |
numGroups -- how many groups to generate. This can not be |
numGroups -- how many groups to generate. This can not be |
27 |
determined while the classification is being |
determined while the classification is being |
38 |
|
|
39 |
ramp.SetNumGroups(numGroups) |
ramp.SetNumGroups(numGroups) |
40 |
|
|
41 |
for value, prop in zip(list, ramp): |
for value, prop in zip(_list, ramp): |
42 |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
43 |
|
|
44 |
return clazz |
return clazz |
108 |
return clazz |
return clazz |
109 |
|
|
110 |
|
|
111 |
def GenQuantiles(self, list, percents, ramp, _range): |
def GenQuantiles(self, _list, percents, ramp, _range): |
112 |
|
"""Generates a Classification which has groups of ranges that |
113 |
|
represent quantiles of _list at the percentages given in percents. |
114 |
|
Only the values that fall within _range are considered. |
115 |
|
|
116 |
|
Returns a tuple (adjusted, Classification) where adjusted is |
117 |
|
True if the Classification does not exactly represent the given |
118 |
|
range, or if the Classification is empty. |
119 |
|
|
120 |
|
_list -- a sort list of values |
121 |
|
|
122 |
|
percents -- a sorted list of floats in the range 0.0-1.0 which |
123 |
|
represent the upper bound of each quantile |
124 |
|
|
125 |
|
ramp -- an object which implements the CustomRamp interface |
126 |
|
|
127 |
|
_range -- a Range object |
128 |
|
""" |
129 |
|
|
130 |
clazz = Classification() |
clazz = Classification() |
131 |
quantiles = self.CalculateQuantiles(list, percents, _range) |
quantiles = self.CalculateQuantiles(_list, percents, _range) |
132 |
numGroups = len(quantiles[1]) |
adjusted = True |
|
if numGroups == 0: return clazz |
|
133 |
|
|
134 |
ramp.SetNumGroups(numGroups) |
if quantiles is not None: |
135 |
|
|
136 |
left, min, max, right = _range.GetRange() |
numGroups = len(quantiles[3]) |
137 |
|
|
138 |
start = "[" |
if numGroups != 0: |
|
oldp = 0 |
|
|
for (q, p), prop in zip(quantiles[1], ramp): |
|
|
max = list[q] |
|
|
group = ClassGroupRange(Range(start + str(min) + ";" + |
|
|
str(max) + "]"), |
|
|
None, prop) |
|
|
|
|
|
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
|
|
round(p*100, 2))) |
|
|
oldp = p |
|
|
start = "]" |
|
|
min = max |
|
|
clazz.AppendGroup(group) |
|
139 |
|
|
140 |
return (quantiles[0], clazz) |
adjusted = quantiles[0] |
141 |
|
|
142 |
|
ramp.SetNumGroups(numGroups) |
143 |
|
|
144 |
|
min = _list[quantiles[1]] |
145 |
|
start = "[" |
146 |
|
oldp = 0 |
147 |
|
for (q, p), prop in zip(quantiles[3], ramp): |
148 |
|
max = _list[q] |
149 |
|
group = ClassGroupRange(Range(start + str(min) + ";" + |
150 |
|
str(max) + "]"), |
151 |
|
None, prop) |
152 |
|
|
153 |
|
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
154 |
|
round(p*100, 2))) |
155 |
|
oldp = p |
156 |
|
start = "]" |
157 |
|
min = max |
158 |
|
clazz.AppendGroup(group) |
159 |
|
|
160 |
def CalculateQuantiles(self, list, percents, _range): |
return (adjusted, clazz) |
161 |
"""Calculate quantiles for the given list of percents from the |
|
162 |
|
def CalculateQuantiles(self, _list, percents, _range): |
163 |
|
"""Calculate quantiles for the given _list of percents from the |
164 |
sorted list of values that are in range. |
sorted list of values that are in range. |
165 |
|
|
166 |
percents is a sorted list of floats in the range 0.0-1.0 |
This may not actually generate len(percents) quantiles if |
|
|
|
|
This may not actually generate numGroups quantiles if |
|
167 |
many of the values that fall on quantile borders are the same. |
many of the values that fall on quantile borders are the same. |
168 |
|
|
169 |
Returns a tuple of the form: (adjusted, [quantile_list]) |
Returns a tuple of the form: |
170 |
|
(adjusted, minIndex, maxIndex, [quantile_list]) |
171 |
|
|
172 |
|
where adjusted is True if the the quantile percentages differ from |
173 |
|
those supplied, minIndex is the index into _list where the |
174 |
|
minimum value used is located, maxIndex is the index into _list |
175 |
|
where the maximum value used is located, and quantile_list is a |
176 |
|
list of tuples of the form: (list_index, quantile_percentage) |
177 |
|
|
178 |
|
Returns None, if no quantiles could be generated based on the |
179 |
|
given range or input list. |
180 |
|
|
181 |
|
_list -- a sort list of values |
182 |
|
|
183 |
|
percents -- a sorted list of floats in the range 0.0-1.0 which |
184 |
|
represent the upper bound of each quantile |
185 |
|
|
186 |
where adjusted is true if the the quantile percentages differ from |
_range -- a Range object |
|
those supplied, and quantile_list is a list of tuples of the form: |
|
|
(list_index, quantile_percentage) |
|
187 |
""" |
""" |
188 |
|
|
189 |
quantiles = [] |
quantiles = [] |
|
|
|
190 |
adjusted = False |
adjusted = False |
191 |
|
|
192 |
if len(percents) != 0: |
if len(percents) != 0: |
193 |
|
|
194 |
# |
# |
195 |
# find what part of the list range covers |
# find what part of the _list range covers |
196 |
# |
# |
197 |
minIndex = -1 |
minIndex = -1 |
198 |
maxIndex = -2 |
maxIndex = -2 |
199 |
for i in xrange(0, len(list), 1): |
for i in xrange(0, len(_list), 1): |
200 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
201 |
minIndex = i |
minIndex = i |
202 |
break |
break |
203 |
|
|
204 |
for i in xrange(len(list)-1, -1, -1): |
for i in xrange(len(_list)-1, -1, -1): |
205 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
206 |
maxIndex = i |
maxIndex = i |
207 |
break; |
break |
208 |
|
|
209 |
numValues = maxIndex - minIndex + 1 |
numValues = maxIndex - minIndex + 1 |
210 |
if minIndex <= maxIndex: |
|
211 |
|
if numValues > 0: |
212 |
|
|
213 |
# |
# |
214 |
# build a list of unique indices into list of where each |
# build a list of unique indices into list of where each |
234 |
# |
# |
235 |
lowerBound = minIndex - 1 |
lowerBound = minIndex - 1 |
236 |
|
|
237 |
for qindex in range(len(quantiles)): |
for qindex in xrange(len(quantiles)): |
238 |
if lowerBound >= maxIndex: |
if lowerBound >= maxIndex: |
239 |
# discard higher quantiles |
# discard higher quantiles |
240 |
quantiles = quantiles[:qindex] |
quantiles = quantiles[:qindex] |
247 |
# if it currently falls below the lowerBound |
# if it currently falls below the lowerBound |
248 |
# |
# |
249 |
if quantiles[qindex] <= lowerBound: |
if quantiles[qindex] <= lowerBound: |
250 |
quantiles[qindex] = min(lowerBound + 1, maxIndex) |
quantiles[qindex] = lowerBound + 1 |
251 |
|
|
252 |
listIndex = quantiles[qindex] |
listIndex = quantiles[qindex] |
253 |
value = list[quantiles[qindex]] |
value = _list[listIndex] |
254 |
|
|
255 |
# |
# |
256 |
# look for similar values around the quantile index |
# look for similar values around the quantile index |
257 |
# |
# |
258 |
lindex = listIndex - 1 |
lindex = listIndex - 1 |
259 |
lcount = 0 |
while lindex > lowerBound and value == _list[lindex]: |
|
while lindex > lowerBound: |
|
|
if value != list[lindex]: break |
|
|
lcount += 1 |
|
260 |
lindex -= 1 |
lindex -= 1 |
261 |
|
lcount = (listIndex - 1) - lindex |
262 |
|
|
263 |
rindex = listIndex + 1 |
rindex = listIndex + 1 |
264 |
rcount = 0 |
while rindex < maxIndex + 1 and value == _list[rindex]: |
|
while rindex < maxIndex + 1: |
|
|
if value != list[rindex]: break |
|
|
rcount += 1 |
|
265 |
rindex += 1 |
rindex += 1 |
266 |
|
rcount = (listIndex + 1) - rindex |
267 |
|
|
268 |
# |
# |
269 |
# adjust the current quantile index based on how many |
# adjust the current quantile index based on how many |
270 |
# numbers in the list are the same as the current value |
# numbers in the _list are the same as the current value |
271 |
# |
# |
272 |
newIndex = listIndex |
newIndex = listIndex |
273 |
if lcount == rcount: |
if lcount == rcount: |
295 |
# there are fewer items to the right, so go to the right |
# there are fewer items to the right, so go to the right |
296 |
newIndex = rindex - 1 |
newIndex = rindex - 1 |
297 |
|
|
298 |
|
adjusted = adjusted or newIndex != listIndex |
299 |
|
|
300 |
quantiles[qindex] = newIndex |
quantiles[qindex] = newIndex |
301 |
lowerBound = quantiles[qindex] |
lowerBound = quantiles[qindex] |
302 |
|
|
305 |
# successful, an empty list will be generated in the case that |
# successful, an empty list will be generated in the case that |
306 |
# we fail to get to the real body of the algorithm |
# we fail to get to the real body of the algorithm |
307 |
# |
# |
308 |
return (adjusted, |
if len(quantiles) == 0: |
309 |
[(q, (q - minIndex+1) / float(numValues)) for q in quantiles]) |
return None |
310 |
|
else: |
311 |
|
return (adjusted, minIndex, maxIndex, |
312 |
|
[(q, (q - minIndex+1) / float(numValues)) \ |
313 |
|
for q in quantiles]) |
314 |
|
|
315 |
CLR = 0 |
CLR = 0 |
316 |
STEP = 1 |
STEP = 1 |