14 |
|
|
15 |
class ClassGenerator: |
class ClassGenerator: |
16 |
|
|
17 |
def GenSingletonsFromList(self, list, numGroups, ramp): |
def GenSingletonsFromList(self, _list, numGroups, ramp): |
18 |
"""Generate a new classification consisting solely of singletons. |
"""Generate a new classification consisting solely of singletons. |
19 |
|
|
20 |
The resulting classification will consist of at most 'numGroups' |
The resulting classification will consist of at most 'numGroups' |
21 |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
22 |
could be fewer groups if 'list' contains fewer that 'numGroups' items. |
could be fewer groups if '_list' contains fewer that 'numGroups' items. |
23 |
|
|
24 |
list -- any object that implements the iterator interface |
_list -- any object that implements the iterator interface |
25 |
|
|
26 |
numGroups -- how many groups to generate. This can not be |
numGroups -- how many groups to generate. This can not be |
27 |
determined while the classification is being |
determined while the classification is being |
28 |
generated because the stepping values must |
generated because the stepping values must |
29 |
be precalculated to ramp between prop1 and prop2. |
be precalculated to ramp between prop1 and prop2. |
30 |
|
|
31 |
prop1 -- initial group property values |
ramp -- an object which implements the CustomRamp interface |
|
|
|
|
prop2 -- final group property values |
|
32 |
""" |
""" |
33 |
|
|
34 |
clazz = Classification() |
clazz = Classification() |
36 |
|
|
37 |
ramp.SetNumGroups(numGroups) |
ramp.SetNumGroups(numGroups) |
38 |
|
|
39 |
for value, prop in zip(list, ramp): |
for value, prop in zip(_list, ramp): |
40 |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
41 |
|
|
42 |
return clazz |
return clazz |
60 |
|
|
61 |
return clazz |
return clazz |
62 |
|
|
63 |
def GenUnifromDistribution(self, min, max, numGroups, |
def GenUniformDistribution(self, min, max, numGroups, |
64 |
ramp, intStep = False): |
ramp, intStep = False): |
65 |
"""Generate a classification with numGroups range groups |
"""Generate a classification with numGroups range groups |
66 |
each with the same interval. |
each with the same interval. |
106 |
return clazz |
return clazz |
107 |
|
|
108 |
|
|
109 |
def GenQuantiles(self, list, percents, ramp, _range): |
def GenQuantiles(self, _list, percents, ramp, _range): |
110 |
|
"""Generates a Classification which has groups of ranges that |
111 |
|
represent quantiles of _list at the percentages given in percents. |
112 |
|
Only the values that fall within _range are considered. |
113 |
|
|
114 |
|
Returns a tuple (adjusted, Classification) where adjusted is |
115 |
|
True if the Classification does not exactly represent the given |
116 |
|
range, or if the Classification is empty. |
117 |
|
|
118 |
|
_list -- a sort list of values |
119 |
|
|
120 |
|
percents -- a sorted list of floats in the range 0.0-1.0 which |
121 |
|
represent the upper bound of each quantile |
122 |
|
|
123 |
|
ramp -- an object which implements the CustomRamp interface |
124 |
|
|
125 |
|
_range -- a Range object |
126 |
|
""" |
127 |
|
|
128 |
clazz = Classification() |
clazz = Classification() |
129 |
quantiles = self.CalculateQuantiles(list, percents, _range) |
quantiles = self.CalculateQuantiles(_list, percents, _range) |
130 |
numGroups = len(quantiles[1]) |
adjusted = True |
|
if numGroups == 0: return clazz |
|
131 |
|
|
132 |
ramp.SetNumGroups(numGroups) |
if quantiles is not None: |
133 |
|
|
134 |
|
numGroups = len(quantiles[3]) |
135 |
|
|
136 |
|
if numGroups != 0: |
137 |
|
|
138 |
|
adjusted = quantiles[0] |
139 |
|
|
140 |
|
ramp.SetNumGroups(numGroups) |
141 |
|
|
142 |
left, min, max, right = _range.GetRange() |
start, min, endMax, right = _range.GetRange() |
143 |
|
|
144 |
start = "[" |
oldp = 0 |
145 |
oldp = 0 |
i = 1 |
146 |
for (q, p), prop in zip(quantiles[1], ramp): |
end = "]" |
147 |
max = list[q] |
|
148 |
group = ClassGroupRange(Range(start + str(min) + ";" + |
for (q, p), prop in zip(quantiles[3], ramp): |
149 |
str(max) + "]"), |
if i == numGroups: |
150 |
None, prop) |
max = endMax |
151 |
|
end = right |
152 |
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
else: |
153 |
round(p*100, 2))) |
max = _list[q] |
154 |
oldp = p |
|
155 |
start = "]" |
group = ClassGroupRange(Range((start, min, max, end)), |
156 |
min = max |
None, prop) |
157 |
clazz.AppendGroup(group) |
|
158 |
|
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
159 |
|
round(p*100, 2))) |
160 |
|
oldp = p |
161 |
|
start = "]" |
162 |
|
min = max |
163 |
|
clazz.AppendGroup(group) |
164 |
|
i += 1 |
165 |
|
|
166 |
return (quantiles[0], clazz) |
return (adjusted, clazz) |
167 |
|
|
168 |
def CalculateQuantiles(self, list, percents, _range): |
def CalculateQuantiles(self, _list, percents, _range): |
169 |
"""Calculate quantiles for the given list of percents from the |
"""Calculate quantiles for the given _list of percents from the |
170 |
sorted list of values that are in range. |
sorted list of values that are in range. |
171 |
|
|
172 |
percents is a sorted list of floats in the range 0.0-1.0 |
This may not actually generate len(percents) quantiles if |
|
|
|
|
This may not actually generate numGroups quantiles if |
|
173 |
many of the values that fall on quantile borders are the same. |
many of the values that fall on quantile borders are the same. |
174 |
|
|
175 |
Returns a tuple of the form: (adjusted, [quantile_list]) |
Returns a tuple of the form: |
176 |
|
(adjusted, minIndex, maxIndex, [quantile_list]) |
177 |
|
|
178 |
|
where adjusted is True if the the quantile percentages differ from |
179 |
|
those supplied, minIndex is the index into _list where the |
180 |
|
minimum value used is located, maxIndex is the index into _list |
181 |
|
where the maximum value used is located, and quantile_list is a |
182 |
|
list of tuples of the form: (list_index, quantile_percentage) |
183 |
|
|
184 |
|
Returns None, if no quantiles could be generated based on the |
185 |
|
given range or input list. |
186 |
|
|
187 |
where adjusted is true if the the quantile percentages differ from |
_list -- a sort list of values |
188 |
those supplied, and quantile_list is a list of tuples of the form: |
|
189 |
(list_index, quantile_percentage) |
percents -- a sorted list of floats in the range 0.0-1.0 which |
190 |
|
represent the upper bound of each quantile |
191 |
|
|
192 |
|
_range -- a Range object |
193 |
""" |
""" |
194 |
|
|
195 |
quantiles = [] |
quantiles = [] |
|
|
|
196 |
adjusted = False |
adjusted = False |
197 |
|
|
198 |
if len(percents) != 0: |
if len(percents) != 0: |
199 |
|
|
200 |
# |
# |
201 |
# find what part of the list range covers |
# find what part of the _list range covers |
202 |
# |
# |
203 |
minIndex = -1 |
minIndex = -1 |
204 |
maxIndex = -2 |
maxIndex = -2 |
205 |
for i in xrange(0, len(list), 1): |
for i in xrange(0, len(_list), 1): |
206 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
207 |
minIndex = i |
minIndex = i |
208 |
break |
break |
209 |
|
|
210 |
for i in xrange(len(list)-1, -1, -1): |
for i in xrange(len(_list)-1, -1, -1): |
211 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
212 |
maxIndex = i |
maxIndex = i |
213 |
break; |
break |
214 |
|
|
215 |
numValues = maxIndex - minIndex + 1 |
numValues = maxIndex - minIndex + 1 |
216 |
if minIndex <= maxIndex: |
|
217 |
|
if numValues > 0: |
218 |
|
|
219 |
# |
# |
220 |
# build a list of unique indices into list of where each |
# build a list of unique indices into list of where each |
240 |
# |
# |
241 |
lowerBound = minIndex - 1 |
lowerBound = minIndex - 1 |
242 |
|
|
243 |
for qindex in range(len(quantiles)): |
for qindex in xrange(len(quantiles)): |
244 |
if lowerBound >= maxIndex: |
if lowerBound >= maxIndex: |
245 |
# discard higher quantiles |
# discard higher quantiles |
246 |
quantiles = quantiles[:qindex] |
quantiles = quantiles[:qindex] |
253 |
# if it currently falls below the lowerBound |
# if it currently falls below the lowerBound |
254 |
# |
# |
255 |
if quantiles[qindex] <= lowerBound: |
if quantiles[qindex] <= lowerBound: |
256 |
quantiles[qindex] = min(lowerBound + 1, maxIndex) |
quantiles[qindex] = lowerBound + 1 |
257 |
|
|
258 |
listIndex = quantiles[qindex] |
listIndex = quantiles[qindex] |
259 |
value = list[quantiles[qindex]] |
value = _list[listIndex] |
260 |
|
|
261 |
# |
# |
262 |
# look for similar values around the quantile index |
# look for similar values around the quantile index |
263 |
# |
# |
264 |
lindex = listIndex - 1 |
lindex = listIndex - 1 |
265 |
lcount = 0 |
while lindex > lowerBound and value == _list[lindex]: |
|
while lindex > lowerBound: |
|
|
if value != list[lindex]: break |
|
|
lcount += 1 |
|
266 |
lindex -= 1 |
lindex -= 1 |
267 |
|
lcount = (listIndex - 1) - lindex |
268 |
|
|
269 |
rindex = listIndex + 1 |
rindex = listIndex + 1 |
270 |
rcount = 0 |
while rindex < maxIndex + 1 and value == _list[rindex]: |
|
while rindex < maxIndex + 1: |
|
|
if value != list[rindex]: break |
|
|
rcount += 1 |
|
271 |
rindex += 1 |
rindex += 1 |
272 |
|
rcount = (listIndex + 1) - rindex |
273 |
|
|
274 |
# |
# |
275 |
# adjust the current quantile index based on how many |
# adjust the current quantile index based on how many |
276 |
# numbers in the list are the same as the current value |
# numbers in the _list are the same as the current value |
277 |
# |
# |
278 |
newIndex = listIndex |
newIndex = listIndex |
279 |
if lcount == rcount: |
if lcount == rcount: |
301 |
# there are fewer items to the right, so go to the right |
# there are fewer items to the right, so go to the right |
302 |
newIndex = rindex - 1 |
newIndex = rindex - 1 |
303 |
|
|
304 |
|
adjusted = adjusted or newIndex != listIndex |
305 |
|
|
306 |
quantiles[qindex] = newIndex |
quantiles[qindex] = newIndex |
307 |
lowerBound = quantiles[qindex] |
lowerBound = quantiles[qindex] |
308 |
|
|
311 |
# successful, an empty list will be generated in the case that |
# successful, an empty list will be generated in the case that |
312 |
# we fail to get to the real body of the algorithm |
# we fail to get to the real body of the algorithm |
313 |
# |
# |
314 |
return (adjusted, |
if len(quantiles) == 0: |
315 |
[(q, (q - minIndex+1) / float(numValues)) for q in quantiles]) |
return None |
316 |
|
else: |
317 |
|
return (adjusted, minIndex, maxIndex, |
318 |
|
[(q, (q - minIndex+1) / float(numValues)) \ |
319 |
|
for q in quantiles]) |
320 |
|
|
321 |
CLR = 0 |
CLR = 0 |
322 |
STEP = 1 |
STEP = 1 |