5 |
# This program is free software under the GPL (>=v2) |
# This program is free software under the GPL (>=v2) |
6 |
# Read the file COPYING coming with Thuban for details. |
# Read the file COPYING coming with Thuban for details. |
7 |
|
|
8 |
|
""" |
9 |
|
ClassGenerator |
10 |
|
""" |
11 |
|
|
12 |
|
__version__ = "$Revision$" |
13 |
|
# $Source$ |
14 |
|
# $Id$ |
15 |
|
|
16 |
import operator |
import operator |
17 |
|
|
18 |
from color import Color |
from color import Color |
22 |
|
|
23 |
class ClassGenerator: |
class ClassGenerator: |
24 |
|
|
25 |
def GenSingletonsFromList(self, list, numGroups, ramp): |
def GenSingletonsFromList(self, _list, numGroups, ramp): |
26 |
"""Generate a new classification consisting solely of singletons. |
"""Generate a new classification consisting solely of singletons. |
27 |
|
|
28 |
The resulting classification will consist of at most 'numGroups' |
The resulting classification will consist of at most 'numGroups' |
29 |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
groups whose group properties ramp between 'prop1' and 'prop2'. There |
30 |
could be fewer groups if 'list' contains fewer that 'numGroups' items. |
could be fewer groups if '_list' contains fewer that 'numGroups' items. |
31 |
|
|
32 |
list -- any object that implements the iterator interface |
_list -- any object that implements the iterator interface |
33 |
|
|
34 |
numGroups -- how many groups to generate. This can not be |
numGroups -- how many groups to generate. This can not be |
35 |
determined while the classification is being |
determined while the classification is being |
36 |
generated because the stepping values must |
generated because the stepping values must |
37 |
be precalculated to ramp between prop1 and prop2. |
be precalculated to ramp between prop1 and prop2. |
38 |
|
|
39 |
prop1 -- initial group property values |
ramp -- an object which implements the CustomRamp interface |
|
|
|
|
prop2 -- final group property values |
|
40 |
""" |
""" |
41 |
|
|
42 |
clazz = Classification() |
clazz = Classification() |
44 |
|
|
45 |
ramp.SetNumGroups(numGroups) |
ramp.SetNumGroups(numGroups) |
46 |
|
|
47 |
for value, prop in zip(list, ramp): |
for value, prop in zip(_list, ramp): |
48 |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
clazz.AppendGroup(ClassGroupSingleton(value, prop)) |
49 |
|
|
50 |
return clazz |
return clazz |
68 |
|
|
69 |
return clazz |
return clazz |
70 |
|
|
71 |
def GenUnifromDistribution(self, min, max, numGroups, |
def GenUniformDistribution(self, min, max, numGroups, |
72 |
ramp, intStep = False): |
ramp, intStep = False): |
73 |
"""Generate a classification with numGroups range groups |
"""Generate a classification with numGroups range groups |
74 |
each with the same interval. |
each with the same interval. |
103 |
|
|
104 |
# this check guards against rounding issues |
# this check guards against rounding issues |
105 |
if cur_min != cur_max: |
if cur_min != cur_max: |
106 |
range = Range("[" + str(float(cur_min)) + ";" + |
range = Range(("[", cur_min, cur_max, end)) |
|
str(float(cur_max)) + end) |
|
107 |
clazz.AppendGroup(ClassGroupRange(range, None, prop)) |
clazz.AppendGroup(ClassGroupRange(range, None, prop)) |
108 |
|
|
109 |
cur_min = cur_max |
cur_min = cur_max |
113 |
return clazz |
return clazz |
114 |
|
|
115 |
|
|
116 |
def GenQuantiles(self, list, percents, ramp, _range): |
def GenQuantiles(self, _list, percents, ramp, _range): |
117 |
|
"""Generates a Classification which has groups of ranges that |
118 |
|
represent quantiles of _list at the percentages given in percents. |
119 |
|
Only the values that fall within _range are considered. |
120 |
|
|
121 |
|
Returns a tuple (adjusted, Classification) where adjusted is |
122 |
|
True if the Classification does not exactly represent the given |
123 |
|
range, or if the Classification is empty. |
124 |
|
|
125 |
|
_list -- a sort list of values |
126 |
|
|
127 |
|
percents -- a sorted list of floats in the range 0.0-1.0 which |
128 |
|
represent the upper bound of each quantile |
129 |
|
|
130 |
|
ramp -- an object which implements the CustomRamp interface |
131 |
|
|
132 |
|
_range -- a Range object |
133 |
|
""" |
134 |
|
|
135 |
clazz = Classification() |
clazz = Classification() |
136 |
quantiles = self.CalculateQuantiles(list, percents, _range) |
quantiles = self.CalculateQuantiles(_list, percents, _range) |
137 |
numGroups = len(quantiles[1]) |
adjusted = True |
|
if numGroups == 0: return clazz |
|
138 |
|
|
139 |
ramp.SetNumGroups(numGroups) |
if quantiles is not None: |
140 |
|
|
141 |
|
numGroups = len(quantiles[3]) |
142 |
|
|
143 |
left, min, max, right = _range.GetRange() |
if numGroups != 0: |
144 |
|
|
145 |
start = "[" |
adjusted = quantiles[0] |
|
oldp = 0 |
|
|
for (q, p), prop in zip(quantiles[1], ramp): |
|
|
max = list[q] |
|
|
group = ClassGroupRange(Range(start + str(min) + ";" + |
|
|
str(max) + "]"), |
|
|
None, prop) |
|
|
|
|
|
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
|
|
round(p*100, 2))) |
|
|
oldp = p |
|
|
start = "]" |
|
|
min = max |
|
|
clazz.AppendGroup(group) |
|
146 |
|
|
147 |
return (quantiles[0], clazz) |
ramp.SetNumGroups(numGroups) |
148 |
|
|
149 |
|
start, min, endMax, right = _range.GetRange() |
150 |
|
|
151 |
|
oldp = 0 |
152 |
|
i = 1 |
153 |
|
end = "]" |
154 |
|
|
155 |
def CalculateQuantiles(self, list, percents, _range): |
for (q, p), prop in zip(quantiles[3], ramp): |
156 |
"""Calculate quantiles for the given list of percents from the |
if i == numGroups: |
157 |
|
max = endMax |
158 |
|
end = right |
159 |
|
else: |
160 |
|
max = _list[q] |
161 |
|
|
162 |
|
group = ClassGroupRange(Range((start, min, max, end)), |
163 |
|
None, prop) |
164 |
|
|
165 |
|
group.SetLabel("%s%% - %s%%" % (round(oldp*100, 2), |
166 |
|
round(p*100, 2))) |
167 |
|
oldp = p |
168 |
|
start = "]" |
169 |
|
min = max |
170 |
|
clazz.AppendGroup(group) |
171 |
|
i += 1 |
172 |
|
|
173 |
|
return (adjusted, clazz) |
174 |
|
|
175 |
|
def CalculateQuantiles(self, _list, percents, _range): |
176 |
|
"""Calculate quantiles for the given _list of percents from the |
177 |
sorted list of values that are in range. |
sorted list of values that are in range. |
178 |
|
|
179 |
percents is a sorted list of floats in the range 0.0-1.0 |
This may not actually generate len(percents) quantiles if |
|
|
|
|
This may not actually generate numGroups quantiles if |
|
180 |
many of the values that fall on quantile borders are the same. |
many of the values that fall on quantile borders are the same. |
181 |
|
|
182 |
Returns a tuple of the form: (adjusted, [quantile_list]) |
Returns a tuple of the form: |
183 |
|
(adjusted, minIndex, maxIndex, [quantile_list]) |
184 |
|
|
185 |
|
where adjusted is True if the the quantile percentages differ from |
186 |
|
those supplied, minIndex is the index into _list where the |
187 |
|
minimum value used is located, maxIndex is the index into _list |
188 |
|
where the maximum value used is located, and quantile_list is a |
189 |
|
list of tuples of the form: (list_index, quantile_percentage) |
190 |
|
|
191 |
|
Returns None, if no quantiles could be generated based on the |
192 |
|
given range or input list. |
193 |
|
|
194 |
where adjusted is true if the the quantile percentages differ from |
_list -- a sort list of values |
195 |
those supplied, and quantile_list is a list of tuples of the form: |
|
196 |
(list_index, quantile_percentage) |
percents -- a sorted list of floats in the range 0.0-1.0 which |
197 |
|
represent the upper bound of each quantile |
198 |
|
|
199 |
|
_range -- a Range object |
200 |
""" |
""" |
201 |
|
|
202 |
quantiles = [] |
quantiles = [] |
|
|
|
203 |
adjusted = False |
adjusted = False |
204 |
|
|
205 |
if len(percents) != 0: |
if len(percents) != 0: |
206 |
|
|
207 |
# |
# |
208 |
# find what part of the list range covers |
# find what part of the _list range covers |
209 |
# |
# |
210 |
minIndex = -1 |
minIndex = -1 |
211 |
maxIndex = -2 |
maxIndex = -2 |
212 |
for i in xrange(0, len(list), 1): |
for i in xrange(0, len(_list), 1): |
213 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
214 |
minIndex = i |
minIndex = i |
215 |
break |
break |
216 |
|
|
217 |
for i in xrange(len(list)-1, -1, -1): |
for i in xrange(len(_list)-1, -1, -1): |
218 |
if operator.contains(_range, list[i]): |
if operator.contains(_range, _list[i]): |
219 |
maxIndex = i |
maxIndex = i |
220 |
break; |
break |
221 |
|
|
222 |
numValues = maxIndex - minIndex + 1 |
numValues = maxIndex - minIndex + 1 |
223 |
if minIndex <= maxIndex: |
|
224 |
|
if numValues > 0: |
225 |
|
|
226 |
# |
# |
227 |
# build a list of unique indices into list of where each |
# build a list of unique indices into list of where each |
247 |
# |
# |
248 |
lowerBound = minIndex - 1 |
lowerBound = minIndex - 1 |
249 |
|
|
250 |
for qindex in range(len(quantiles)): |
for qindex in xrange(len(quantiles)): |
251 |
if lowerBound >= maxIndex: |
if lowerBound >= maxIndex: |
252 |
# discard higher quantiles |
# discard higher quantiles |
253 |
quantiles = quantiles[:qindex] |
quantiles = quantiles[:qindex] |
260 |
# if it currently falls below the lowerBound |
# if it currently falls below the lowerBound |
261 |
# |
# |
262 |
if quantiles[qindex] <= lowerBound: |
if quantiles[qindex] <= lowerBound: |
263 |
quantiles[qindex] = min(lowerBound + 1, maxIndex) |
quantiles[qindex] = lowerBound + 1 |
264 |
|
|
265 |
listIndex = quantiles[qindex] |
listIndex = quantiles[qindex] |
266 |
value = list[quantiles[qindex]] |
value = _list[listIndex] |
267 |
|
|
268 |
# |
# |
269 |
# look for similar values around the quantile index |
# look for similar values around the quantile index |
270 |
# |
# |
271 |
lindex = listIndex - 1 |
lindex = listIndex - 1 |
272 |
lcount = 0 |
while lindex > lowerBound and value == _list[lindex]: |
|
while lindex > lowerBound: |
|
|
if value != list[lindex]: break |
|
|
lcount += 1 |
|
273 |
lindex -= 1 |
lindex -= 1 |
274 |
|
lcount = (listIndex - 1) - lindex |
275 |
|
|
276 |
rindex = listIndex + 1 |
rindex = listIndex + 1 |
277 |
rcount = 0 |
while rindex < maxIndex + 1 and value == _list[rindex]: |
|
while rindex < maxIndex + 1: |
|
|
if value != list[rindex]: break |
|
|
rcount += 1 |
|
278 |
rindex += 1 |
rindex += 1 |
279 |
|
rcount = (listIndex + 1) - rindex |
280 |
|
|
281 |
# |
# |
282 |
# adjust the current quantile index based on how many |
# adjust the current quantile index based on how many |
283 |
# numbers in the list are the same as the current value |
# numbers in the _list are the same as the current value |
284 |
# |
# |
285 |
newIndex = listIndex |
newIndex = listIndex |
286 |
if lcount == rcount: |
if lcount == rcount: |
308 |
# there are fewer items to the right, so go to the right |
# there are fewer items to the right, so go to the right |
309 |
newIndex = rindex - 1 |
newIndex = rindex - 1 |
310 |
|
|
311 |
|
adjusted = adjusted or newIndex != listIndex |
312 |
|
|
313 |
quantiles[qindex] = newIndex |
quantiles[qindex] = newIndex |
314 |
lowerBound = quantiles[qindex] |
lowerBound = quantiles[qindex] |
315 |
|
|
318 |
# successful, an empty list will be generated in the case that |
# successful, an empty list will be generated in the case that |
319 |
# we fail to get to the real body of the algorithm |
# we fail to get to the real body of the algorithm |
320 |
# |
# |
321 |
return (adjusted, |
if len(quantiles) == 0: |
322 |
[(q, (q - minIndex+1) / float(numValues)) for q in quantiles]) |
return None |
323 |
|
else: |
324 |
|
return (adjusted, minIndex, maxIndex, |
325 |
|
[(q, (q - minIndex+1) / float(numValues)) \ |
326 |
|
for q in quantiles]) |
327 |
|
|
328 |
CLR = 0 |
CLR = 0 |
329 |
STEP = 1 |
STEP = 1 |