1 |
#!/usr/bin/env python2 |
2 |
import sys |
3 |
from string import join, strip, split |
4 |
import getopt |
5 |
|
6 |
from xml.sax import saxexts |
7 |
from xml.sax.saxlib import HandlerBase |
8 |
from xml.dom.minidom import parse, Element, Text |
9 |
|
10 |
from textformatter import TextFormatter |
11 |
|
12 |
|
13 |
class Options:
    """Global conversion switches, read directly as class attributes."""

    # 0 selects the SGML layout that emits a separate <label id="..."> line
    # after each <faqsect>/<faqentry>; a true value puts the label in an
    # ``id`` attribute on the element itself.  Set to 1 by the -n option.
    new_sgml_dtd = 0
16 |
|
17 |
|
18 |
def extract_raw_text(element):
    """Return the text of every ``#text`` node below *element*, concatenated.

    The tree is walked depth-first in document order.  Markup is discarded;
    only the ``nodeValue`` of text nodes is kept.  Nodes that are neither
    text nodes nor have children contribute nothing.

    Returns an empty string when *element* has no text descendants.
    """
    pieces = []
    for child in element.childNodes:
        if child.nodeName == '#text':
            pieces.append(child.nodeValue)
        else:
            # The recursive call already returns one string, so append it
            # whole instead of extending the list character by character.
            pieces.append(extract_raw_text(child))
    # str.join replaces the long-deprecated string.join() function.
    return ''.join(pieces)
26 |
|
27 |
def element_to_sgml(element, label_dict=None):
    """Render the children of *element* as SGML markup and return the string.

    Child elements are translated as follows:

    * ``faqref`` -> ``<ref id="FAQ<id>">section.num</>``, looked up in
      *label_dict* under the key ``"FAQ" + id``
    * ``url``    -> ``<href url="...">`` with the URL itself as link text
    * ``href``   -> ``<href url="...">`` with the element's raw text as
      link text
    * ``p``      -> a blank line (two newlines)
    * anything else (including text nodes) -> the node's own ``toxml()``

    element    -- DOM node whose children are rendered
    label_dict -- mapping from label string to an object with ``section``
                  and ``num`` attributes; defaults to an empty mapping

    Raises KeyError when a ``faqref`` names a label missing from
    *label_dict*.
    """
    # Imported locally so this function does not depend on a file-level import.
    from xml.sax.saxutils import escape

    if label_dict is None:
        label_dict = {}
    # Extra entity needed when the value is placed inside double quotes.
    in_attribute = {'"': '&quot;'}

    pieces = []
    for child in element.childNodes:
        name = child.nodeName
        if name == 'faqref':
            ref = "FAQ" + child.getAttribute("id")
            item = label_dict[ref]
            t = '<ref id="%s">%s.%s</>' % (ref, item.section, item.num)
        elif name == 'url':
            url = child.getAttribute("url")
            # getAttribute() returns the *unescaped* value; re-escape it so
            # that '&', '<' and '"' do not corrupt the generated markup
            # (the toxml() branch below already escapes its output).
            t = '<href url="%s">%s</>' % (escape(url, in_attribute),
                                          escape(url))
        elif name == 'href':
            url = child.getAttribute("url")
            t = '<href url="%s">' % escape(url, in_attribute)
            t = t + escape(extract_raw_text(child)) + "</>"
        elif name == 'p':
            # NOTE(review): the children of <p> are not emitted here, unlike
            # element_to_xhtml -- presumably <p/> is only used as an empty
            # separator in SGML-bound input; confirm.
            t = '\n\n'
        else:
            t = child.toxml()
        pieces.append(t)
    return ''.join(pieces)
49 |
|
50 |
def element_to_xhtml(element, label_dict=None):
    """Render the children of *element* as XHTML markup and return the string.

    Child elements are translated as follows:

    * ``faqref``  -> ``<a href="#FAQ<id>">section.num</a>``, looked up in
      *label_dict* under the key ``"FAQ" + id``
    * ``url``     -> ``<a href="...">`` with the URL itself as link text
    * ``href``    -> ``<a href="...">`` with the element's raw text as
      link text
    * ``p``       -> ``<p>...</p>`` with the contents rendered recursively
    * ``itemize`` -> ``<ul>`` holding one ``<li>`` per direct ``item``
      child, each rendered recursively
    * anything else (including text nodes) -> the node's own ``toxml()``

    element    -- DOM node whose children are rendered
    label_dict -- mapping from label string to an object with ``section``
                  and ``num`` attributes; defaults to an empty mapping

    The DOM tree passed in is never modified.

    Raises KeyError when a ``faqref`` names a label missing from
    *label_dict*.
    """
    # Imported locally so this function does not depend on a file-level import.
    from xml.sax.saxutils import escape

    if label_dict is None:
        label_dict = {}
    # Extra entity needed when the value is placed inside double quotes.
    in_attribute = {'"': '&quot;'}

    pieces = []
    for child in element.childNodes:
        name = child.nodeName
        if name == 'faqref':
            ref = "FAQ" + child.getAttribute("id")
            item = label_dict[ref]
            t = '<a href="#%s">%s.%s</a>' % (ref, item.section, item.num)
        elif name == 'url':
            url = child.getAttribute("url")
            # getAttribute() returns the *unescaped* value; re-escape it so
            # that '&', '<' and '"' yield well-formed XHTML.
            t = '<a href="%s">%s</a>' % (escape(url, in_attribute),
                                         escape(url))
        elif name == 'href':
            url = child.getAttribute("url")
            t = '<a href="%s">' % escape(url, in_attribute)
            t = t + escape(extract_raw_text(child)) + "</a>"
        elif name == 'p':
            t = '<p>' + element_to_xhtml(child, label_dict) + '</p>'
        elif name == 'itemize':
            # Render each item recursively instead of re-parenting its
            # children into a new <li> node: appendChild() *moves* a node,
            # which shrank item.childNodes during iteration (dropping every
            # second child) and left the source tree damaged.  Recursion
            # also converts faqref/url/href and nested lists inside items.
            rendered = ['<li>%s</li>' % element_to_xhtml(node, label_dict)
                        for node in child.childNodes
                        if node.nodeName == 'item']
            if rendered:
                t = '<ul>' + ''.join(rendered) + '</ul>'
            else:
                t = '<ul/>'
        else:
            t = child.toxml()
        pieces.append(t)
    return ''.join(pieces)
81 |
|
82 |
class Entry:
    """A single FAQ entry: its position, title and body DOM nodes, and label.

    Attributes:
      section -- number of the section the entry belongs to
      num     -- number of the entry within its section
      title   -- DOM node holding the entry title
      body    -- DOM node holding the entry body
      label   -- anchor name, always prefixed with "FAQ"
    """

    def __init__(self, section, num, title, body, label = ''):
        """Store the entry data and derive its label.

        With an explicit *label* the anchor is "FAQ" + label; otherwise it
        is built from the numbers as "FAQ<section>.<num>".
        """
        self.section = section
        self.num = num
        self.title = title
        self.body = body
        if label:
            self.label = "FAQ" + label
        else:
            # repr() is the portable spelling of the old backtick syntax.
            self.label = "FAQ" + repr(self.section) + '.' + repr(self.num)

    def __cmp__(self, other):
        """Order entries by section number, then by entry number.

        Returns a negative, zero or positive integer.  Written without the
        cmp() builtin so it does not rely on a Python-2-only name.
        """
        mine = (self.section, self.num)
        theirs = (other.section, other.num)
        return (mine > theirs) - (mine < theirs)

    def save_as_sgml(self, file, labeldict):
        """Write this entry as a <faqentry> element to *file*.

        The label goes into an ``id`` attribute when Options.new_sgml_dtd
        is true, otherwise into a separate <label> element.  *labeldict*
        resolves ``faqref`` elements found in the body.
        """
        title = element_to_sgml(self.title)
        if Options.new_sgml_dtd:
            file.write('<faqentry id="%s">%d.%d %s\n' %
                       (self.label, self.section, self.num, title))
        else:
            file.write('<faqentry>%d.%d %s\n<label id="%s">\n'
                       % (self.section, self.num, title, self.label))
        file.write("<faqbody>\n")
        file.write(element_to_sgml(self.body, labeldict))
        file.write("</faqbody>\n</faqentry>\n")

    def toc_sgml(self):
        """Return the SGML table-of-contents line linking to this entry."""
        return '    %3d.%d <ref id="%s">%s</>' % \
               (self.section, self.num, self.label,
                element_to_sgml(self.title))

    def save_as_xhtml(self, file, labeldict):
        """Write this entry to *file* as an <h3> heading followed by its body.

        A body without any <p> descendant is wrapped in a single <p>
        element; otherwise it is written as rendered.
        """
        file.write('<h3><a name="%s">%d.%d %s</a></h3>\n' %
                   (self.label, self.section, self.num,
                    element_to_xhtml(self.title)))

        # Look for paragraphs before rendering, matching the original
        # order of operations on the body tree.
        has_paragraphs = len(self.body.getElementsByTagName("p")) > 0
        rendered = element_to_xhtml(self.body, labeldict)
        if has_paragraphs:
            file.write(rendered)
        else:
            file.write("<p>\n")
            file.write(rendered)
            file.write("</p>\n")

    def toc_xhtml(self):
        """Return the XHTML table-of-contents line linking to this entry."""
        return '    %3d.%d <a href="#%s">%s</a>' % \
               (self.section, self.num, self.label,
                element_to_xhtml(self.title))

    def toc_text(self):
        """Return "<section>.<num> <title>" with title whitespace collapsed."""
        title = extract_raw_text(self.title)
        # Split on any whitespace and re-join with single spaces.
        title = ' '.join(title.split())
        return '%3d.%d %s' % (self.section, self.num, title)
139 |
|
140 |
|
141 |
class Section:
    """A numbered FAQ section holding a title node and a list of entries.

    Attributes:
      section -- section number
      title   -- DOM node holding the section title
      entries -- entry objects in the order they were appended
      label   -- anchor name, always prefixed with "FAQ"
    """

    def __init__(self, section, title, label = ''):
        """Store the section data and derive its label.

        With an explicit *label* the anchor is "FAQ" + label; otherwise it
        is "FAQ<section>".
        """
        self.section = section
        self.title = title
        self.entries = []
        if label:
            self.label = "FAQ" + label
        else:
            # repr() is the portable spelling of the old backtick syntax.
            self.label = "FAQ" + repr(self.section)

    def append(self, entry):
        """Add *entry* to the end of this section's entry list."""
        self.entries.append(entry)

    def save_as_sgml(self, file, labeldict):
        """Write this section and all its entries as a <faqsect> element.

        The label goes into an ``id`` attribute when Options.new_sgml_dtd
        is true, otherwise into a separate <label> element.
        """
        title = element_to_sgml(self.title)
        if Options.new_sgml_dtd:
            file.write('<faqsect id="%s">%d %s\n' %
                       (self.label, self.section, title))
        else:
            file.write('<faqsect>%d %s\n<label id="%s">\n' %
                       (self.section, title, self.label))
        for entry in self.entries:
            entry.save_as_sgml(file, labeldict)
        file.write("</faqsect>\n")

    def toc_sgml(self):
        """Return the SGML table-of-contents line linking to this section."""
        return '%3d <ref id="%s">%s</>' % (self.section, self.label,
                                           element_to_sgml(self.title))

    def save_as_xhtml(self, file, labeldict):
        """Write an <h2> heading for the section, then each entry, to *file*."""
        file.write('<h2><a name="%s">%d %s</a></h2>\n' %
                   (self.label, self.section, element_to_xhtml(self.title)))
        for entry in self.entries:
            entry.save_as_xhtml(file, labeldict)

    def toc_xhtml(self):
        """Return the XHTML table-of-contents line linking to this section."""
        return '%3d <a href="#%s">%s</a>' % (self.section, self.label,
                                             element_to_xhtml(self.title))

    def toc_text(self):
        """Return "<section> <title>" with title whitespace collapsed."""
        title = extract_raw_text(self.title)
        # Split on any whitespace and re-join with single spaces.
        title = ' '.join(title.split())
        return '%3d %s' % (self.section, title)
183 |
|
184 |
|
185 |
|
186 |
def extract(domfaqlist):
    """Build the section/entry model from the <faqlist> DOM element.

    Returns a tuple ``(sections, label_dict, faq_title)``:

      sections   -- list of Section objects, numbered from 1 in document
                    order, each filled with its Entry objects (also
                    numbered from 1 within the section)
      label_dict -- maps the label of every entry that has an explicit
                    ``id`` attribute to its Entry; entries without an id
                    are not included
      faq_title  -- the first <title> element found below *domfaqlist*

    Raises IndexError when a required <title> or <body> element is missing.
    """
    faq_title = domfaqlist.getElementsByTagName('title')[0]
    sections = []
    labelled = {}

    for sec_index, domsection in enumerate(
            domfaqlist.getElementsByTagName('section')):
        sec_number = sec_index + 1
        sec_title = domsection.getElementsByTagName('title')[0]
        section = Section(sec_number, sec_title,
                          label = domsection.getAttribute('id'))
        sections.append(section)

        for faq_index, domfaq in enumerate(
                domsection.getElementsByTagName('faq')):
            title = domfaq.getElementsByTagName('title')[0]
            body = domfaq.getElementsByTagName('body')[0]
            label = domfaq.getAttribute('id')
            entry = Entry(sec_number, faq_index + 1, title, body,
                          label = label)
            # Only explicitly labelled entries can be targets of a faqref.
            if label:
                labelled[entry.label] = entry
            section.append(entry)

    return sections, labelled, faq_title
207 |
|
208 |
|
209 |
# Opening lines for SGML output.  Not referenced by any code visible in this
# file; write_sgml() emits its own opening tags.
sgml_header = (
    '\n'
    '<faqlist>\n'
    '<faqhead1>Table of Contents</faqhead1>\n'
)

# Text written at the very top of XHTML output by write_xhtml(): a
# "title:" line, an empty line, then the table-of-contents heading.
xhtml_header = (
    'title: Skencil / Frequently Asked Questions\n'
    '\n'
    '<h2>Table of Contents</h2>\n'
)
219 |
|
220 |
|
221 |
def write_sgml(file, faqlist, title, label_dict):
    """Write the complete FAQ as SGML to *file*.

    Output is a <faqlist> element containing a <faqtoc> table of contents
    (one line per section and per entry, each followed by <newline>) and
    then every section in full.

    file       -- object with a write() method
    faqlist    -- sequence of section objects
    title      -- accepted for a uniform writer signature; not used here
    label_dict -- passed on to each section's save_as_sgml()
    """
    emit = file.write
    line_break = '<newline>\n'

    emit("<faqlist>\n<faqtoc>\n")
    for section in faqlist:
        emit(section.toc_sgml())
        emit(line_break)
        for entry in section.entries:
            emit(entry.toc_sgml())
            emit(line_break)
    emit("</faqtoc>\n")

    for section in faqlist:
        section.save_as_sgml(file, label_dict)
    emit("</faqlist>")
234 |
|
235 |
def write_xhtml(file, faqlist, title, label_dict):
    """Write the complete FAQ as XHTML to *file*.

    Output starts with ``xhtml_header``, followed by a table of contents
    (one line per section and per entry, each terminated by ``<br />``)
    and then every section in full.

    file       -- object with a write() method
    faqlist    -- sequence of section objects
    title      -- accepted for a uniform writer signature; not used here
    label_dict -- passed on to each section's save_as_xhtml()
    """
    emit = file.write
    line_break = '<br />\n'

    emit(xhtml_header)
    for section in faqlist:
        emit(section.toc_xhtml())
        emit(line_break)
        for entry in section.entries:
            emit(entry.toc_xhtml())
            emit(line_break)

    for section in faqlist:
        section.save_as_xhtml(file, label_dict)
246 |
|
247 |
|
248 |
# Bullet characters for list items, indexed by nesting depth.
bullets = '-+o*'


def format_element(element, formatter, label_dict, list_depth = -1):
    """Feed the children of *element* to *formatter* as plain text.

    element    -- DOM node whose children are formatted
    formatter  -- object providing add_text(), new_paragraph() and
                  outdent(), as called below
    label_dict -- mapping from label string to an object with ``section``
                  and ``num`` attributes, used to resolve ``faqref``
    list_depth -- nesting level of the enclosing ``itemize`` (0 for the
                  outermost list, -1 outside any list)

    ``faqref`` becomes "section.num", ``url`` becomes "URL:<url>", ``href``
    becomes "<text> (URL:<url>)", ``p`` starts a new paragraph, ``itemize``
    opens an indented block and ``item`` starts a bulleted paragraph.
    Unknown elements are transparent: only their children are formatted.

    Raises KeyError when a ``faqref`` names a label missing from
    *label_dict*.
    """
    for child in element.childNodes:
        kind = child.nodeName
        if kind == 'faqref':
            ref = "FAQ" + child.getAttribute("id")
            item = label_dict[ref]
            formatter.add_text('%s.%s' % (item.section, item.num))
        elif kind == 'url':
            url = child.getAttribute("url")
            formatter.add_text('URL:%s' % url)
        elif kind == 'href':
            url = child.getAttribute("url")
            text = extract_raw_text(child)
            formatter.add_text('%s (URL:%s)' % (text, url))
        elif kind == 'p':
            formatter.new_paragraph()
            format_element(child, formatter, label_dict, list_depth)
        elif kind == 'itemize':
            formatter.new_paragraph(indent = 4)
            format_element(child, formatter, label_dict,
                           list_depth = list_depth + 1)
            formatter.new_paragraph()
            formatter.outdent(4)
        elif kind == 'item':
            # Wrap around so lists nested deeper than len(bullets) reuse the
            # bullet characters instead of raising IndexError.  Depth -1
            # still selects the last bullet, exactly as plain indexing did.
            bullet = bullets[list_depth % len(bullets)]
            formatter.new_paragraph(bullet = bullet)
            format_element(child, formatter, label_dict, list_depth)
        elif kind == '#text':
            formatter.add_text(child.nodeValue)
        else:
            format_element(child, formatter, label_dict,
                           list_depth = list_depth)
280 |
|
281 |
def write_text(file, faqlist, title, label_dict):
    """Write the complete FAQ as plain text to *file* via a TextFormatter.

    Output consists of the FAQ title as a heading underlined with '=', an
    indented table of contents, and then each section (heading underlined
    with '=') with its entries (headings underlined with '-') and their
    formatted bodies.  The formatter is closed at the end.

    file       -- passed to the TextFormatter constructor
    faqlist    -- sequence of section objects
    title      -- DOM node whose raw text is used as the document heading
    label_dict -- used to resolve ``faqref`` elements in entry bodies
    """
    fmt = TextFormatter(file)
    fmt.heading(extract_raw_text(title), underline = '=')

    # Table of contents, indented by two columns.
    fmt.indent(2)
    for section in faqlist:
        fmt.vspace(1)
        fmt.paragraph(section.toc_text(), vspace = 0)
        for entry in section.entries:
            fmt.paragraph(entry.toc_text(), vspace = 0)
    fmt.outdent(2)

    # Full text of every section and entry.
    for section in faqlist:
        fmt.vspace(2)
        fmt.heading(section.toc_text(), underline = '=')
        for entry in section.entries:
            fmt.vspace(1)
            fmt.heading(entry.toc_text(), underline = '-')
            fmt.vspace(1)
            format_element(entry.body, fmt, label_dict)

    fmt.close()
302 |
|
303 |
def main():
    """Command-line entry point: convert an FAQ XML file.

    Usage: faqdom.py [-t|-x] [-n] <xml_file> [<outfile>]

      -t  write plain text
      -x  write XHTML
      -n  use the new SGML DTD layout (sets Options.new_sgml_dtd)

    Without -t or -x the output is SGML.  Output goes to <outfile> when
    given, otherwise to standard output.  With no arguments a usage line
    is printed and nothing is converted.
    """
    output_format = 'sgml'

    opts, args = getopt.getopt(sys.argv[1:], "ntx")
    for opt, value in opts:
        if opt == '-t':
            output_format = 'text'
        elif opt == '-n':
            Options.new_sgml_dtd = 1
        elif opt == '-x':
            output_format = 'xhtml'

    if len(args) == 0:
        # Same text and trailing newline as the former print statement.
        sys.stdout.write('faqdom.py usage: python faqdom.py [-t|-x] [-n] '
                         '<xml_file> [<outfile>].\n')
        return

    doc = parse(args[0])
    faqlist, label_dict, title = extract(doc.getElementsByTagName("faqlist")[0])
    if len(args) > 1:
        outfile = open(args[1], "w")
    else:
        outfile = sys.stdout

    try:
        if output_format == 'sgml':
            write_sgml(outfile, faqlist, title, label_dict)
        elif output_format == 'text':
            write_text(outfile, faqlist, title, label_dict)
        elif output_format == 'xhtml':
            write_xhtml(outfile, faqlist, title, label_dict)
    finally:
        # Close a file we opened ourselves so the output is flushed even if
        # a writer raises; standard output is left open.
        if outfile is not sys.stdout:
            outfile.close()
332 |
|
333 |
# Run the converter as soon as this file is executed.  There is no
# ``__name__ == '__main__'`` guard, so importing this module runs it too.
main()
334 |
|