1 |
alfonx |
1317 |
package skrueger; |
2 |
|
|
|
3 |
|
|
import java.io.FileWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
19 |
|
|
|
20 |
|
|
/** |
21 |
|
|
* This class provides an easy way to list all pages of your site (including |
22 |
|
|
* generates ones) ina sitemap.xml. See http://sitemaps.org/protocol.php for the |
23 |
|
|
* XML specification. |
24 |
|
|
* |
25 |
|
|
* <br/> |
26 |
|
|
* XML output is based on org.w3c.dom |
27 |
|
|
* |
28 |
|
|
* |
29 |
|
|
* @author Stefan Tzeggai |
30 |
|
|
*/ |
31 |
|
|
public class Sitemap { |
32 |
|
|
|
33 |
|
|
Logger log = Logger.getLogger(Sitemap.class); |
34 |
|
|
|
35 |
|
|
private Document document; |
36 |
|
|
|
37 |
|
|
final private Element urlsetElement; |
38 |
|
|
|
39 |
|
|
/** |
40 |
|
|
* Returns the number of URLs. May not be more than 50,000 by spec. |
41 |
|
|
*/ |
42 |
|
|
private int size = 0; |
43 |
|
|
|
44 |
|
|
final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9"; |
45 |
|
|
|
46 |
|
|
public enum CHANGEFREQ { |
47 |
|
|
monthly, daily, weekly |
48 |
|
|
} |
49 |
|
|
|
50 |
|
|
public Sitemap() { |
51 |
|
|
|
52 |
|
|
try { |
53 |
|
|
|
54 |
|
|
// Create a DOM builder and parse the fragment |
55 |
|
|
final DocumentBuilderFactory factory = DocumentBuilderFactory |
56 |
|
|
.newInstance(); |
57 |
|
|
document = factory.newDocumentBuilder().newDocument(); |
58 |
|
|
|
59 |
|
|
// XML root element |
60 |
|
|
urlsetElement = document.createElementNS(NSURL, "urlset"); |
61 |
|
|
document.appendChild(urlsetElement); |
62 |
|
|
|
63 |
|
|
// // Linking this XML to the AtlasML Schema |
64 |
|
|
// final Attr namespaces = document.createAttributeNS( |
65 |
|
|
// "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation"); |
66 |
|
|
// namespaces |
67 |
|
|
// .setValue(NSURL+" http://localhost:" |
68 |
|
|
// + Webserver.DEFAULTPORT |
69 |
|
|
// + "/skrueger/atlas/resource/AtlasML.xsd"); |
70 |
|
|
// urlset.setAttributeNode(namespaces); |
71 |
|
|
|
72 |
|
|
} catch (Exception e) { |
73 |
|
|
throw new RuntimeException( |
74 |
|
|
"Sitemap org.w3c.xml newDocumentBuilder failed:", e); |
75 |
|
|
} |
76 |
|
|
} |
77 |
|
|
|
78 |
|
|
/** |
79 |
|
|
* |
80 |
|
|
* @param locString |
81 |
|
|
* @param lastmod |
82 |
|
|
* when was the page modified the last time? May be |
83 |
|
|
* <code>null</code>. |
84 |
|
|
* @param changefreq |
85 |
|
|
* How often does the page change. May be <code>null</code>. |
86 |
|
|
* @param priority |
87 |
|
|
* Priority 0.0 - 1.0. May be <code>null</code>. |
88 |
|
|
* @return <code>true</code> if the {@link Sitemap} contains less than 50000 |
89 |
|
|
* urls and the page was added. |
90 |
|
|
*/ |
91 |
|
|
public boolean addUrl(String locString, Date lastmod, |
92 |
|
|
CHANGEFREQ changefreq, Double priority) { |
93 |
|
|
size++; |
94 |
|
|
if (size >= 50000) |
95 |
|
|
return false; |
96 |
|
|
|
97 |
|
|
Element urlElement = document.createElement("url"); |
98 |
|
|
|
99 |
|
|
if (locString == null) |
100 |
|
|
throw new IllegalArgumentException("location must be provided"); |
101 |
|
|
else { |
102 |
|
|
Element e = document.createElement("loc"); |
103 |
|
|
e.appendChild(document.createTextNode(locString)); |
104 |
|
|
urlElement.appendChild(e); |
105 |
|
|
} |
106 |
|
|
|
107 |
|
|
if (lastmod != null) { |
108 |
|
|
Element e = document.createElement("lastmod"); |
109 |
|
|
e.appendChild(document.createTextNode(lastmod.toString())); |
110 |
|
|
urlElement.appendChild(e); |
111 |
|
|
} |
112 |
|
|
|
113 |
|
|
if (changefreq != null) { |
114 |
|
|
Element e = document.createElement("changefreq"); |
115 |
|
|
e.appendChild(document.createTextNode(changefreq.toString())); |
116 |
|
|
urlElement.appendChild(e); |
117 |
|
|
} |
118 |
|
|
|
119 |
|
|
if (priority != null) { |
120 |
|
|
Element e = document.createElement("priority"); |
121 |
|
|
e.appendChild(document.createTextNode(priority.toString())); |
122 |
|
|
urlElement.appendChild(e); |
123 |
|
|
} |
124 |
|
|
urlsetElement.appendChild(urlElement); |
125 |
|
|
return true; |
126 |
|
|
} |
127 |
|
|
|
128 |
|
|
public Document getDocument() { |
129 |
|
|
return document; |
130 |
|
|
} |
131 |
|
|
|
132 |
|
|
public String getXmlString() { |
133 |
|
|
StringWriter stringWriter = new StringWriter(); |
134 |
|
|
outputToWriter(stringWriter); |
135 |
|
|
return stringWriter.toString(); |
136 |
|
|
} |
137 |
|
|
|
138 |
|
|
/** |
139 |
|
|
* Can be used to write to a File with {@link FileWriter}. |
140 |
|
|
*/ |
141 |
|
|
public void outputToWriter(Writer stringWriter) { |
142 |
|
|
try { |
143 |
|
|
|
144 |
|
|
try { // close outputStreamWriter.close(); |
145 |
|
|
|
146 |
|
|
// **************************************************************************** |
147 |
|
|
// Create the XML |
148 |
|
|
// **************************************************************************** |
149 |
|
|
final Result result = new StreamResult(stringWriter); |
150 |
|
|
|
151 |
|
|
// with indenting to make it human-readable |
152 |
|
|
final TransformerFactory tf = TransformerFactory.newInstance(); |
153 |
|
|
|
154 |
|
|
// TODO Ging mit xerces, geht nicht mehr mit xalan ?! |
155 |
|
|
// tf.setAttribute("indent-number", new Integer(2)); |
156 |
|
|
|
157 |
|
|
final Transformer xformer = tf.newTransformer(); |
158 |
|
|
xformer.setOutputProperty(OutputKeys.INDENT, "yes"); |
159 |
|
|
xformer.setOutputProperty( |
160 |
|
|
"{http://xml.apache.org/xalan}indent-amount", "2"); |
161 |
|
|
|
162 |
|
|
// Write the DOM document to the file |
163 |
|
|
xformer.transform(new DOMSource(document), result); |
164 |
|
|
|
165 |
|
|
} finally { |
166 |
|
|
stringWriter.close(); |
167 |
|
|
} |
168 |
|
|
|
169 |
|
|
} catch (Exception e) { |
170 |
|
|
log.error("Failed to create sitemap.XML-String", e); |
171 |
|
|
throw new RuntimeException(e); |
172 |
|
|
} |
173 |
|
|
} |
174 |
|
|
|
175 |
|
|
public int getSize() { |
176 |
|
|
return size; |
177 |
|
|
} |
178 |
|
|
} |