/[schmitzm]/trunk/src/skrueger/Sitemap.java
ViewVC logotype

Contents of /trunk/src/skrueger/Sitemap.java

Parent Directory | Revision Log


Revision 1320 - (show annotations)
Wed Dec 1 01:14:59 2010 UTC (14 years, 3 months ago) by alfonx
File MIME type: text/plain
File size: 5695 byte(s)


1 package skrueger;
2
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
22
23 /**
24 * This class provides an easy way to list all pages of your site (including
25 * generates ones) ina sitemap.xml. See http://sitemaps.org/protocol.php for the
26 * XML specification.
27 *
28 * <br/>
29 * XML output is based on org.w3c.dom
30 *
31 *
32 * @author Stefan Tzeggai
33 */
34 public class Sitemap {
35
36 static String[] SEARCHENGINES = new String[] {
37 "http://www.sitemapwriter.com/notify.php?crawler=all&url=",
38 "http://submissions.ask.com/ping?sitemap=",
39 "http://www.google.com/webmasters/tools/ping?sitemap=",
40 "http://api.moreover.com/ping?u=",
41 "http://www.bing.com/webmaster/ping.aspx?siteMap=",
42 "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=" };
43
44 final static Logger log = Logger.getLogger(Sitemap.class);
45
46 private Document document;
47
48 final private Element urlsetElement;
49
50 /**
51 * Returns the number of URLs. May not be more than 50,000 by spec.
52 */
53 private int size = 0;
54
55 final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9";
56
57 public enum CHANGEFREQ {
58 monthly, daily, weekly
59 }
60
61 public Sitemap() {
62
63 try {
64
65 // Create a DOM builder and parse the fragment
66 final DocumentBuilderFactory factory = DocumentBuilderFactory
67 .newInstance();
68 document = factory.newDocumentBuilder().newDocument();
69
70 // XML root element
71 urlsetElement = document.createElementNS(NSURL, "urlset");
72 document.appendChild(urlsetElement);
73
74 // // Linking this XML to the AtlasML Schema
75 // final Attr namespaces = document.createAttributeNS(
76 // "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
77 // namespaces
78 // .setValue(NSURL+" http://localhost:"
79 // + Webserver.DEFAULTPORT
80 // + "/skrueger/atlas/resource/AtlasML.xsd");
81 // urlset.setAttributeNode(namespaces);
82
83 } catch (Exception e) {
84 throw new RuntimeException(
85 "Sitemap org.w3c.xml newDocumentBuilder failed:", e);
86 }
87 }
88
89 /**
90 *
91 * @param locString
92 * @param lastmod
93 * when was the page modified the last time? May be
94 * <code>null</code>.
95 * @param changefreq
96 * How often does the page change. May be <code>null</code>.
97 * @param priority
98 * Priority 0.0 - 1.0. May be <code>null</code>.
99 * @return <code>true</code> if the {@link Sitemap} contains less than 50000
100 * urls and the page was added.
101 */
102 public boolean addUrl(String locString, Date lastmod,
103 CHANGEFREQ changefreq, Double priority) {
104 size++;
105 if (size >= 50000)
106 return false;
107
108 Element urlElement = document.createElement("url");
109
110 if (locString == null)
111 throw new IllegalArgumentException("location must be provided");
112 else {
113 Element e = document.createElement("loc");
114 e.appendChild(document.createTextNode(locString));
115 urlElement.appendChild(e);
116 }
117
118 if (lastmod != null) {
119 Element e = document.createElement("lastmod");
120 e.appendChild(document.createTextNode(lastmod.toString()));
121 urlElement.appendChild(e);
122 }
123
124 if (changefreq != null) {
125 Element e = document.createElement("changefreq");
126 e.appendChild(document.createTextNode(changefreq.toString()));
127 urlElement.appendChild(e);
128 }
129
130 if (priority != null) {
131 Element e = document.createElement("priority");
132 e.appendChild(document.createTextNode(priority.toString()));
133 urlElement.appendChild(e);
134 }
135 urlsetElement.appendChild(urlElement);
136 return true;
137 }
138
139 public Document getDocument() {
140 return document;
141 }
142
143 public String getXmlString() {
144 StringWriter stringWriter = new StringWriter();
145 outputToWriter(stringWriter);
146 return stringWriter.toString();
147 }
148
149 /**
150 * Can be used to write to a File with {@link FileWriter}.
151 */
152 public void outputToWriter(Writer stringWriter) {
153 try {
154
155 try { // close outputStreamWriter.close();
156
157 // ****************************************************************************
158 // Create the XML
159 // ****************************************************************************
160 final Result result = new StreamResult(stringWriter);
161
162 // with indenting to make it human-readable
163 final TransformerFactory tf = TransformerFactory.newInstance();
164
165 // TODO Ging mit xerces, geht nicht mehr mit xalan ?!
166 // tf.setAttribute("indent-number", new Integer(2));
167
168 final Transformer xformer = tf.newTransformer();
169 xformer.setOutputProperty(OutputKeys.INDENT, "yes");
170 xformer.setOutputProperty(
171 "{http://xml.apache.org/xalan}indent-amount", "2");
172
173 // Write the DOM document to the file
174 xformer.transform(new DOMSource(document), result);
175
176 } finally {
177 stringWriter.close();
178 }
179
180 } catch (Exception e) {
181 log.error("Failed to create sitemap.XML-String", e);
182 throw new RuntimeException(e);
183 }
184 }
185
186 public int getSize() {
187 return size;
188 }
189
190 public static void submitToSearchEngines(String urlToSitemap) {
191 for (String se : SEARCHENGINES) {
192 log.info("Submitting " + urlToSitemap + " to " + se);
193 try {
194 URL url = new URL(se + urlToSitemap);
195
196 url.openConnection().connect();
197
198 } catch (Exception e) {
199 log.error("failed to submit " + urlToSitemap + " to " + se, e);
200 }
201 }
202 }
203 }

Properties

Name Value
svn:eol-style native
svn:keywords Id URL
svn:mime-type text/plain

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26