/[schmitzm]/trunk/src/skrueger/Sitemap.java
ViewVC logotype

Contents of /trunk/src/skrueger/Sitemap.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1326 - (show annotations)
Wed Dec 1 14:35:53 2010 UTC (14 years, 3 months ago) by alfonx
File MIME type: text/plain
File size: 6244 byte(s)


1 package skrueger;
2
3 import java.io.FileWriter;
4 import java.io.StringWriter;
5 import java.io.Writer;
6 import java.net.HttpURLConnection;
7 import java.net.URL;
8 import java.net.URLConnection;
9 import java.net.URLEncoder;
10 import java.util.Date;
11
12 import javax.xml.parsers.DocumentBuilderFactory;
13 import javax.xml.transform.OutputKeys;
14 import javax.xml.transform.Result;
15 import javax.xml.transform.Transformer;
16 import javax.xml.transform.TransformerFactory;
17 import javax.xml.transform.dom.DOMSource;
18 import javax.xml.transform.stream.StreamResult;
19
20 import org.apache.log4j.Logger;
21 import org.w3c.dom.Document;
22 import org.w3c.dom.Element;
23
24 /**
25 * This class provides an easy way to list all pages of your site (including
26 * generates ones) ina sitemap.xml. See http://sitemaps.org/protocol.php for the
27 * XML specification.
28 *
29 * <br/>
30 * XML output is based on org.w3c.dom
31 *
32 *
33 * @author Stefan Tzeggai
34 */
35 public class Sitemap {
36
37 static String[] SEARCHENGINES = new String[] {
38 "http://www.sitemapwriter.com/notify.php?crawler=all&url=",
39 "http://submissions.ask.com/ping?sitemap=",
40 "http://www.google.com/webmasters/tools/ping?sitemap=",
41 "http://api.moreover.com/ping?u=",
42 "http://www.bing.com/webmaster/ping.aspx?siteMap=",
43 "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=" };
44
45 final static Logger log = Logger.getLogger(Sitemap.class);
46
47 private Document document;
48
49 final private Element urlsetElement;
50
51 /**
52 * Returns the number of URLs. May not be more than 50,000 by spec.
53 */
54 private int size = 0;
55
56 final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9";
57
58 public enum CHANGEFREQ {
59 monthly, daily, weekly
60 }
61
62 public Sitemap() {
63
64 try {
65
66 // Create a DOM builder and parse the fragment
67 final DocumentBuilderFactory factory = DocumentBuilderFactory
68 .newInstance();
69 document = factory.newDocumentBuilder().newDocument();
70
71 // XML root element
72 urlsetElement = document.createElementNS(NSURL, "urlset");
73 document.appendChild(urlsetElement);
74
75 // // Linking this XML to the AtlasML Schema
76 // final Attr namespaces = document.createAttributeNS(
77 // "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
78 // namespaces
79 // .setValue(NSURL+" http://localhost:"
80 // + Webserver.DEFAULTPORT
81 // + "/skrueger/atlas/resource/AtlasML.xsd");
82 // urlset.setAttributeNode(namespaces);
83
84 } catch (Exception e) {
85 throw new RuntimeException(
86 "Sitemap org.w3c.xml newDocumentBuilder failed:", e);
87 }
88 }
89
90 /**
91 *
92 * @param locString
93 * @param lastmod
94 * when was the page modified the last time? May be
95 * <code>null</code>.
96 * @param changefreq
97 * How often does the page change. May be <code>null</code>.
98 * @param priority
99 * Priority 0.0 - 1.0. May be <code>null</code>.
100 * @return <code>true</code> if the {@link Sitemap} contains less than 50000
101 * urls and the page was added.
102 */
103 public boolean addUrl(String locString, Date lastmod,
104 CHANGEFREQ changefreq, Double priority) {
105 size++;
106 if (size >= 50000)
107 return false;
108
109 Element urlElement = document.createElement("url");
110
111 if (locString == null)
112 throw new IllegalArgumentException("location must be provided");
113 else {
114 Element e = document.createElement("loc");
115 e.appendChild(document.createTextNode(locString));
116 urlElement.appendChild(e);
117 }
118
119 if (lastmod != null) {
120 Element e = document.createElement("lastmod");
121 e.appendChild(document.createTextNode(lastmod.toString()));
122 urlElement.appendChild(e);
123 }
124
125 if (changefreq != null) {
126 Element e = document.createElement("changefreq");
127 e.appendChild(document.createTextNode(changefreq.toString()));
128 urlElement.appendChild(e);
129 }
130
131 if (priority != null) {
132 Element e = document.createElement("priority");
133 e.appendChild(document.createTextNode(priority.toString()));
134 urlElement.appendChild(e);
135 }
136 urlsetElement.appendChild(urlElement);
137 return true;
138 }
139
140 public Document getDocument() {
141 return document;
142 }
143
144 public String getXmlString() {
145 StringWriter stringWriter = new StringWriter();
146 outputToWriter(stringWriter);
147 return stringWriter.toString();
148 }
149
150 /**
151 * Can be used to write to a File with {@link FileWriter}.
152 */
153 public void outputToWriter(Writer stringWriter) {
154 try {
155
156 try { // close outputStreamWriter.close();
157
158 // ****************************************************************************
159 // Create the XML
160 // ****************************************************************************
161 final Result result = new StreamResult(stringWriter);
162
163 // with indenting to make it human-readable
164 final TransformerFactory tf = TransformerFactory.newInstance();
165
166 // TODO Ging mit xerces, geht nicht mehr mit xalan ?!
167 // tf.setAttribute("indent-number", new Integer(2));
168
169 final Transformer xformer = tf.newTransformer();
170 xformer.setOutputProperty(OutputKeys.INDENT, "yes");
171 xformer.setOutputProperty(
172 "{http://xml.apache.org/xalan}indent-amount", "2");
173
174 // Write the DOM document to the file
175 xformer.transform(new DOMSource(document), result);
176
177 } finally {
178 stringWriter.close();
179 }
180
181 } catch (Exception e) {
182 log.error("Failed to create sitemap.XML-String", e);
183 throw new RuntimeException(e);
184 }
185 }
186
187 public int getSize() {
188 return size;
189 }
190
191 public static void submitToSearchEngines(String urlToSitemap) {
192 try {
193
194 urlToSitemap = URLEncoder.encode(urlToSitemap, "UTF-8");
195 for (String se : SEARCHENGINES) {
196 log.info("Submitting " + urlToSitemap + " to " + se);
197 try {
198 URL url = new URL(se + urlToSitemap);
199
200 URLConnection openConnection = url.openConnection();
201 if (openConnection instanceof HttpURLConnection) {
202 HttpURLConnection httpconnection = ((HttpURLConnection) openConnection);
203 if (httpconnection.getResponseCode() != 200) {
204 log.error(se + " returned ResponseCode "
205 + httpconnection.getResponseCode()
206 + " for " + urlToSitemap);
207 }
208 }
209
210 } catch (Exception e) {
211 log.error("failed to submit " + urlToSitemap + " to " + se,
212 e);
213 }
214 }
215 } catch (Exception e) {
216 log.error("Error URL encoding " + urlToSitemap, e);
217 }
218 }
219 }

Properties

Name Value
svn:eol-style native
svn:keywords Id URL
svn:mime-type text/plain

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26