/[schmitzm]/branches/2.4.x/src/skrueger/Sitemap.java
ViewVC logotype

Contents of /branches/2.4.x/src/skrueger/Sitemap.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1324 - (show annotations)
Wed Dec 1 13:26:03 2010 UTC (14 years, 3 months ago) by alfonx
Original Path: trunk/src/skrueger/Sitemap.java
File MIME type: text/plain
File size: 5709 byte(s)
Moved testing resources to testresources (moving towards a standard Maven directory structure, very slowly and carefully)
1 package skrueger;
2
3 import java.io.FileWriter;
4 import java.io.StringWriter;
5 import java.io.Writer;
6 import java.net.URL;
7 import java.net.URLEncoder;
8 import java.util.Date;
9
10 import javax.xml.parsers.DocumentBuilderFactory;
11 import javax.xml.transform.OutputKeys;
12 import javax.xml.transform.Result;
13 import javax.xml.transform.Transformer;
14 import javax.xml.transform.TransformerFactory;
15 import javax.xml.transform.dom.DOMSource;
16 import javax.xml.transform.stream.StreamResult;
17
18 import org.apache.log4j.Logger;
19 import org.w3c.dom.Document;
20 import org.w3c.dom.Element;
21
22 /**
23 * This class provides an easy way to list all pages of your site (including
24 * generated ones) in a sitemap.xml. See http://sitemaps.org/protocol.php for the
25 * XML specification.
26 *
27 * <br/>
28 * XML output is based on org.w3c.dom
29 *
30 *
31 * @author Stefan Tzeggai
32 */
33 public class Sitemap {
34
35 static String[] SEARCHENGINES = new String[] {
36 "http://www.sitemapwriter.com/notify.php?crawler=all&url=",
37 "http://submissions.ask.com/ping?sitemap=",
38 "http://www.google.com/webmasters/tools/ping?sitemap=",
39 "http://api.moreover.com/ping?u=",
40 "http://www.bing.com/webmaster/ping.aspx?siteMap=",
41 "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=" };
42
43 final static Logger log = Logger.getLogger(Sitemap.class);
44
45 private Document document;
46
47 final private Element urlsetElement;
48
49 /**
50 * Returns the number of URLs. May not be more than 50,000 by spec.
51 */
52 private int size = 0;
53
54 final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9";
55
56 public enum CHANGEFREQ {
57 monthly, daily, weekly
58 }
59
60 public Sitemap() {
61
62 try {
63
64 // Create a DOM builder and parse the fragment
65 final DocumentBuilderFactory factory = DocumentBuilderFactory
66 .newInstance();
67 document = factory.newDocumentBuilder().newDocument();
68
69 // XML root element
70 urlsetElement = document.createElementNS(NSURL, "urlset");
71 document.appendChild(urlsetElement);
72
73 // // Linking this XML to the AtlasML Schema
74 // final Attr namespaces = document.createAttributeNS(
75 // "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
76 // namespaces
77 // .setValue(NSURL+" http://localhost:"
78 // + Webserver.DEFAULTPORT
79 // + "/skrueger/atlas/resource/AtlasML.xsd");
80 // urlset.setAttributeNode(namespaces);
81
82 } catch (Exception e) {
83 throw new RuntimeException(
84 "Sitemap org.w3c.xml newDocumentBuilder failed:", e);
85 }
86 }
87
88 /**
89 *
90 * @param locString
91 * @param lastmod
92 * when was the page modified the last time? May be
93 * <code>null</code>.
94 * @param changefreq
95 * How often does the page change. May be <code>null</code>.
96 * @param priority
97 * Priority 0.0 - 1.0. May be <code>null</code>.
98 * @return <code>true</code> if the {@link Sitemap} contains less than 50000
99 * urls and the page was added.
100 */
101 public boolean addUrl(String locString, Date lastmod,
102 CHANGEFREQ changefreq, Double priority) {
103 size++;
104 if (size >= 50000)
105 return false;
106
107 Element urlElement = document.createElement("url");
108
109 if (locString == null)
110 throw new IllegalArgumentException("location must be provided");
111 else {
112 Element e = document.createElement("loc");
113 e.appendChild(document.createTextNode(locString));
114 urlElement.appendChild(e);
115 }
116
117 if (lastmod != null) {
118 Element e = document.createElement("lastmod");
119 e.appendChild(document.createTextNode(lastmod.toString()));
120 urlElement.appendChild(e);
121 }
122
123 if (changefreq != null) {
124 Element e = document.createElement("changefreq");
125 e.appendChild(document.createTextNode(changefreq.toString()));
126 urlElement.appendChild(e);
127 }
128
129 if (priority != null) {
130 Element e = document.createElement("priority");
131 e.appendChild(document.createTextNode(priority.toString()));
132 urlElement.appendChild(e);
133 }
134 urlsetElement.appendChild(urlElement);
135 return true;
136 }
137
138 public Document getDocument() {
139 return document;
140 }
141
142 public String getXmlString() {
143 StringWriter stringWriter = new StringWriter();
144 outputToWriter(stringWriter);
145 return stringWriter.toString();
146 }
147
148 /**
149 * Can be used to write to a File with {@link FileWriter}.
150 */
151 public void outputToWriter(Writer stringWriter) {
152 try {
153
154 try { // close outputStreamWriter.close();
155
156 // ****************************************************************************
157 // Create the XML
158 // ****************************************************************************
159 final Result result = new StreamResult(stringWriter);
160
161 // with indenting to make it human-readable
162 final TransformerFactory tf = TransformerFactory.newInstance();
163
164 // TODO Ging mit xerces, geht nicht mehr mit xalan ?!
165 // tf.setAttribute("indent-number", new Integer(2));
166
167 final Transformer xformer = tf.newTransformer();
168 xformer.setOutputProperty(OutputKeys.INDENT, "yes");
169 xformer.setOutputProperty(
170 "{http://xml.apache.org/xalan}indent-amount", "2");
171
172 // Write the DOM document to the file
173 xformer.transform(new DOMSource(document), result);
174
175 } finally {
176 stringWriter.close();
177 }
178
179 } catch (Exception e) {
180 log.error("Failed to create sitemap.XML-String", e);
181 throw new RuntimeException(e);
182 }
183 }
184
185 public int getSize() {
186 return size;
187 }
188
189 public static void submitToSearchEngines(String urlToSitemap) {
190 for (String se : SEARCHENGINES) {
191 log.info("Submitting " + urlToSitemap + " to " + se);
192 try {
193 URL url = new URL(se + urlToSitemap);
194
195 urlToSitemap = URLEncoder.encode(urlToSitemap);
196
197 url.openConnection().connect();
198
199 } catch (Exception e) {
200 log.error("failed to submit " + urlToSitemap + " to " + se, e);
201 }
202 }
203 }
204 }

Properties

Name Value
svn:eol-style native
svn:keywords Id URL
svn:mime-type text/plain

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26