/[schmitzm]/branches/2.4.x/src/skrueger/Sitemap.java
ViewVC logotype

Contents of /branches/2.4.x/src/skrueger/Sitemap.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1321 - (show annotations)
Wed Dec 1 01:33:22 2010 UTC (14 years, 3 months ago) by alfonx
Original Path: trunk/src/skrueger/Sitemap.java
File MIME type: text/plain
File size: 5834 byte(s)


1 package skrueger;
2
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import net.sf.antcontrib.property.URLEncodeTask;

import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
25
26 /**
27 * This class provides an easy way to list all pages of your site (including
28 * generates ones) ina sitemap.xml. See http://sitemaps.org/protocol.php for the
29 * XML specification.
30 *
31 * <br/>
32 * XML output is based on org.w3c.dom
33 *
34 *
35 * @author Stefan Tzeggai
36 */
37 public class Sitemap {
38
39 static String[] SEARCHENGINES = new String[] {
40 "http://www.sitemapwriter.com/notify.php?crawler=all&url=",
41 "http://submissions.ask.com/ping?sitemap=",
42 "http://www.google.com/webmasters/tools/ping?sitemap=",
43 "http://api.moreover.com/ping?u=",
44 "http://www.bing.com/webmaster/ping.aspx?siteMap=",
45 "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=" };
46
47 final static Logger log = Logger.getLogger(Sitemap.class);
48
49 private Document document;
50
51 final private Element urlsetElement;
52
53 /**
54 * Returns the number of URLs. May not be more than 50,000 by spec.
55 */
56 private int size = 0;
57
58 final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9";
59
60 public enum CHANGEFREQ {
61 monthly, daily, weekly
62 }
63
64 public Sitemap() {
65
66 try {
67
68 // Create a DOM builder and parse the fragment
69 final DocumentBuilderFactory factory = DocumentBuilderFactory
70 .newInstance();
71 document = factory.newDocumentBuilder().newDocument();
72
73 // XML root element
74 urlsetElement = document.createElementNS(NSURL, "urlset");
75 document.appendChild(urlsetElement);
76
77 // // Linking this XML to the AtlasML Schema
78 // final Attr namespaces = document.createAttributeNS(
79 // "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
80 // namespaces
81 // .setValue(NSURL+" http://localhost:"
82 // + Webserver.DEFAULTPORT
83 // + "/skrueger/atlas/resource/AtlasML.xsd");
84 // urlset.setAttributeNode(namespaces);
85
86 } catch (Exception e) {
87 throw new RuntimeException(
88 "Sitemap org.w3c.xml newDocumentBuilder failed:", e);
89 }
90 }
91
92 /**
93 *
94 * @param locString
95 * @param lastmod
96 * when was the page modified the last time? May be
97 * <code>null</code>.
98 * @param changefreq
99 * How often does the page change. May be <code>null</code>.
100 * @param priority
101 * Priority 0.0 - 1.0. May be <code>null</code>.
102 * @return <code>true</code> if the {@link Sitemap} contains less than 50000
103 * urls and the page was added.
104 */
105 public boolean addUrl(String locString, Date lastmod,
106 CHANGEFREQ changefreq, Double priority) {
107 size++;
108 if (size >= 50000)
109 return false;
110
111 Element urlElement = document.createElement("url");
112
113 if (locString == null)
114 throw new IllegalArgumentException("location must be provided");
115 else {
116 Element e = document.createElement("loc");
117 e.appendChild(document.createTextNode(locString));
118 urlElement.appendChild(e);
119 }
120
121 if (lastmod != null) {
122 Element e = document.createElement("lastmod");
123 e.appendChild(document.createTextNode(lastmod.toString()));
124 urlElement.appendChild(e);
125 }
126
127 if (changefreq != null) {
128 Element e = document.createElement("changefreq");
129 e.appendChild(document.createTextNode(changefreq.toString()));
130 urlElement.appendChild(e);
131 }
132
133 if (priority != null) {
134 Element e = document.createElement("priority");
135 e.appendChild(document.createTextNode(priority.toString()));
136 urlElement.appendChild(e);
137 }
138 urlsetElement.appendChild(urlElement);
139 return true;
140 }
141
142 public Document getDocument() {
143 return document;
144 }
145
146 public String getXmlString() {
147 StringWriter stringWriter = new StringWriter();
148 outputToWriter(stringWriter);
149 return stringWriter.toString();
150 }
151
152 /**
153 * Can be used to write to a File with {@link FileWriter}.
154 */
155 public void outputToWriter(Writer stringWriter) {
156 try {
157
158 try { // close outputStreamWriter.close();
159
160 // ****************************************************************************
161 // Create the XML
162 // ****************************************************************************
163 final Result result = new StreamResult(stringWriter);
164
165 // with indenting to make it human-readable
166 final TransformerFactory tf = TransformerFactory.newInstance();
167
168 // TODO Ging mit xerces, geht nicht mehr mit xalan ?!
169 // tf.setAttribute("indent-number", new Integer(2));
170
171 final Transformer xformer = tf.newTransformer();
172 xformer.setOutputProperty(OutputKeys.INDENT, "yes");
173 xformer.setOutputProperty(
174 "{http://xml.apache.org/xalan}indent-amount", "2");
175
176 // Write the DOM document to the file
177 xformer.transform(new DOMSource(document), result);
178
179 } finally {
180 stringWriter.close();
181 }
182
183 } catch (Exception e) {
184 log.error("Failed to create sitemap.XML-String", e);
185 throw new RuntimeException(e);
186 }
187 }
188
189 public int getSize() {
190 return size;
191 }
192
193 public static void submitToSearchEngines(String urlToSitemap) {
194 for (String se : SEARCHENGINES) {
195 log.info("Submitting " + urlToSitemap + " to " + se);
196 try {
197 URL url = new URL(se + urlToSitemap);
198
199 urlToSitemap = URLEncoder.encode(urlToSitemap, "UTF8");
200
201 url.openConnection().connect();
202
203 } catch (Exception e) {
204 log.error("failed to submit " + urlToSitemap + " to " + se, e);
205 }
206 }
207 }
208 }

Properties

Name Value
svn:eol-style native
svn:keywords Id URL
svn:mime-type text/plain

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26