/[schmitzm]/branches/2.4.x/src/skrueger/Sitemap.java
ViewVC logotype

Annotation of /branches/2.4.x/src/skrueger/Sitemap.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1321 - (hide annotations)
Wed Dec 1 01:33:22 2010 UTC (14 years, 3 months ago) by alfonx
Original Path: trunk/src/skrueger/Sitemap.java
File MIME type: text/plain
File size: 5834 byte(s)


1 alfonx 1317 package skrueger;
2    
3     import java.io.FileWriter;
4 alfonx 1319 import java.io.IOException;
5 alfonx 1317 import java.io.StringWriter;
6     import java.io.Writer;
7 alfonx 1319 import java.net.MalformedURLException;
8     import java.net.URL;
9 alfonx 1321 import java.net.URLEncoder;
10 alfonx 1317 import java.util.Date;
11    
12     import javax.xml.parsers.DocumentBuilderFactory;
13     import javax.xml.transform.OutputKeys;
14     import javax.xml.transform.Result;
15     import javax.xml.transform.Transformer;
16     import javax.xml.transform.TransformerFactory;
17     import javax.xml.transform.dom.DOMSource;
18     import javax.xml.transform.stream.StreamResult;
19    
20 alfonx 1321 import net.sf.antcontrib.property.URLEncodeTask;
21    
22 alfonx 1317 import org.apache.log4j.Logger;
23     import org.w3c.dom.Document;
24     import org.w3c.dom.Element;
25    
26     /**
27     * This class provides an easy way to list all pages of your site (including
28     * generates ones) ina sitemap.xml. See http://sitemaps.org/protocol.php for the
29     * XML specification.
30     *
31     * <br/>
32     * XML output is based on org.w3c.dom
33     *
34     *
35     * @author Stefan Tzeggai
36     */
37     public class Sitemap {
38    
39 alfonx 1320 static String[] SEARCHENGINES = new String[] {
40     "http://www.sitemapwriter.com/notify.php?crawler=all&url=",
41     "http://submissions.ask.com/ping?sitemap=",
42     "http://www.google.com/webmasters/tools/ping?sitemap=",
43     "http://api.moreover.com/ping?u=",
44     "http://www.bing.com/webmaster/ping.aspx?siteMap=",
45     "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=" };
46 alfonx 1317
47 alfonx 1319 final static Logger log = Logger.getLogger(Sitemap.class);
48    
49 alfonx 1317 private Document document;
50    
51     final private Element urlsetElement;
52    
53     /**
54     * Returns the number of URLs. May not be more than 50,000 by spec.
55     */
56     private int size = 0;
57    
58     final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9";
59    
60     public enum CHANGEFREQ {
61     monthly, daily, weekly
62     }
63    
64     public Sitemap() {
65    
66     try {
67    
68     // Create a DOM builder and parse the fragment
69     final DocumentBuilderFactory factory = DocumentBuilderFactory
70     .newInstance();
71     document = factory.newDocumentBuilder().newDocument();
72    
73     // XML root element
74     urlsetElement = document.createElementNS(NSURL, "urlset");
75     document.appendChild(urlsetElement);
76    
77     // // Linking this XML to the AtlasML Schema
78     // final Attr namespaces = document.createAttributeNS(
79     // "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
80     // namespaces
81     // .setValue(NSURL+" http://localhost:"
82     // + Webserver.DEFAULTPORT
83     // + "/skrueger/atlas/resource/AtlasML.xsd");
84     // urlset.setAttributeNode(namespaces);
85    
86     } catch (Exception e) {
87     throw new RuntimeException(
88     "Sitemap org.w3c.xml newDocumentBuilder failed:", e);
89     }
90     }
91    
92     /**
93     *
94     * @param locString
95     * @param lastmod
96     * when was the page modified the last time? May be
97     * <code>null</code>.
98     * @param changefreq
99     * How often does the page change. May be <code>null</code>.
100     * @param priority
101     * Priority 0.0 - 1.0. May be <code>null</code>.
102     * @return <code>true</code> if the {@link Sitemap} contains less than 50000
103     * urls and the page was added.
104     */
105     public boolean addUrl(String locString, Date lastmod,
106     CHANGEFREQ changefreq, Double priority) {
107     size++;
108     if (size >= 50000)
109     return false;
110    
111     Element urlElement = document.createElement("url");
112    
113     if (locString == null)
114     throw new IllegalArgumentException("location must be provided");
115     else {
116     Element e = document.createElement("loc");
117     e.appendChild(document.createTextNode(locString));
118     urlElement.appendChild(e);
119     }
120    
121     if (lastmod != null) {
122     Element e = document.createElement("lastmod");
123     e.appendChild(document.createTextNode(lastmod.toString()));
124     urlElement.appendChild(e);
125     }
126    
127     if (changefreq != null) {
128     Element e = document.createElement("changefreq");
129     e.appendChild(document.createTextNode(changefreq.toString()));
130     urlElement.appendChild(e);
131     }
132    
133     if (priority != null) {
134     Element e = document.createElement("priority");
135     e.appendChild(document.createTextNode(priority.toString()));
136     urlElement.appendChild(e);
137     }
138     urlsetElement.appendChild(urlElement);
139     return true;
140     }
141    
142     public Document getDocument() {
143     return document;
144     }
145    
146     public String getXmlString() {
147     StringWriter stringWriter = new StringWriter();
148     outputToWriter(stringWriter);
149     return stringWriter.toString();
150     }
151    
152     /**
153     * Can be used to write to a File with {@link FileWriter}.
154     */
155     public void outputToWriter(Writer stringWriter) {
156     try {
157    
158     try { // close outputStreamWriter.close();
159    
160     // ****************************************************************************
161     // Create the XML
162     // ****************************************************************************
163     final Result result = new StreamResult(stringWriter);
164    
165     // with indenting to make it human-readable
166     final TransformerFactory tf = TransformerFactory.newInstance();
167    
168 alfonx 1319 // TODO Ging mit xerces, geht nicht mehr mit xalan ?!
169 alfonx 1317 // tf.setAttribute("indent-number", new Integer(2));
170    
171     final Transformer xformer = tf.newTransformer();
172     xformer.setOutputProperty(OutputKeys.INDENT, "yes");
173     xformer.setOutputProperty(
174     "{http://xml.apache.org/xalan}indent-amount", "2");
175    
176     // Write the DOM document to the file
177     xformer.transform(new DOMSource(document), result);
178    
179     } finally {
180     stringWriter.close();
181     }
182    
183     } catch (Exception e) {
184     log.error("Failed to create sitemap.XML-String", e);
185     throw new RuntimeException(e);
186     }
187     }
188    
189     public int getSize() {
190     return size;
191     }
192 alfonx 1319
193 alfonx 1320 public static void submitToSearchEngines(String urlToSitemap) {
194 alfonx 1319 for (String se : SEARCHENGINES) {
195     log.info("Submitting " + urlToSitemap + " to " + se);
196     try {
197     URL url = new URL(se + urlToSitemap);
198 alfonx 1320
199 alfonx 1321 urlToSitemap = URLEncoder.encode(urlToSitemap, "UTF8");
200    
201 alfonx 1319 url.openConnection().connect();
202 alfonx 1320
203 alfonx 1319 } catch (Exception e) {
204     log.error("failed to submit " + urlToSitemap + " to " + se, e);
205     }
206     }
207     }
208 alfonx 1317 }

Properties

Name Value
svn:eol-style native
svn:keywords Id URL
svn:mime-type text/plain

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26