/[schmitzm]/trunk/src/skrueger/Sitemap.java
ViewVC logotype

Annotation of /trunk/src/skrueger/Sitemap.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1320 - (hide annotations)
Wed Dec 1 01:14:59 2010 UTC (14 years, 3 months ago) by alfonx
File MIME type: text/plain
File size: 5695 byte(s)


1 alfonx 1317 package skrueger;
2    
3     import java.io.FileWriter;
4 alfonx 1319 import java.io.IOException;
5 alfonx 1317 import java.io.StringWriter;
6     import java.io.Writer;
7 alfonx 1319 import java.net.MalformedURLException;
8     import java.net.URL;
9 alfonx 1317 import java.util.Date;
10    
11     import javax.xml.parsers.DocumentBuilderFactory;
12     import javax.xml.transform.OutputKeys;
13     import javax.xml.transform.Result;
14     import javax.xml.transform.Transformer;
15     import javax.xml.transform.TransformerFactory;
16     import javax.xml.transform.dom.DOMSource;
17     import javax.xml.transform.stream.StreamResult;
18    
19     import org.apache.log4j.Logger;
20     import org.w3c.dom.Document;
21     import org.w3c.dom.Element;
22    
23     /**
24     * This class provides an easy way to list all pages of your site (including
25     * generates ones) ina sitemap.xml. See http://sitemaps.org/protocol.php for the
26     * XML specification.
27     *
28     * <br/>
29     * XML output is based on org.w3c.dom
30     *
31     *
32     * @author Stefan Tzeggai
33     */
34     public class Sitemap {
35    
36 alfonx 1320 static String[] SEARCHENGINES = new String[] {
37     "http://www.sitemapwriter.com/notify.php?crawler=all&url=",
38     "http://submissions.ask.com/ping?sitemap=",
39     "http://www.google.com/webmasters/tools/ping?sitemap=",
40     "http://api.moreover.com/ping?u=",
41     "http://www.bing.com/webmaster/ping.aspx?siteMap=",
42     "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=" };
43 alfonx 1317
44 alfonx 1319 final static Logger log = Logger.getLogger(Sitemap.class);
45    
46 alfonx 1317 private Document document;
47    
48     final private Element urlsetElement;
49    
50     /**
51     * Returns the number of URLs. May not be more than 50,000 by spec.
52     */
53     private int size = 0;
54    
55     final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9";
56    
57     public enum CHANGEFREQ {
58     monthly, daily, weekly
59     }
60    
61     public Sitemap() {
62    
63     try {
64    
65     // Create a DOM builder and parse the fragment
66     final DocumentBuilderFactory factory = DocumentBuilderFactory
67     .newInstance();
68     document = factory.newDocumentBuilder().newDocument();
69    
70     // XML root element
71     urlsetElement = document.createElementNS(NSURL, "urlset");
72     document.appendChild(urlsetElement);
73    
74     // // Linking this XML to the AtlasML Schema
75     // final Attr namespaces = document.createAttributeNS(
76     // "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
77     // namespaces
78     // .setValue(NSURL+" http://localhost:"
79     // + Webserver.DEFAULTPORT
80     // + "/skrueger/atlas/resource/AtlasML.xsd");
81     // urlset.setAttributeNode(namespaces);
82    
83     } catch (Exception e) {
84     throw new RuntimeException(
85     "Sitemap org.w3c.xml newDocumentBuilder failed:", e);
86     }
87     }
88    
89     /**
90     *
91     * @param locString
92     * @param lastmod
93     * when was the page modified the last time? May be
94     * <code>null</code>.
95     * @param changefreq
96     * How often does the page change. May be <code>null</code>.
97     * @param priority
98     * Priority 0.0 - 1.0. May be <code>null</code>.
99     * @return <code>true</code> if the {@link Sitemap} contains less than 50000
100     * urls and the page was added.
101     */
102     public boolean addUrl(String locString, Date lastmod,
103     CHANGEFREQ changefreq, Double priority) {
104     size++;
105     if (size >= 50000)
106     return false;
107    
108     Element urlElement = document.createElement("url");
109    
110     if (locString == null)
111     throw new IllegalArgumentException("location must be provided");
112     else {
113     Element e = document.createElement("loc");
114     e.appendChild(document.createTextNode(locString));
115     urlElement.appendChild(e);
116     }
117    
118     if (lastmod != null) {
119     Element e = document.createElement("lastmod");
120     e.appendChild(document.createTextNode(lastmod.toString()));
121     urlElement.appendChild(e);
122     }
123    
124     if (changefreq != null) {
125     Element e = document.createElement("changefreq");
126     e.appendChild(document.createTextNode(changefreq.toString()));
127     urlElement.appendChild(e);
128     }
129    
130     if (priority != null) {
131     Element e = document.createElement("priority");
132     e.appendChild(document.createTextNode(priority.toString()));
133     urlElement.appendChild(e);
134     }
135     urlsetElement.appendChild(urlElement);
136     return true;
137     }
138    
139     public Document getDocument() {
140     return document;
141     }
142    
143     public String getXmlString() {
144     StringWriter stringWriter = new StringWriter();
145     outputToWriter(stringWriter);
146     return stringWriter.toString();
147     }
148    
149     /**
150     * Can be used to write to a File with {@link FileWriter}.
151     */
152     public void outputToWriter(Writer stringWriter) {
153     try {
154    
155     try { // close outputStreamWriter.close();
156    
157     // ****************************************************************************
158     // Create the XML
159     // ****************************************************************************
160     final Result result = new StreamResult(stringWriter);
161    
162     // with indenting to make it human-readable
163     final TransformerFactory tf = TransformerFactory.newInstance();
164    
165 alfonx 1319 // TODO Ging mit xerces, geht nicht mehr mit xalan ?!
166 alfonx 1317 // tf.setAttribute("indent-number", new Integer(2));
167    
168     final Transformer xformer = tf.newTransformer();
169     xformer.setOutputProperty(OutputKeys.INDENT, "yes");
170     xformer.setOutputProperty(
171     "{http://xml.apache.org/xalan}indent-amount", "2");
172    
173     // Write the DOM document to the file
174     xformer.transform(new DOMSource(document), result);
175    
176     } finally {
177     stringWriter.close();
178     }
179    
180     } catch (Exception e) {
181     log.error("Failed to create sitemap.XML-String", e);
182     throw new RuntimeException(e);
183     }
184     }
185    
186     public int getSize() {
187     return size;
188     }
189 alfonx 1319
190 alfonx 1320 public static void submitToSearchEngines(String urlToSitemap) {
191 alfonx 1319 for (String se : SEARCHENGINES) {
192     log.info("Submitting " + urlToSitemap + " to " + se);
193     try {
194     URL url = new URL(se + urlToSitemap);
195 alfonx 1320
196 alfonx 1319 url.openConnection().connect();
197 alfonx 1320
198 alfonx 1319 } catch (Exception e) {
199     log.error("failed to submit " + urlToSitemap + " to " + se, e);
200     }
201     }
202     }
203 alfonx 1317 }

Properties

Name Value
svn:eol-style native
svn:keywords Id URL
svn:mime-type text/plain

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26