/[schmitzm]/branches/2.4.x/src/skrueger/Sitemap.java
ViewVC logotype

Annotation of /branches/2.4.x/src/skrueger/Sitemap.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1324 - (hide annotations)
Wed Dec 1 13:26:03 2010 UTC (14 years, 3 months ago) by alfonx
Original Path: trunk/src/skrueger/Sitemap.java
File MIME type: text/plain
File size: 5709 byte(s)
Moved testing resources to testresources (moving towards a standard Maven directory structure, very slowly and carefully)
1 alfonx 1317 package skrueger;
2    
3     import java.io.FileWriter;
4     import java.io.StringWriter;
5     import java.io.Writer;
6 alfonx 1319 import java.net.URL;
7 alfonx 1321 import java.net.URLEncoder;
8 alfonx 1317 import java.util.Date;
9    
10     import javax.xml.parsers.DocumentBuilderFactory;
11     import javax.xml.transform.OutputKeys;
12     import javax.xml.transform.Result;
13     import javax.xml.transform.Transformer;
14     import javax.xml.transform.TransformerFactory;
15     import javax.xml.transform.dom.DOMSource;
16     import javax.xml.transform.stream.StreamResult;
17    
18     import org.apache.log4j.Logger;
19     import org.w3c.dom.Document;
20     import org.w3c.dom.Element;
21    
22     /**
23     * This class provides an easy way to list all pages of your site (including
24     * generates ones) ina sitemap.xml. See http://sitemaps.org/protocol.php for the
25     * XML specification.
26     *
27     * <br/>
28     * XML output is based on org.w3c.dom
29     *
30     *
31     * @author Stefan Tzeggai
32     */
33     public class Sitemap {
34    
35 alfonx 1320 static String[] SEARCHENGINES = new String[] {
36     "http://www.sitemapwriter.com/notify.php?crawler=all&url=",
37     "http://submissions.ask.com/ping?sitemap=",
38     "http://www.google.com/webmasters/tools/ping?sitemap=",
39     "http://api.moreover.com/ping?u=",
40     "http://www.bing.com/webmaster/ping.aspx?siteMap=",
41     "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=" };
42 alfonx 1317
43 alfonx 1319 final static Logger log = Logger.getLogger(Sitemap.class);
44    
45 alfonx 1317 private Document document;
46    
47     final private Element urlsetElement;
48    
49     /**
50     * Returns the number of URLs. May not be more than 50,000 by spec.
51     */
52     private int size = 0;
53    
54     final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9";
55    
56     public enum CHANGEFREQ {
57     monthly, daily, weekly
58     }
59    
60     public Sitemap() {
61    
62     try {
63    
64     // Create a DOM builder and parse the fragment
65     final DocumentBuilderFactory factory = DocumentBuilderFactory
66     .newInstance();
67     document = factory.newDocumentBuilder().newDocument();
68    
69     // XML root element
70     urlsetElement = document.createElementNS(NSURL, "urlset");
71     document.appendChild(urlsetElement);
72    
73     // // Linking this XML to the AtlasML Schema
74     // final Attr namespaces = document.createAttributeNS(
75     // "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
76     // namespaces
77     // .setValue(NSURL+" http://localhost:"
78     // + Webserver.DEFAULTPORT
79     // + "/skrueger/atlas/resource/AtlasML.xsd");
80     // urlset.setAttributeNode(namespaces);
81    
82     } catch (Exception e) {
83     throw new RuntimeException(
84     "Sitemap org.w3c.xml newDocumentBuilder failed:", e);
85     }
86     }
87    
88     /**
89     *
90     * @param locString
91     * @param lastmod
92     * when was the page modified the last time? May be
93     * <code>null</code>.
94     * @param changefreq
95     * How often does the page change. May be <code>null</code>.
96     * @param priority
97     * Priority 0.0 - 1.0. May be <code>null</code>.
98     * @return <code>true</code> if the {@link Sitemap} contains less than 50000
99     * urls and the page was added.
100     */
101     public boolean addUrl(String locString, Date lastmod,
102     CHANGEFREQ changefreq, Double priority) {
103     size++;
104     if (size >= 50000)
105     return false;
106    
107     Element urlElement = document.createElement("url");
108    
109     if (locString == null)
110     throw new IllegalArgumentException("location must be provided");
111     else {
112     Element e = document.createElement("loc");
113     e.appendChild(document.createTextNode(locString));
114     urlElement.appendChild(e);
115     }
116    
117     if (lastmod != null) {
118     Element e = document.createElement("lastmod");
119     e.appendChild(document.createTextNode(lastmod.toString()));
120     urlElement.appendChild(e);
121     }
122    
123     if (changefreq != null) {
124     Element e = document.createElement("changefreq");
125     e.appendChild(document.createTextNode(changefreq.toString()));
126     urlElement.appendChild(e);
127     }
128    
129     if (priority != null) {
130     Element e = document.createElement("priority");
131     e.appendChild(document.createTextNode(priority.toString()));
132     urlElement.appendChild(e);
133     }
134     urlsetElement.appendChild(urlElement);
135     return true;
136     }
137    
138     public Document getDocument() {
139     return document;
140     }
141    
142     public String getXmlString() {
143     StringWriter stringWriter = new StringWriter();
144     outputToWriter(stringWriter);
145     return stringWriter.toString();
146     }
147    
148     /**
149     * Can be used to write to a File with {@link FileWriter}.
150     */
151     public void outputToWriter(Writer stringWriter) {
152     try {
153    
154     try { // close outputStreamWriter.close();
155    
156     // ****************************************************************************
157     // Create the XML
158     // ****************************************************************************
159     final Result result = new StreamResult(stringWriter);
160    
161     // with indenting to make it human-readable
162     final TransformerFactory tf = TransformerFactory.newInstance();
163    
164 alfonx 1319 // TODO Ging mit xerces, geht nicht mehr mit xalan ?!
165 alfonx 1317 // tf.setAttribute("indent-number", new Integer(2));
166    
167     final Transformer xformer = tf.newTransformer();
168     xformer.setOutputProperty(OutputKeys.INDENT, "yes");
169     xformer.setOutputProperty(
170     "{http://xml.apache.org/xalan}indent-amount", "2");
171    
172     // Write the DOM document to the file
173     xformer.transform(new DOMSource(document), result);
174    
175     } finally {
176     stringWriter.close();
177     }
178    
179     } catch (Exception e) {
180     log.error("Failed to create sitemap.XML-String", e);
181     throw new RuntimeException(e);
182     }
183     }
184    
185     public int getSize() {
186     return size;
187     }
188 alfonx 1319
189 alfonx 1320 public static void submitToSearchEngines(String urlToSitemap) {
190 alfonx 1319 for (String se : SEARCHENGINES) {
191     log.info("Submitting " + urlToSitemap + " to " + se);
192     try {
193     URL url = new URL(se + urlToSitemap);
194 alfonx 1320
195 alfonx 1324 urlToSitemap = URLEncoder.encode(urlToSitemap);
196 alfonx 1321
197 alfonx 1319 url.openConnection().connect();
198 alfonx 1320
199 alfonx 1319 } catch (Exception e) {
200     log.error("failed to submit " + urlToSitemap + " to " + se, e);
201     }
202     }
203     }
204 alfonx 1317 }

Properties

Name Value
svn:eol-style native
svn:keywords Id URL
svn:mime-type text/plain

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26