/[schmitzm]/trunk/src/skrueger/Sitemap.java
ViewVC logotype

Annotation of /trunk/src/skrueger/Sitemap.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1325 - (hide annotations)
Wed Dec 1 14:05:08 2010 UTC (14 years, 3 months ago) by alfonx
File MIME type: text/plain
File size: 6241 byte(s)


1 alfonx 1317 package skrueger;
2    
3     import java.io.FileWriter;
4     import java.io.StringWriter;
5     import java.io.Writer;
6 alfonx 1325 import java.net.HttpURLConnection;
7 alfonx 1319 import java.net.URL;
8 alfonx 1325 import java.net.URLConnection;
9 alfonx 1321 import java.net.URLEncoder;
10 alfonx 1317 import java.util.Date;
11    
12     import javax.xml.parsers.DocumentBuilderFactory;
13     import javax.xml.transform.OutputKeys;
14     import javax.xml.transform.Result;
15     import javax.xml.transform.Transformer;
16     import javax.xml.transform.TransformerFactory;
17     import javax.xml.transform.dom.DOMSource;
18     import javax.xml.transform.stream.StreamResult;
19    
20     import org.apache.log4j.Logger;
21     import org.w3c.dom.Document;
22     import org.w3c.dom.Element;
23    
24     /**
25     * This class provides an easy way to list all pages of your site (including
26     * generates ones) ina sitemap.xml. See http://sitemaps.org/protocol.php for the
27     * XML specification.
28     *
29     * <br/>
30     * XML output is based on org.w3c.dom
31     *
32     *
33     * @author Stefan Tzeggai
34     */
35     public class Sitemap {
36    
37 alfonx 1320 static String[] SEARCHENGINES = new String[] {
38     "http://www.sitemapwriter.com/notify.php?crawler=all&url=",
39     "http://submissions.ask.com/ping?sitemap=",
40     "http://www.google.com/webmasters/tools/ping?sitemap=",
41     "http://api.moreover.com/ping?u=",
42     "http://www.bing.com/webmaster/ping.aspx?siteMap=",
43     "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=" };
44 alfonx 1317
45 alfonx 1319 final static Logger log = Logger.getLogger(Sitemap.class);
46    
47 alfonx 1317 private Document document;
48    
49     final private Element urlsetElement;
50    
51     /**
52     * Returns the number of URLs. May not be more than 50,000 by spec.
53     */
54     private int size = 0;
55    
56     final static String NSURL = "http://www.sitemaps.org/schemas/sitemap/0.9";
57    
58     public enum CHANGEFREQ {
59     monthly, daily, weekly
60     }
61    
62     public Sitemap() {
63    
64     try {
65    
66     // Create a DOM builder and parse the fragment
67     final DocumentBuilderFactory factory = DocumentBuilderFactory
68     .newInstance();
69     document = factory.newDocumentBuilder().newDocument();
70    
71     // XML root element
72     urlsetElement = document.createElementNS(NSURL, "urlset");
73     document.appendChild(urlsetElement);
74    
75     // // Linking this XML to the AtlasML Schema
76     // final Attr namespaces = document.createAttributeNS(
77     // "http://www.w3.org/2001/XMLSchema-instance", "schemaLocation");
78     // namespaces
79     // .setValue(NSURL+" http://localhost:"
80     // + Webserver.DEFAULTPORT
81     // + "/skrueger/atlas/resource/AtlasML.xsd");
82     // urlset.setAttributeNode(namespaces);
83    
84     } catch (Exception e) {
85     throw new RuntimeException(
86     "Sitemap org.w3c.xml newDocumentBuilder failed:", e);
87     }
88     }
89    
90     /**
91     *
92     * @param locString
93     * @param lastmod
94     * when was the page modified the last time? May be
95     * <code>null</code>.
96     * @param changefreq
97     * How often does the page change. May be <code>null</code>.
98     * @param priority
99     * Priority 0.0 - 1.0. May be <code>null</code>.
100     * @return <code>true</code> if the {@link Sitemap} contains less than 50000
101     * urls and the page was added.
102     */
103     public boolean addUrl(String locString, Date lastmod,
104     CHANGEFREQ changefreq, Double priority) {
105     size++;
106     if (size >= 50000)
107     return false;
108    
109     Element urlElement = document.createElement("url");
110    
111     if (locString == null)
112     throw new IllegalArgumentException("location must be provided");
113     else {
114     Element e = document.createElement("loc");
115     e.appendChild(document.createTextNode(locString));
116     urlElement.appendChild(e);
117     }
118    
119     if (lastmod != null) {
120     Element e = document.createElement("lastmod");
121     e.appendChild(document.createTextNode(lastmod.toString()));
122     urlElement.appendChild(e);
123     }
124    
125     if (changefreq != null) {
126     Element e = document.createElement("changefreq");
127     e.appendChild(document.createTextNode(changefreq.toString()));
128     urlElement.appendChild(e);
129     }
130    
131     if (priority != null) {
132     Element e = document.createElement("priority");
133     e.appendChild(document.createTextNode(priority.toString()));
134     urlElement.appendChild(e);
135     }
136     urlsetElement.appendChild(urlElement);
137     return true;
138     }
139    
140     public Document getDocument() {
141     return document;
142     }
143    
144     public String getXmlString() {
145     StringWriter stringWriter = new StringWriter();
146     outputToWriter(stringWriter);
147     return stringWriter.toString();
148     }
149    
150     /**
151     * Can be used to write to a File with {@link FileWriter}.
152     */
153     public void outputToWriter(Writer stringWriter) {
154     try {
155    
156     try { // close outputStreamWriter.close();
157    
158     // ****************************************************************************
159     // Create the XML
160     // ****************************************************************************
161     final Result result = new StreamResult(stringWriter);
162    
163     // with indenting to make it human-readable
164     final TransformerFactory tf = TransformerFactory.newInstance();
165    
166 alfonx 1319 // TODO Ging mit xerces, geht nicht mehr mit xalan ?!
167 alfonx 1317 // tf.setAttribute("indent-number", new Integer(2));
168    
169     final Transformer xformer = tf.newTransformer();
170     xformer.setOutputProperty(OutputKeys.INDENT, "yes");
171     xformer.setOutputProperty(
172     "{http://xml.apache.org/xalan}indent-amount", "2");
173    
174     // Write the DOM document to the file
175     xformer.transform(new DOMSource(document), result);
176    
177     } finally {
178     stringWriter.close();
179     }
180    
181     } catch (Exception e) {
182     log.error("Failed to create sitemap.XML-String", e);
183     throw new RuntimeException(e);
184     }
185     }
186    
187     public int getSize() {
188     return size;
189     }
190 alfonx 1319
191 alfonx 1320 public static void submitToSearchEngines(String urlToSitemap) {
192 alfonx 1325 try {
193 alfonx 1320
194 alfonx 1325 urlToSitemap = URLEncoder.encode(urlToSitemap, "UTF-8");
195     for (String se : SEARCHENGINES) {
196     log.info("Submitting " + urlToSitemap + " to " + se);
197     try {
198     URL url = new URL(se + urlToSitemap);
199 alfonx 1321
200 alfonx 1325 URLConnection openConnection = url.openConnection();
201     if (openConnection instanceof HttpURLConnection) {
202     HttpURLConnection httpconnection = ((HttpURLConnection) openConnection);
203     if (httpconnection.getResponseCode() != 200) {
204     log.error("se returned ResponseCode "
205     + httpconnection.getResponseCode()
206     + " for " + urlToSitemap);
207     }
208     }
209 alfonx 1320
210 alfonx 1325 } catch (Exception e) {
211     log.error("failed to submit " + urlToSitemap + " to " + se,
212     e);
213     }
214 alfonx 1319 }
215 alfonx 1325 } catch (Exception e) {
216     log.error("Error URL encoding " + urlToSitemap, e);
217 alfonx 1319 }
218     }
219 alfonx 1317 }

Properties

Name Value
svn:eol-style native
svn:keywords Id URL
svn:mime-type text/plain

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26