| 
 
 package com.spider.obj;

import java.net.*;
import java.io.*;
import java.util.*;
import javax.swing.text.*;
import javax.swing.text.html.*;

/**
 * A minimal web spider. URLs are kept in three workload collections
 * (waiting, processed, error); {@link #begin()} drains the waiting list by
 * downloading each page, parsing it as HTML and feeding every link it finds
 * back through the {@link ISpiderReportable} callback.
 */
public class Spider {

  /** URLs whose download failed with an I/O error. */
  protected Collection workloadError = new ArrayList(3);

  /** URLs that have been queued but not yet processed. */
  protected Collection workloadWaiting = new ArrayList(3);

  /** URLs that were processed (or skipped because they are not text). */
  protected Collection workloadProcessed = new ArrayList(3);

  /** Receiver of the spider's notifications. */
  protected ISpiderReportable report;

  /**
   * Set by {@link #cancel()} to stop {@link #begin()}. Volatile because
   * cancel() is called from a different thread than the one running begin().
   */
  protected volatile boolean cancel = false;

  /**
   * Creates a spider.
   *
   * @param report object that is notified about found URLs, e-mail
   *               addresses and failed downloads
   */
  public Spider(ISpiderReportable report) {
    this.report = report;
  }

  /** @return the collection of URLs that failed to download */
  public Collection getWorkloadError() {
    return workloadError;
  }

  /** @return the collection of URLs still waiting to be processed */
  public Collection getWorkloadWaiting() {
    return workloadWaiting;
  }

  /** @return the collection of URLs that have been processed */
  public Collection getWorkloadProcessed() {
    return workloadProcessed;
  }

  /** Empties all three workload collections. */
  public void clear() {
    getWorkloadError().clear();
    getWorkloadWaiting().clear();
    getWorkloadProcessed().clear();
  }

  /** Asks a running {@link #begin()} loop to stop as soon as possible. */
  public void cancel() {
    cancel = true;
  }

  /**
   * Queues a URL for processing unless it is already present in any of the
   * three workload collections.
   *
   * @param url the URL to queue
   */
  public void addURL(URL url) {
    if (getWorkloadWaiting().contains(url))
      return;
    if (getWorkloadError().contains(url))
      return;
    if (getWorkloadProcessed().contains(url))
      return;
    log("Adding to workload: " + url);
    getWorkloadWaiting().add(url);
  }

  /**
   * Downloads and parses one URL. Non-text content is marked as processed
   * without being parsed. On an I/O error the URL is moved to the error
   * workload and {@link ISpiderReportable#spiderURLError(URL)} is called.
   *
   * @param url the URL to process
   */
  public void processURL(URL url) {
    InputStream is = null;
    try {
      log("Processing: " + url);
      // get the URL's contents
      URLConnection connection = url.openConnection();
      String contentType = connection.getContentType();
      if ((contentType != null)
          && !contentType.toLowerCase().startsWith("text/")) {
        getWorkloadWaiting().remove(url);
        getWorkloadProcessed().add(url);
        log("Not processing because content type is: " + contentType);
        return;
      }

      // read the URL
      is = connection.getInputStream();
      Reader r = new InputStreamReader(is);
      // parse the URL
      HTMLEditorKit.Parser parse = new HTMLParse().getParser();
      parse.parse(r, new Parser(url), true);
    } catch (IOException e) {
      getWorkloadWaiting().remove(url);
      getWorkloadError().add(url);
      log("Error: " + url);
      report.spiderURLError(url);
      return;
    } finally {
      // Always release the stream, on success as well as on failure.
      if (is != null) {
        try {
          is.close();
        } catch (IOException ignored) {
          // nothing useful can be done if close itself fails
        }
      }
    }
    // mark URL as complete
    getWorkloadWaiting().remove(url);
    getWorkloadProcessed().add(url);
    log("Complete: " + url);
  }

  /**
   * Processes the waiting workload until it is empty or {@link #cancel()}
   * is called. Each pass works on a snapshot of the waiting list because
   * processing a page may add new URLs to it.
   */
  public void begin() {
    cancel = false;
    while (!getWorkloadWaiting().isEmpty() && !cancel) {
      Object list[] = getWorkloadWaiting().toArray();
      for (int i = 0; (i < list.length) && !cancel; i++)
        processURL((URL) list[i]);
    }
  }

  /**
   * HTML parser callback that extracts links from the tags of one page.
   */
  protected class Parser extends HTMLEditorKit.ParserCallback {

    /** URL of the page being parsed; relative links are resolved against it. */
    protected URL base;

    /**
     * @param base URL of the page being parsed
     */
    public Parser(URL base) {
      this.base = base;
    }

    /**
     * Looks for a link on the tag: the HREF attribute, or SRC when the tag
     * is a FRAME without HREF. Any "#fragment" suffix is removed, "mailto:"
     * links are reported as e-mail addresses, everything else is passed to
     * {@link #handleLink(URL, String)}.
     */
    public void handleSimpleTag(HTML.Tag t,
                                MutableAttributeSet a, int pos) {
      String href = (String) a.getAttribute(HTML.Attribute.HREF);

      if ((href == null) && (t == HTML.Tag.FRAME))
        href = (String) a.getAttribute(HTML.Attribute.SRC);

      if (href == null)
        return;

      int i = href.indexOf('#');
      if (i != -1)
        href = href.substring(0, i);

      if (href.toLowerCase().startsWith("mailto:")) {
        report.spiderFoundEMail(href);
        return;
      }
      if (t == HTML.Tag.META) {
        String title = (String) a.getAttribute(HTML.Attribute.NAME);
        System.out.println("title:" + title);
      }
      handleLink(base, href);
    }

    /** Start tags are handled exactly like simple tags. */
    public void handleStartTag(HTML.Tag t,
                               MutableAttributeSet a, int pos) {
      handleSimpleTag(t, a, pos);    // handle the same way
    }

    /**
     * Resolves a link against the page URL and queues it when the report
     * callback accepts it. Malformed links are only logged.
     *
     * @param base URL of the page containing the link
     * @param str  the link text as found in the page
     */
    protected void handleLink(URL base, String str) {
      try {
        URL url = new URL(base, str);
        if (report.spiderFoundURL(base, url))
          addURL(url);
      } catch (MalformedURLException e) {
        log("Found malformed URL: " + str);
      }
    }
  }

  /**
   * Writes a timestamped line to standard output.
   *
   * @param entry the text to log
   */
  public void log(String entry) {
    System.out.println((new Date()) + ":" + entry);
  }
}
package com.spider.obj;

import java.net.*;

/**
 * Callback contract through which a spider reports what it encounters.
 */
interface ISpiderReportable {

  /**
   * Called for every link found on a page.
   *
   * @param base URL of the page that contains the link
   * @param url  the resolved link
   * @return a flag the caller uses to decide whether to follow the link
   */
  boolean spiderFoundURL(URL base, URL url);

  /**
   * Called when a URL could not be read.
   *
   * @param url the URL that failed
   */
  void spiderURLError(URL url);

  /**
   * Called when a "mailto:" link is found.
   *
   * @param email the link text that was found
   */
  void spiderFoundEMail(String email);
}

package com.spider.obj;

import javax.swing.text.html.*;

/**
 * Thin HTMLEditorKit subclass whose only job is to expose the kit's HTML
 * parser through a public method.
 */
public class HTMLParse extends HTMLEditorKit {

  /**
   * @return the parser supplied by the superclass
   */
  public HTMLEditorKit.Parser getParser() {
    HTMLEditorKit.Parser inherited = super.getParser();
    return inherited;
  }
}

// package declaration of the class that follows
package com.spider.obj;
import java.awt.*; 
import javax.swing.*; 
import java.net.*; 
import java.io.*; 
public class CheckLinks extends javax.swing.JFrame implements 
      Runnable,ISpiderReportable { 
  
  
public CheckLinks() 
{ 
//{{INIT_CONTROLS 
  
setTitle("Find Broken Links"); 
getContentPane().setLayout(null); 
setSize(405,288); 
setVisible(true); 
label1.setText("Enter a URL:"); 
getContentPane().add(label1); 
label1.setBounds(12,12,84,12); 
begin.setText("Begin"); 
begin.setActionCommand("Begin"); 
getContentPane().add(begin); 
begin.setBounds(12,36,84,24); 
getContentPane().add(url); 
url.setBounds(108,36,288,24); 
errorScroll.setAutoscrolls(true); 
errorScroll.setHorizontalScrollBarPolicy(javax.swing. 
         ScrollPaneConstants.HORIZONTAL_SCROLLBAR_ALWAYS); 
errorScroll.setVerticalScrollBarPolicy(javax.swing. 
         ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS); 
errorScroll.setOpaque(true); 
getContentPane().add(errorScroll); 
errorScroll.setBounds(12,120,384,156); 
errors.setEditable(false); 
errorScroll.getViewport().add(errors); 
errors.setBounds(0,0,366,138); 
current.setText("Currently Processing: "); 
getContentPane().add(current); 
current.setBounds(12,72,384,12); 
goodLinksLabel.setText("Good Links: 0"); 
getContentPane().add(goodLinksLabel); 
goodLinksLabel.setBounds(12,96,192,12); 
badLinksLabel.setText("Bad Links: 0"); 
getContentPane().add(badLinksLabel); 
badLinksLabel.setBounds(216,96,96,12); 
//}} 
//{{INIT_MENUS 
//}} 
  
//{{REGISTER_LISTENERS 
SymAction lSymAction = new SymAction(); 
begin.addActionListener(lSymAction); 
//} 
} 
  
static public void main(String args[]) 
{ 
(new CheckLinks()).setVisible(true); 
} 
  
  
public void addNotify() 
{ 
// Record the size of the window prior to calling parent's 
// addNotify. 
Dimension size = getSize(); 
  
super.addNotify(); 
  
if ( frameSizeAdjusted ) 
 return; 
frameSizeAdjusted = true; 
  
//Adjust size of frame according to the insets and menu bar 
Insets insets = getInsets(); 
javax.swing.JMenuBar menuBar = getRootPane().getJMenuBar(); 
int menuBarHeight = 0; 
if ( menuBar != null ) 
 menuBarHeight = menuBar.getPreferredSize().height; 
setSize(insets.left + insets.right + size.width, insets.top + 
                     insets.bottom + size.height + 
                     menuBarHeight); 
} 
  
// Used by addNotify 
boolean frameSizeAdjusted = false; 
  
//{{DECLARE_CONTROLS 
javax.swing.JLabel label1 = new javax.swing.JLabel(); 
  
  
javax.swing.JButton begin = new javax.swing.JButton(); 
  
  
javax.swing.JTextField url = new javax.swing.JTextField(); 
  
  
javax.swing.JScrollPane errorScroll = 
   new javax.swing.JScrollPane(); 
  
  
javax.swing.JTextArea errors = new javax.swing.JTextArea(); 
javax.swing.JLabel current = new javax.swing.JLabel(); 
javax.swing.JLabel goodLinksLabel = new javax.swing.JLabel(); 
javax.swing.JLabel badLinksLabel = new javax.swing.JLabel(); 
//}} 
  
//{{DECLARE_MENUS 
//}} 
  
  
protected Thread backgroundThread; 
  
  
protected Spider spider; 
  
  
protected URL base; 
  
  
protected int badLinksCount = 0; 
  
  
protected int goodLinksCount = 0; 
class SymAction implements java.awt.event.ActionListener { 
 public void actionPerformed(java.awt.event.ActionEvent event) 
 { 
   Object object = event.getSource(); 
   if ( object == begin ) 
     begin_actionPerformed(event); 
 } 
} 
  
  
void begin_actionPerformed(java.awt.event.ActionEvent event) 
{ 
 if ( backgroundThread==null ) { 
   begin.setLabel("Cancel"); 
   backgroundThread = new Thread(this); 
   backgroundThread.start(); 
   goodLinksCount=0; 
   badLinksCount=0; 
 } else { 
   spider.cancel(); 
 } 
  
} 
  
  
public void run() 
{ 
 try { 
   errors.setText(""); 
   spider = new Spider(this); 
   spider.clear(); 
   base = new URL(url.getText()); 
   spider.addURL(base); 
   spider.begin(); 
   Runnable doLater = new Runnable() 
   { 
     public void run() 
     { 
       begin.setText("Begin"); 
     } 
   }; 
   SwingUtilities.invokeLater(doLater); 
   backgroundThread=null; 
  
 } catch ( MalformedURLException e ) { 
   UpdateErrors err = new UpdateErrors(); 
   err.msg = "Bad address."; 
   SwingUtilities.invokeLater(err); 
  
 } 
} 
  
  
public boolean spiderFoundURL(URL base,URL url) 
{ 
 UpdateCurrentStats cs = new UpdateCurrentStats(); 
 cs.msg = url.toString(); 
 SwingUtilities.invokeLater(cs); 
  
 if ( !checkLink(url) ) { 
   UpdateErrors err = new UpdateErrors(); 
   err.msg = url+"(on page " + base + ")\n"; 
   SwingUtilities.invokeLater(err); 
   badLinksCount++; 
   return false; 
 } 
  
 goodLinksCount++; 
 if ( !url.getHost().equalsIgnoreCase(base.getHost()) ) 
   return false; 
 else 
   return true; 
} 
  
  
public void spiderURLError(URL url) 
{ 
System.out.println("没找到"); 
} 
  
  
protected boolean checkLink(URL url) 
{ 
 try { 
   URLConnection connection = url.openConnection(); 
   connection.connect(); 
   return true; 
 } catch ( IOException e ) { 
   return false; 
 } 
} 
  
  
public void spiderFoundEMail(String email) 
{ 
System.out.println(""); 
} 
  
  
class UpdateErrors implements Runnable { 
 public String msg; 
 public void run() 
 { 
   errors.append(msg); 
 } 
} 
  
  
class UpdateCurrentStats implements Runnable { 
 public String msg; 
 public void run() 
 { 
   current.setText("Currently Processing: " + msg ); 
   goodLinksLabel.setText("Good Links: " + goodLinksCount); 
   badLinksLabel.setText("Bad Links: " + badLinksCount); 
 } 
} 
} 
  
  |