首页 >web前端 >html教程 >一个Swing程序,用来判断一个URL页面内包含的好链接和坏链接数目_html/css_WEB-ITnose

一个Swing程序,用来判断一个URL页面内包含的好链接和坏链接数目_html/css_WEB-ITnose

WBOY
WBOY原创
2016-06-24 11:52:43 1222浏览

入口类

import java.awt.Dimension;import java.awt.Insets;import java.awt.event.ActionEvent;import java.awt.event.ActionListener;import java.io.IOException;import java.net.MalformedURLException;import java.net.URL;import java.net.URLConnection;import javax.swing.JButton;import javax.swing.JFrame;import javax.swing.JLabel;import javax.swing.JMenuBar;import javax.swing.JScrollPane;import javax.swing.JTextArea;import javax.swing.JTextField;import javax.swing.ScrollPaneConstants;import javax.swing.SwingUtilities;/** * Description 检查URL是否是合法的URL,入口类,直接运行该类,将需要分析的URL地址粘入文本框即可 *  * @author wangxu *  */public class CheckLinks extends JFrame implements Runnable, ISpiderReportable {	// Used by addNotify	boolean frameSizeAdjusted = false;	JLabel label1 = new JLabel();	JButton begin = new JButton();	JTextField url = new JTextField();	JScrollPane errorScroll = new JScrollPane();	JTextArea errors = new JTextArea();	JLabel current = new JLabel();	JLabel goodLinksLabel = new JLabel();	JLabel badLinksLabel = new JLabel();	protected Thread backgroundThread;	protected Spider spider;	protected URL base;	protected int badLinksCount = 0;	protected int goodLinksCount = 0;	private static final long serialVersionUID = 1L;	public CheckLinks() {		setTitle("Find Broken Links");// 设置JFrame的标题		getContentPane().setLayout(null);// 设置布局方式		setSize(405, 288);		setVisible(true);		label1.setText("Enter a URL:");		getContentPane().add(label1);		label1.setBounds(12, 12, 84, 12);		begin.setText("Begin");		begin.setActionCommand("Begin");		getContentPane().add(begin);		begin.setBounds(12, 36, 84, 24);// 设置坐标和宽、高		getContentPane().add(url);		url.setBounds(108, 36, 288, 24);		errorScroll.setAutoscrolls(true);// 自动显示滚动条		errorScroll.setHorizontalScrollBarPolicy(ScrollPaneConstants.HORIZONTAL_SCROLLBAR_ALWAYS);// 水平方向始终显示		errorScroll.setVerticalScrollBarPolicy(ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS);// 垂直方向始终显示		errorScroll.setOpaque(true);// 设置不透明		getContentPane().add(errorScroll);		
errorScroll.setBounds(12, 120, 384, 156);		errors.setEditable(false);// 设置不可编辑		errorScroll.getViewport().add(errors);// 将文本域添加进滚动条		errors.setBounds(0, 0, 366, 138);		current.setText("Currently Processing: ");		getContentPane().add(current);// 加入显示当前信息的JLabel		current.setBounds(12, 72, 384, 12);		goodLinksLabel.setText("Good Links: 0");		getContentPane().add(goodLinksLabel);		goodLinksLabel.setBounds(12, 96, 192, 12);		badLinksLabel.setText("Bad Links: 0");		getContentPane().add(badLinksLabel);		badLinksLabel.setBounds(216, 96, 96, 12);		SymAction lSymAction = new SymAction();// 实例化一个事件监听器		begin.addActionListener(lSymAction);// 注册监听	}	static public void main(String args[]) {		new CheckLinks();// 程序入口	}	public void addNotify() {		// Record the size of the window prior to calling parent's addNotify.		Dimension size = getSize();		super.addNotify();		if (frameSizeAdjusted)			return;		frameSizeAdjusted = true;		// Adjust size of frame according to the insets and menu bar		Insets insets = getInsets();		JMenuBar menuBar = getRootPane().getJMenuBar();		int menuBarHeight = 0;		if (menuBar != null)			menuBarHeight = menuBar.getPreferredSize().height;		setSize(insets.left + insets.right + size.width, insets.top + insets.bottom + size.height + menuBarHeight);	}	class SymAction implements ActionListener {		public void actionPerformed(ActionEvent event) {			Object object = event.getSource();			if (object == begin)				begin_actionPerformed(event);		}	}	void begin_actionPerformed(ActionEvent event) {		if (backgroundThread == null) {			begin.setText("Cancel");			backgroundThread = new Thread(this);// 用当前对象来实例化一个Thread对象			backgroundThread.start();// 启动线程,执行run方法			goodLinksCount = 0;			badLinksCount = 0;		} else {			spider.cancel();// 设置标志位true		}	}	@Override	public void run() {		try {			errors.setText("");			spider = new Spider(this);// 用当前对象来实例化一个Spider对象,因为当前类实现了ISpiderReportable接口			spider.clear();			base = new URL(url.getText());// 取得需要搜索的URL地址			
spider.addURL(base);//将URL地址加入spider			spider.begin();//spider开始工作			Runnable doLater = new Runnable() {				public void run() {					begin.setText("Begin");				}			};			// 导致 doRun.run() 在 AWT 事件指派线程上异步执行。在所有挂起的 AWT			// 事件被处理后才发生。此方法应该在应用程序线程需要更新该 GUI时使用。在下面的示例中,invokeLater			// 调用将事件指派线程上的 Runnable对象 doHelloWorld加入队列,然后输出一条信息。			SwingUtilities.invokeLater(doLater);			backgroundThread = null;// 将后台线程重新置空,以便接受下一个URL		} catch (MalformedURLException e) {			UpdateErrors err = new UpdateErrors();			err.msg = "Bad address.";			SwingUtilities.invokeLater(err);		}	}	//检测两个URL地址是否属于同一主机,如果是返回true,否则false	@Override	public boolean spiderFoundURL(URL base, URL url) {		UpdateCurrentStats cs = new UpdateCurrentStats();		cs.msg = url.toString();//将URL信息赋值给cs.msg,使用后台线程进行打印		SwingUtilities.invokeLater(cs);		if (!checkLink(url)) {			UpdateErrors err = new UpdateErrors();			err.msg = url + "(on page " + base + ")\n";			SwingUtilities.invokeLater(err);			badLinksCount++;			return false;		}		goodLinksCount++;		if (!url.getHost().equalsIgnoreCase(base.getHost()))			return false;		else			return true;	}	@Override	public void spiderURLError(URL url) {		System.out.println("没找到的URL:" + url);	}	protected boolean checkLink(URL url) {		try {			URLConnection connection = url.openConnection();			connection.connect();			return true;		} catch (IOException e) {			return false;		}	}	public void spiderFoundEMail(String email) {		System.out.println("获得Email:" + email);	}	class UpdateErrors implements Runnable {		public String msg;		public void run() {			errors.append(msg);		}	}	class UpdateCurrentStats implements Runnable {		public String msg;		public void run() {			current.setText("Currently Processing: " + msg);			goodLinksLabel.setText("Good Links: " + goodLinksCount);			badLinksLabel.setText("Bad Links: " + badLinksCount);		}	}}
import javax.swing.text.html.*;

/**
 * {@link HTMLEditorKit} subclass that exists only to make the kit's HTML parser
 * obtainable from outside the class.
 *
 * <p>Background: the Swing {@code JEditorPane} text component supports different kinds
 * of content through a plug-in mechanism called {@code EditorKit}. Because HTML is a
 * very popular content format, some support is provided by default: HTML version 3.2
 * (with some extensions), migrating toward version 4.0. The {@code <applet>} tag is not
 * supported, but some support is provided for the {@code <object>} tag.
 *
 * @author wangxu
 */
public class HTMLParse extends HTMLEditorKit {
    private static final long serialVersionUID = 1L;

    /**
     * Returns the parser supplied by {@link HTMLEditorKit}, re-declared as
     * {@code public} so that callers can request it directly.
     *
     * @return the parser obtained from {@code super.getParser()}
     */
    @Override
    public HTMLEditorKit.Parser getParser() {
        return super.getParser();
    }
}
import java.net.*;

/**
 * Callback interface through which a spider reports what it encounters while
 * crawling. The spider is handed an implementation and invokes these methods on it.
 */
public interface ISpiderReportable {

    /**
     * Called when a URL link has been found.
     *
     * @param base the page on which the link was found
     * @param url the link that was found
     * @return a flag for the spider; NOTE(review): presumably {@code true} means the
     *     spider should go on to crawl {@code url} — confirm against the spider's
     *     use of the return value
     */
    boolean spiderFoundURL(URL base, URL url);

    /**
     * Called when a URL could not be processed.
     *
     * @param url the URL that failed
     */
    void spiderURLError(URL url);

    /**
     * Called when an e-mail link has been found.
     *
     * @param email the e-mail address that was found
     */
    void spiderFoundEMail(String email);
}
import java.util.*;import java.net.*;import java.io.*;import javax.swing.text.*;import javax.swing.text.html.*;public class Spider {	// 装载错误的工作集	protected Collection workloadError = new ArrayList(3);	// 等待工作集	protected Collection workloadWaiting = new ArrayList(3);	// 已处理的工作集	protected Collection workloadProcessed = new ArrayList(3);	protected ISpiderReportable report;	protected boolean cancel = false;	public Spider(ISpiderReportable report) {		this.report = report;	}	public Collection getWorkloadError() {		return workloadError;	}	public Collection getWorkloadWaiting() {		return workloadWaiting;	}	public Collection getWorkloadProcessed() {		return workloadProcessed;	}	public void clear() {		getWorkloadError().clear();		getWorkloadWaiting().clear();		getWorkloadProcessed().clear();	}	public void cancel() {		cancel = true;	}	public void addURL(URL url) {		if (getWorkloadWaiting().contains(url))// 如果等待的工作集中已经包含该URL,返回			return;		if (getWorkloadError().contains(url))// 如果出错的工作集中已经包含该URL,返回			return;		if (getWorkloadProcessed().contains(url))// 如果已处理的工作集中包含该URL,返回			return;		log("Adding to workload: " + url);		getWorkloadWaiting().add(url);// 将其加入等待的工作集中	}	// 具体分析URL的方法	public void processURL(URL url) {		try {			log("Processing: " + url);// 控制台打印处理的URL地址			// get the URL's contents			URLConnection connection = url.openConnection();			System.out.println(connection.getContentType() + "++++++++++++++++====");			if ((connection.getContentType() != null) && !connection.getContentType().toLowerCase().startsWith("text/")) {				getWorkloadWaiting().remove(url);				getWorkloadProcessed().add(url);				log("Not processing because content type is: " + connection.getContentType());				return;			}			// read the URL			InputStream is = connection.getInputStream();			Reader r = new InputStreamReader(is);			// parse the URL			HTMLEditorKit.Parser parse = new HTMLParse().getParser();			// Parse the given stream and drive the given callback with the			// results of the parse. 
This method should be implemented to be			// thread-safe.			// 解析给定的流并通过解析的结果驱动给定的回调。该方法执行完之后,会调用给定的回调函数			parse.parse(r, new Parser(url), true);		} catch (IOException e) {// 如果出错			getWorkloadWaiting().remove(url);// 从工作集中移除URL			getWorkloadError().add(url);// 将出错的URL加入错误的工作集			log("Error: " + url);			report.spiderURLError(url);// 报告该出错的URL			return;		}		// mark URL as complete		getWorkloadWaiting().remove(url);		getWorkloadProcessed().add(url);		log("Complete: " + url);	}	// 蜘蛛工作的方法,只要等待工作集不为空,并且标志位为false,那么一直从集合中取出URL	public void begin() {		cancel = false;		while (!getWorkloadWaiting().isEmpty() && !cancel) {			Object list[] = getWorkloadWaiting().toArray();			for (int i = 0; (i   <p></p> 
声明:
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn