教你用Java代码做爬虫 爬JavaEye博客上的文章

前段时间闲来无事,申请了个 JavaEye 博客玩玩,看到自己博客的浏览量太低,就用 Java 代码随手写了个刷浏览次数的小工具。

想把文章爬下来自己收藏的,只需要稍微改动一下代码就 OK(例如把读取到的页面内容保存到文件,而不是像下面这样直接丢弃)。

 

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;

/*    */
/**
 * Command-line tool that reads a list of URLs from a text file and then
 * downloads them one after another, in an endless cycle, on a background
 * thread. The downloaded bytes are discarded; only a running count of
 * successful downloads is printed.
 *
 * <p>Usage: {@code java test <url-file> [sleep-millis]}
 */
public class test
{
	/** Handle for a fetcher thread. Nothing in this class assigns it. */
	public static test.TT ts = null;

	/** Pause between two requests, in milliseconds, when none is supplied. */
	private static final int DEFAULT_SLEEP_MILLIS = 30000;

	/**
	 * Entry point.
	 *
	 * @param args {@code args[0]} is the path of a text file holding one URL
	 *             per line; the optional {@code args[1]} is the pause between
	 *             requests in milliseconds (default 30000)
	 */
	public static void main(String[] args) {
		// Validate before touching args[0]: the path is mandatory.
		if (args == null || args.length < 1 || args[0] == null || args[0].equals("")) {
			System.out.println("请指定读取文件路径:");
			return;
		}
		int sleep = DEFAULT_SLEEP_MILLIS;
		if (args.length > 1 && args[1] != null && !args[1].equals("")) {
			sleep = parseSleep(args[1]);
		}
		try {
			ArrayList<String> paths = readUrls(new File(args[0]));
			TT t = new TT(paths, sleep);
			t.start();
		} catch (IOException e) {
			e.printStackTrace();
			return;
		}
	}

	/**
	 * Parses the pause argument.
	 *
	 * @param text the raw command-line value, not {@code null}
	 * @return the parsed number of milliseconds, or the default after printing
	 *         a hint when {@code text} is not a non-negative integer
	 */
	private static int parseSleep(String text) {
		try {
			int millis = Integer.parseInt(text);
			// A negative pause cannot be handed to Thread.sleep; treat it as invalid.
			if (millis >= 0) {
				return millis;
			}
		} catch (NumberFormatException ignored) {
			// Not a number: fall through to the hint and keep the default.
		}
		System.out.println("请输入数字:");
		return DEFAULT_SLEEP_MILLIS;
	}

	/**
	 * Reads URLs from {@code file}, one per line, stopping at end of file or
	 * at the first empty line, whichever comes first.
	 *
	 * @param file the text file to read
	 * @return the lines read, in file order; possibly empty
	 * @throws IOException if the file cannot be opened or read
	 */
	private static ArrayList<String> readUrls(File file) throws IOException {
		ArrayList<String> paths = new ArrayList<String>();
		InputStream in = new FileInputStream(file);
		try {
			// BufferedReader replaces the deprecated DataInputStream.readLine().
			BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
			String line = reader.readLine();
			while (line != null && !line.equals("")) {
				paths.add(line);
				line = reader.readLine();
			}
		} finally {
			// Closing the underlying stream releases the file even when reading fails.
			in.close();
		}
		return paths;
	}

	/**
	 * Worker thread that requests each URL of a list in turn, wrapping around
	 * at the end, until {@link #stop} is set or the thread is interrupted.
	 */
	static class TT extends Thread
	{
		/** Value sent as the {@code User-Agent} request header. */
		private static final String USER_AGENT =
				"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3)";

		/** URLs to request, in order. */
		ArrayList<String> urls = null;

		/** Pause after each request, in milliseconds. */
		int sleep = 30000;

		/**
		 * @param urls  the URLs to cycle through
		 * @param sleep pause after each request, in milliseconds
		 */
		public TT(ArrayList<String> urls, int sleep) {
			this.urls = urls;
			this.sleep = sleep;
		}

		/**
		 * Set to {@code true} to end the loop after the current pause.
		 * Volatile so a write from another thread is seen by {@link #run()}.
		 */
		public volatile boolean stop = false;

		/**
		 * Requests the URLs round-robin, printing the number of successful
		 * downloads so far after each success. A failed request is reported
		 * and skipped; the loop still pauses and moves on to the next URL.
		 * Returns immediately when there are no URLs.
		 */
		@Override
		public void run()
		{
			if (this.urls == null || this.urls.isEmpty()) {
				return; // nothing to fetch; avoids spinning on an empty list
			}
			int i = 0;
			int counts = 0;
			do {
				try {
					fetch(this.urls.get(i));
					counts++;
					System.out.println(counts);
				} catch (Exception e) {
					// Best effort: one bad URL must not stop the cycle.
					e.printStackTrace();
				}
				// Advance and pause outside the try so a failing URL is neither
				// retried forever nor retried without any delay.
				i++;
				if (i >= this.urls.size()) {
					i = 0;
				}
				try {
					Thread.sleep(Math.max(0, this.sleep));
				} catch (InterruptedException e) {
					Thread.currentThread().interrupt();
					return;
				}
			} while (!this.stop);
		}

		/**
		 * Downloads {@code address} completely and discards the content.
		 *
		 * @param address the URL to request
		 * @throws IOException if the URL is malformed or the transfer fails
		 */
		private void fetch(String address) throws IOException {
			URLConnection con = new URL(address).openConnection();
			con.setDoInput(true);
			con.setRequestProperty("User-Agent", USER_AGENT);
			con.connect();
			InputStream is = con.getInputStream();
			try {
				byte[] bys = new byte[1024];
				while (is.read(bys, 0, bys.length) != -1) {
					// Drain the response; the content itself is not used.
				}
			} finally {
				is.close();
			}
		}
	}
}

你可能感兴趣的:(java,thread,.net,windows)