JavaWeb简单爬取网页内容(2)

前期:
1建立项目
2导入jar(在WEB-INF文件夹下建立lib文件夹。很多人在这里入了坑:没有把jar放进这个lib目录导致无法使用,或者报一堆错)
3大家看一下我项目结构(主要是dao和servlet,剩下自己拓展)
JavaWeb简单爬取网页内容(2)_第1张图片

4关于如何导入jar包 移步上一篇

5
WEIBODAO.java

package WBDAO;

import java.io.IOException;
import java.sql.SQLException;

import org.jsoup.*;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import WBClass.WBCLASS;

public class WEIBODAO {

	/**
	 * Fetches and parses the HTML page at the given URL.
	 *
	 * @param url the page to fetch
	 * @return the parsed document, or {@code null} when the request fails
	 */
	public Document getDocument(String url) {
		try {
			return Jsoup.connect(url).get();
		} catch (IOException e) {
			// Network or parse failure: log it and fall through to null so
			// the caller decides how to react.
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Fetches the Weibo hot-search page and prints each table row's text.
	 *
	 * @return {@code false} always (kept for compatibility with existing callers)
	 */
	public boolean find() {
		// BUG FIX: the original instantiated a non-existent Demo1 class;
		// getDocument is declared on this very class, so call it directly.
		Document doc = getDocument("https://s.weibo.com/top/summary?cate=realtimehot");
		if (doc == null) {
			// Fetch failed; nothing to print.
			return false;
		}
		Elements rows = doc.select("tbody").select("tr");
		// Guard the hard-coded 51 against pages with fewer rows to avoid
		// IndexOutOfBoundsException.
		int limit = Math.min(51, rows.size());
		for (int i = 0; i < limit; i++) {
			System.out.println(rows.get(i).text());
		}
		return false;
	}

}

6 WBHomeServlet.java

package WBServlet;

import java.io.IOException;
import java.io.PrintWriter;
import java.sql.SQLException;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import WBBean.WBBean;

import org.jsoup.select.Elements;
import org.jsoup.*;
import WBClass.WBCLASS;
import WBDAO.WEIBODAO;

public class WBHomeServlet extends HttpServlet {

	public WBHomeServlet() {
		super();
	}

	public void destroy() {
		super.destroy();

	}

	public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

		response.setContentType("text/html");
		PrintWriter out = response.getWriter();
		WEIBODAO dao = new WEIBODAO();
		if (dao.find()) {
			System.out.println("链接成功");
		}
}
	public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

		doGet(request, response);

	}
 	public void init() throws ServletException {
		// Put your code here
	}

}

最后放一下效果图

JavaWeb简单爬取网页内容(2)_第2张图片
对了 有时还会可能会报错
HostnameVerifier=weblogic.security.utils.SSLWLSHostnameVerifier, hostname=s.weibo.com.

进入weblogic后台管理(这里用的是weblogic)
环境→服务器→选择你部署的服务器
SSL→高级
然后就有这个东西选择 无。重新运行一下 搞定。

在这里插入图片描述
关于servlet数据如何传到jsp???
这里我可以提供一个思路,对于很多小白都是一个速成的方法
将get到的文本放进一个list里面
然后再转换成String数组类型
利用request的 get/setAttribute 建立关系
接着在JSP页面中先用一个String数组接住,然后用增强for循环(for-each)遍历出来就行了

具体代码贴在下面供大家参考

WEIBODAO.java

package WBDAO;

import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.*;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import WBClass.WBCLASS;

public class WEIBODAO {

	/**
	 * Fetches and parses the HTML page at the given URL.
	 *
	 * @param url the page to fetch
	 * @return the parsed document, or {@code null} when the request fails
	 */
	public Document getDocument(String url) {
		try {
			return Jsoup.connect(url).get();
		} catch (IOException e) {
			// Network or parse failure: log it and fall through to null so
			// the caller decides how to react.
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Fetches the hot-search rows and prints them to the console.
	 *
	 * @return {@code false} always (kept for compatibility with existing callers)
	 */
	public boolean find() {
		// DEDUP: delegate to list() instead of repeating the fetch/extract code.
		for (String row : list()) {
			System.out.println(row);
		}
		System.out.println("加载完毕");
		return false;
	}

	/**
	 * Fetches the Weibo hot-search page and returns the text of up to 51
	 * table rows.
	 *
	 * @return the row texts; an empty array (never {@code null}) when the
	 *         fetch fails
	 */
	public String[] list() {
		// No need for "new WEIBODAO()": getDocument is an instance method
		// on this class.
		Document doc = getDocument("https://s.weibo.com/top/summary?cate=realtimehot");
		if (doc == null) {
			// Fetch failed; return empty rather than null so callers can
			// iterate unconditionally.
			return new String[0];
		}
		Elements rows = doc.select("tbody").select("tr");
		// FIX: the original used a raw List, so B.get(j) returned Object and
		// the assignment to String[] did not compile. Use List<String>.
		List<String> texts = new ArrayList<>();
		// Guard the hard-coded 51 against pages with fewer rows.
		int limit = Math.min(51, rows.size());
		for (int i = 0; i < limit; i++) {
			texts.add(rows.get(i).text());
		}
		return texts.toArray(new String[0]);
	}

}

WBHomeServlet.java

package WBServlet;

import java.io.IOException;
import java.io.PrintWriter;
import java.sql.SQLException;

import javax.ejb.HomeHandle;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import WBBean.WBBean;

import org.jsoup.select.Elements;
import org.jsoup.*;
import WBClass.WBCLASS;
import WBDAO.WEIBODAO;

public class WBHomeServlet extends HttpServlet {

	public WBHomeServlet() {
		super();
	}

	public void destroy() {
		super.destroy();

	}

	public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

		response.setContentType("text/html");
		PrintWriter out = response.getWriter();
		WEIBODAO dao = new WEIBODAO();
	
		String[] B=dao.list();
		request.setAttribute( "ValueA",B);
		request.getRequestDispatcher("home.jsp").forward(request,response);
		System.out.println("发送完毕");

	}

	public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

		doGet(request, response);

	}
	public void init() throws ServletException {
		// Put your code here
	}

}

home.jsp

<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>



  
    
    
    微博热搜仿生页面
    
	
	
	    
	
	
	

  
  
  

<%-- Read the crawled rows the servlet stored under "ValueA" and render each one.
     NOTE(review): the loop body was blank in the pasted source (markup likely
     stripped by the blog platform); an output expression has been restored. --%>
<%String[] s1=(String[])request.getAttribute("ValueA"); %>
<%if (s1 != null) { %>
<%for(String s:s1){ %>
<%=s %><br/>
<% }%>
<% }%>

最终效果图

JavaWeb简单爬取网页内容(2)_第3张图片
最后总结
我个人是习惯MVC模式的
因为可以持续的开发 如果一开始布局比较好的话 局限性较低
若是全部放在class其实也是可以的
直接放在jsp里面也可以,但是在MVC中JSP主要承担的是V(视图)的作用,把爬取逻辑放进去就没必要了

你可能感兴趣的:(Java爬虫)