某些网页有反爬机制,仅使用jsoup和httpclient无法满足要求,此时可以使用selenium。
https://registry.npmmirror.com/binary.html?path=chromedriver/
下载后解压即可,解压得到的chromedriver路径会在代码中通过webdriver.chrome.driver属性用到。
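<project>
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.7.2</version>
    </parent>
    <groupId>com.pshdhx.fund</groupId>
    <artifactId>tiantianjijin</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>tiantianjijin</name>
    <description>天天基金数据爬取</description>
    <properties>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.4</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.11.3</version>
        </dependency>
        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.8.4</version>
        </dependency>
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <version>4.0.0</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.29</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>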
将db.setting文件放到resources目录下。
## db.setting文件
url = jdbc:mysql://xxxxxx:3306/tiantian_fund?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai
user = root
pass = xxxxxxx
## 可选配置
# 是否在日志中显示执行的SQL
showSql = true
# 是否格式化显示的SQL
formatSql = false
# 是否显示SQL参数
showParams = true
# 打印SQL的日志等级,默认debug,可以是info、warn、error
sqlLevel = debug
package com.pshdhx.fund;
import cn.hutool.db.Db;
import cn.hutool.db.Entity;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeDriverLogLevel;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
/**
 * Scheduled job that loads the fund ranking page of fund.eastmoney.com in headless
 * Chrome (via Selenium), reads the text of the {@code dbtable} table body and inserts
 * one row per fund into the {@code fund_info} table through Hutool's {@code Db}.
 *
 * @author pshdhx
 * @date 2022-08-01 13:23
 */
@Configuration // 1. Marks a configuration class; it is also registered as a component.
@EnableScheduling // 2. Enables scheduled tasks.
public class ScheduleTask {

    /** Path of the chromedriver executable handed to Selenium. */
    private static final String CHROME_DRIVER_PATH =
            "D:\\new\\chromeDownload\\chromedriver_win32\\chromedriver.exe";

    /** Ranking page that is scraped. */
    private static final String RANKING_URL =
            "https://fund.eastmoney.com/data/fundranking.html#tall;c0;r;szzf;pn200;ddesc;";

    /** The table text is consumed in groups of four lines: index, code, name, values. */
    private static final int LINES_PER_FUND = 4;

    /**
     * Scrapes the ranking table and stores every fund row in {@code fund_info}.
     *
     * <p>Each row gets its own date and rate values; a value that is missing or cannot
     * be parsed is stored as {@code null} instead of silently reusing the previous
     * fund's value. A failed insert is reported and the remaining rows are still
     * processed.
     *
     * @throws ParseException if the date column of a row cannot be parsed
     */
    // 3. The scheduled task itself.
    // NOTE(review): in Spring cron expressions day-of-week 0 and 7 are Sunday, so "2-6"
    // fires Tuesday..Saturday. If Monday..Friday was intended use "MON-FRI" - confirm.
    @Scheduled(cron = "0 0 10 ? * 2-6")
    // Alternatively a fixed interval can be used, e.g. every 5 seconds:
    //@Scheduled(fixedRate=5000)
    private void configureTasks() throws ParseException {
        String[] lines = fetchRankingTableText().split("\n");
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");

        // Only complete four-line groups are processed; a trailing partial group is ignored.
        for (int start = 0; start + LINES_PER_FUND <= lines.length; start += LINES_PER_FUND) {
            // lines[start] is the ranking index, which is not stored.
            String fundCode = lines[start + 1];
            String fundName = lines[start + 2];
            String fundValues = lines[start + 3];
            if (fundValues.isEmpty()) {
                continue;
            }

            // Declared per row so that values never leak from one fund to the next.
            Date fundDate = null;
            Double dayRate = null;
            Double weekRate = null;
            Double monthRate = null;
            Double month3Rate = null;

            String[] fields = fundValues.split(" ");
            if (fields.length > 5) {
                fundDate = parseFundDate(dateFormat, fields[0]);
                dayRate = parsePercent(fields, 3);
                weekRate = parsePercent(fields, 4);
                monthRate = parsePercent(fields, 5);
                // Index 6 may be absent on a six-field row; parsePercent checks the bounds.
                month3Rate = parsePercent(fields, 6);
            }

            try {
                Db.use().insert(Entity.create("fund_info")
                        .set("fund_code", fundCode)
                        .set("fund_name", fundName)
                        .set("fund_date", fundDate)
                        .set("fund_day_rate", dayRate)
                        .set("fund_week_rate", weekRate)
                        .set("fund_month_rate", monthRate)
                        .set("fund_month_3_rate", month3Rate));
            } catch (SQLException e) {
                // Keep going with the remaining funds, but say which row failed.
                System.err.println("Failed to insert fund_info row for fund " + fundCode);
                e.printStackTrace();
            }
        }
    }

    /**
     * Opens the ranking page in headless Chrome and returns the text of the table body.
     * The browser is always shut down, even when loading or element lookup fails, so
     * repeated scheduled runs do not accumulate Chrome processes.
     */
    private static String fetchRankingTableText() {
        System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH);
        ChromeOptions options = new ChromeOptions();
        options.addArguments("--headless");
        options.addArguments("--disable-gpu");
        options.setLogLevel(ChromeDriverLogLevel.OFF);

        WebDriver driver = new ChromeDriver(options);
        try {
            driver.get(RANKING_URL);
            WebElement table = driver.findElement(By.id("dbtable"));
            System.err.println("==========================================================");
            WebElement tbody = table.findElement(By.tagName("tbody"));
            return tbody.getText();
        } finally {
            driver.quit();
        }
    }

    /**
     * Turns the month-day text of a row (the code prefixes it with a year and parses it
     * as {@code yyyy-MM-dd}) into a full date. The current year is used; if that would
     * put the date in the future, the previous year is used instead, which covers a run
     * early in January that still sees December data.
     */
    private static Date parseFundDate(SimpleDateFormat dateFormat, String monthDay)
            throws ParseException {
        int year = LocalDateTime.now().getYear();
        Date parsed = dateFormat.parse(year + "-" + monthDay);
        if (parsed.after(new Date())) {
            parsed = dateFormat.parse((year - 1) + "-" + monthDay);
        }
        return parsed;
    }

    /**
     * Reads the number in front of the percent sign of {@code fields[index]}.
     *
     * @return the parsed value, or {@code null} when the field does not exist, holds no
     *         percent sign, or the text before the sign is not a number
     */
    private static Double parsePercent(String[] fields, int index) {
        if (index >= fields.length || fields[index].indexOf('%') == -1) {
            return null;
        }
        String[] parts = fields[index].split("%");
        if (parts.length == 0) {
            return null;
        }
        try {
            return Double.parseDouble(parts[0]);
        } catch (NumberFormatException e) {
            // One malformed cell should not abort the whole scrape; store null instead.
            System.err.println("Unparseable rate value: " + fields[index]);
            return null;
        }
    }
}