最近得到一个需求,需要模拟登录网页,然后通过网页接口获取相应数据。一共两个网页,其中没有验证码的网页比较容易地模拟登录成功,但是另一个带有(图片)验证码的网页却总是登录失败。
/**
 * Fetches a captcha image over HTTP and submits it to the Aliyun market
 * captcha-recognition API.
 *
 * <p>Both public methods return {@code null} on failure and log the cause.
 */
public class AliYun {
    private static final Logger logger = Logger.getLogger(AliYun.class);

    /** Base address of the recognition service. */
    private static final String HOST = "http://jisuyzmsb.market.alicloudapi.com";
    /** Path of the recognition endpoint. */
    private static final String PATH = "/captcha/recognize";
    /** Placeholder for the purchased APPCODE; must be replaced before use. */
    private static final String APP_CODE = "你购买接口之后的Code";

    /**
     * Downloads the image at {@code imgURL}, re-encodes it as JPEG and returns
     * the JPEG bytes as a Base64 string without line breaks.
     *
     * @param imgURL address of the captcha image
     * @return the Base64 text, or {@code null} when the image cannot be
     *         downloaded, decoded or re-encoded
     */
    public static String getImageStrFromUrl(String imgURL) {
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(imgURL).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(5 * 1000);

            BufferedImage src;
            try (InputStream inStream = conn.getInputStream()) {
                src = ImageIO.read(inStream);
            }
            // ImageIO.read returns null when no registered reader understands the data.
            if (src == null) {
                logger.error("无法解析验证码图片:" + imgURL);
                return null;
            }

            // Re-encode in memory rather than through a fixed temp file, which was
            // platform-specific and shared between concurrent calls.
            java.io.ByteArrayOutputStream jpeg = new java.io.ByteArrayOutputStream();
            if (!ImageIO.write(src, "jpg", jpeg)) {
                logger.error("无法将验证码图片转换为jpg:" + imgURL);
                return null;
            }
            // The basic encoder emits no line separators, so callers need no clean-up.
            return java.util.Base64.getEncoder().encodeToString(jpeg.toByteArray());
        } catch (IOException e) {
            logger.error("获取验证码图片失败:" + imgURL, e);
            return null;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Downloads the captcha image and asks the recognition API for its text.
     *
     * @param imgPath address of the captcha image
     * @return the recognised code, or {@code null} when the image could not be
     *         fetched or the API did not report success
     */
    public static String getLoginCode(String imgPath) {
        String baseImg = getImageStrFromUrl(imgPath);
        if (baseImg == null) {
            return null;
        }

        Map<String, String> bodys = new HashMap<>();
        bodys.put("pic", baseImg);
        HttpRequest request = HttpRequest.post(HOST + PATH + "?type=en4", bodys, true)
                .header("Authorization", "APPCODE " + APP_CODE)
                .header("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
        String result = request.body();
        logger.debug("阿里云接口识别结果:" + result);
        return parseCode(result);
    }

    /** Extracts {@code result.code} from the API response; null unless status is "0". */
    private static String parseCode(String result) {
        JSONObject jsonObject;
        try {
            jsonObject = JSON.parseObject(result);
        } catch (RuntimeException e) {
            // The parser reports malformed input with an unchecked exception.
            logger.error("阿里云接口返回内容无法解析:" + result, e);
            return null;
        }
        if (jsonObject == null) {
            logger.error("阿里云接口返回内容为空");
            return null;
        }
        // Success:  {"status":"0","msg":"ok","result":{"type":"en4","code":"5sfw"}}
        // Failure:  {"status":"210","msg":"未知错误","result":""}
        if (!"0".equals(jsonObject.getString("status"))) {
            logger.error("阿里云接口识别失败:" + result);
            return null;
        }
        JSONObject rs = jsonObject.getJSONObject("result");
        return rs == null ? null : rs.getString("code");
    }
}
//模拟登陆
/**
 * Recognises the captcha, submits the login request and stores the first
 * cookie of the login response in Redis.
 *
 * <p>NOTE: the captcha image is fetched by a separate request and no cookie
 * from that request is sent with the login; the surrounding article names
 * this as the reason the login keeps failing.
 *
 * @throws IllegalStateException if the captcha could not be recognised
 */
private void login() {
    // (1) Recognise the captcha served at the captcha image URL.
    String vldcode = AliYun.getLoginCode(loginConfig.getUrl() + LoginConfig.CODE_URL);
    if (vldcode == null) {
        // String.replace would otherwise fail with a bare NullPointerException.
        throw new IllegalStateException("captcha recognition failed; cannot log in");
    }

    // (2) Log in with user name, password and the recognised captcha.
    String url = loginConfig.getUrl() + LoginConfig.LOGIN_URL
            .replace("{username}", loginConfig.getUsername())
            .replace("{password}", loginConfig.getPassword())
            .replace("{vldcode}", vldcode);
    HttpRequest request = HttpRequest.get(url);

    // (3) Read the cookie set by the login response.
    Map<String, List<String>> headers = request.headers();
    List<String> cookies = headers.get("Set-Cookie");
    if (cookies == null || cookies.isEmpty()) {
        return;
    }
    // Keep only the leading name=value pair; attributes such as Path are dropped.
    String cookie = cookies.get(0).split(";")[0];

    // (4) Store it in Redis.
    String key = RedisKeyList.getLoginSessionId();
    Jedis jedis = RedisClient.getJedis();
    try {
        if (jedis != null) {
            jedis.set(key, cookie);
        }
    } catch (Exception ignored) {
        // Best effort: a failed write leaves the key unchanged.
    } finally {
        RedisClient.returnResource(jedis);
    }
}
// 携带有识别前验证码(图片)base64编码 识别之后的验证码 cookie
public void 获取数据(){
// Usage sketch (pseudo-code, not compilable as written).
// If the session id stored in Redis (the value taken from the cookie) is missing
if (Redis.getLoginSessionId == null) {
// log in again
login();
}
...
...
// Call the interface and fetch the data
if (数据获取失败) {
// A failed fetch is taken to mean the login has expired: clear the value stored in Redis
loginConfig.cleanSessionId();
// NOTE(review): the retry below recurses with no visible attempt limit — confirm a bound exists in the real code
获取数据();// call 获取数据 again
return null;
}
}
(1)通过图片URL获取图片,调用阿里市场购买的接口,识别图片,获取验证码
(https://market.aliyun.com/products/57126001/cmapi014396.html#sku=yuncode839600006)阿里市场
(2)模拟登陆,并将Cookie存入Redis, 以后的数据获取只需要携带Cookie即可
(3)当Cookie失效时(例如其他人登录了该网站;因为是管理网站,这种情况比较少),重新登录并获取新的Cookie
但是,以上代码总是出现错误,Cookie总是不正确的。
(1)在分析登录流程之后,我怀疑:通过URL获取图片验证码的请求,与提交(用户名,密码,验证码)的登录请求不在同一个会话中(两次请求的Cookie不同),服务器无法把识别出的验证码与登录请求对应起来,也就是说,我每次模拟登录时使用的验证码对服务器而言总是过期(无效)的。
(2)于是思考之后,得到新思路:
/**
 * Holds a captcha's Base64 text, its recognised code and a cookie.
 */
public class CookieAndCodeDTO extends BaseDTO {

    /** Base64 encoding of the captcha, before recognition. */
    private String base64;

    public String getBase64() {
        return this.base64;
    }

    public void setBase64(String base64) {
        this.base64 = base64;
    }

    /** Captcha text, after recognition. */
    private String code;

    public String getCode() {
        return this.code;
    }

    public void setCode(String code) {
        this.code = code;
    }

    /** The cookie. */
    private String cookie;

    public String getCookie() {
        return this.cookie;
    }

    public void setCookie(String cookie) {
        this.cookie = cookie;
    }
}
// 图片识别以及携带Cookie
/**
 * Fetches a captcha image together with the cookie issued alongside it, and
 * submits the image to the Aliyun market captcha-recognition API.
 *
 * <p>Both public methods return {@code null} on failure and log the cause.
 *
 * Created by Administrator on 2018-2-1.
 */
public class AliYun {
    private static final Logger logger = Logger.getLogger(AliYun.class);

    /** Base address of the recognition service. */
    private static final String HOST = "http://jisuyzmsb.market.alicloudapi.com";
    /** Path of the recognition endpoint. */
    private static final String PATH = "/captcha/recognize";
    /** Placeholder for the purchased APPCODE; must be replaced before use. */
    private static final String APP_CODE = "你购买接口之后的CODE";

    /**
     * Downloads the image at {@code imgURL}, re-encodes it as JPEG and returns
     * its Base64 text together with the first cookie of the image response.
     *
     * @param imgURL address of the captcha image
     * @return a DTO carrying the Base64 text (no line breaks) and, when the
     *         response set one, the cookie; {@code null} when the image cannot
     *         be downloaded, decoded or re-encoded
     */
    public static CookieAndCodeDTO getImageStrFromUrl(String imgURL) {
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(imgURL).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(5 * 1000);

            CookieAndCodeDTO cookieAndCodeDTO = new CookieAndCodeDTO();
            Map<String, List<String>> headers = conn.getHeaderFields();
            List<String> cookies = headers.get("Set-Cookie");
            if (cookies != null && !cookies.isEmpty()) {
                // Keep only the leading name=value pair of the first cookie.
                cookieAndCodeDTO.setCookie(cookies.get(0).split(";")[0]);
            }

            BufferedImage src;
            try (InputStream inStream = conn.getInputStream()) {
                src = ImageIO.read(inStream);
            }
            // ImageIO.read returns null when no registered reader understands the data.
            if (src == null) {
                logger.error("无法解析验证码图片:" + imgURL);
                return null;
            }

            // Re-encode in memory rather than through a fixed temp file, which was
            // platform-specific and shared between concurrent calls.
            java.io.ByteArrayOutputStream jpeg = new java.io.ByteArrayOutputStream();
            if (!ImageIO.write(src, "jpg", jpeg)) {
                logger.error("无法将验证码图片转换为jpg:" + imgURL);
                return null;
            }
            // The basic encoder emits no line separators, so callers need no clean-up.
            cookieAndCodeDTO.setBase64(
                    java.util.Base64.getEncoder().encodeToString(jpeg.toByteArray()));
            return cookieAndCodeDTO;
        } catch (IOException e) {
            logger.error("获取验证码图片失败:" + imgURL, e);
            return null;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /**
     * Downloads the captcha image and asks the recognition API for its text.
     *
     * @param imgPath address of the captcha image
     * @return the DTO from {@link #getImageStrFromUrl(String)} with the
     *         recognised code filled in, or {@code null} when the image could
     *         not be fetched or the API did not return a code
     */
    public static CookieAndCodeDTO getLoginCode(String imgPath) {
        CookieAndCodeDTO cookieAndCodeDTO = getImageStrFromUrl(imgPath);
        if (cookieAndCodeDTO == null || cookieAndCodeDTO.getBase64() == null) {
            return null;
        }

        Map<String, String> bodys = new HashMap<>();
        bodys.put("pic", cookieAndCodeDTO.getBase64());
        HttpRequest request = HttpRequest.post(HOST + PATH + "?type=en4", bodys, true)
                .header("Authorization", "APPCODE " + APP_CODE)
                .header("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
        String result = request.body();
        logger.debug("阿里云接口识别结果:" + result);

        String code = parseCode(result);
        if (code == null) {
            return null;
        }
        cookieAndCodeDTO.setCode(code);
        return cookieAndCodeDTO;
    }

    /** Extracts {@code result.code} from the API response; null unless status is "0". */
    private static String parseCode(String result) {
        JSONObject jsonObject;
        try {
            // The response has been seen to arrive malformed.
            jsonObject = JSON.parseObject(result);
        } catch (RuntimeException e) {
            logger.error("阿里云接口返回内容无法解析:" + result, e);
            return null;
        }
        if (jsonObject == null) {
            logger.error("阿里云接口返回内容为空");
            return null;
        }
        // Success:  {"status":"0","msg":"ok","result":{"type":"en4","code":"5sfw"}}
        // Failure:  {"status":"210","msg":"未知错误","result":""}
        if (!"0".equals(jsonObject.getString("status"))) {
            logger.error("阿里云接口识别失败:" + result);
            return null;
        }
        JSONObject rs = jsonObject.getJSONObject("result");
        return rs == null ? null : rs.getString("code");
    }
}
//模拟登陆 并保存Cookie
/**
 * Recognises the captcha, logs in with the cookie that was issued together
 * with the captcha image, and stores that cookie in Redis.
 *
 * @throws IllegalStateException if no captcha code or no cookie was obtained
 */
private void login() {
    // (1) Fetch the captcha; the result carries both the cookie and the recognised code.
    CookieAndCodeDTO cookieAndCodeDTO = AliYun.getLoginCode(loginConfig.getUrl() + LoginConfig.CODE_URL);
    // Explicit checks instead of assert, which is skipped unless the JVM runs with -ea.
    if (cookieAndCodeDTO == null || cookieAndCodeDTO.getCode() == null) {
        throw new IllegalStateException("captcha recognition failed; cannot log in");
    }
    String cookie = cookieAndCodeDTO.getCookie();
    if (cookie == null) {
        throw new IllegalStateException("captcha response set no cookie; cannot log in");
    }

    String vldcode = cookieAndCodeDTO.getCode();
    String url = loginConfig.getUrl() + LoginConfig.LOGIN_URL
            .replace("{username}", loginConfig.getUsername())
            .replace("{password}", loginConfig.getPassword())
            .replace("{vldcode}", vldcode);

    // (2) Log in, sending the captcha's cookie.
    HttpRequest request = HttpRequest.get(url).header("Cookie", cookie);
    // Reading the status code is what actually sends the request.
    request.code();

    // (3) Store the cookie in Redis.
    String key = RedisKeyList.getLoginSessionId();
    Jedis jedis = RedisClient.getJedis();
    try {
        if (jedis != null) {
            jedis.set(key, cookie);
        }
    } catch (Exception ignored) {
        // Best effort: a failed write leaves the key unchanged.
    } finally {
        RedisClient.returnResource(jedis);
    }
}
伪代码同上。
运行成功。(代码有许多不完善的地方,解释也可能不是很准确,如果有知道的可以留言)