前段时间在写 Web 页面,为了方便,用 Selenium 写了一个级联打开页面上所有超链接的小程序,代码如下:
import com.thoughtworks.selenium.DefaultSelenium; import com.thoughtworks.selenium.Selenium; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class OpenLinkTestThread extends Thread { private EnviromentPara ep; public OpenLinkTestThread(EnviromentPara ep) { this.ep = ep; } @Override public void run() { openLinkTest(); } public void openLinkTest() { try { Selenium selenium = new DefaultSelenium("localhost", Constant.port, ep.getBrowser(), ep.getUrl()); //4444 is default server port, ep.getBrowser():"*firefox","*googlechrome","*iexplore",不过ie支持不是很好 selenium.start(); //selenium.open(ep.getUrl()); selenium.openWindow(ep.getUrl(), ep.getUrl());//打开一个新的窗口,窗口id:ep.getUrl() selenium.waitForPopUp(ep.getUrl(), "100000"); openLinkForOnePage(selenium, ep.getBrowser(), ep.getUrl(), ep.isIsRecursion(),1);//ep.isIsRecursion()是否递归打开链接 if (ep.isCloseOnFinish()) { Thread.sleep(10000); selenium.stop(); } } catch (Exception e) { e.printStackTrace(); } } public void openLinkForOnePage(Selenium selenium, String browser, String url, boolean recursion, int recursionDeep) { selenium.selectWindow(url); String htmlSource = selenium.getHtmlSource(); List list = getAllLinkForOnePage(htmlSource); for (int i = 0; list != null && i < list.size(); i++) { selenium.openWindow((String) list.get(i), (String) list.get(i)); selenium.waitForPopUp((String) list.get(i), "100000"); if (recursion) { if(recursionDeep < ep.getRecursionDeep()) openLinkForOnePage(selenium, browser, (String) list.get(i), recursion,recursionDeep+1); } } } public List getAllLinkForOnePage(String htmlSource) {//提取页面上的所有超链接 List list = new ArrayList(); Pattern linkElementPattern = Pattern.compile("<a\\s.*?href=\"([^\"]+)\"[^>]*>(.*?)</a>"); Matcher linkElementMatcher = linkElementPattern.matcher(htmlSource); while (linkElementMatcher.find()) { String temp = linkElementMatcher.group(1); if(temp!=null) temp=temp.trim(); if (temp==null||temp.startsWith("#") || 
temp.toLowerCase().startsWith("javascript:")) {// continue; } temp = temp.replace("&", "&"); list.add(temp); } return list; } }