Java正则表达式--截取网页中的链接中的TITLE和URL,且URL必须以HTTP或HTTPS开头

// Extract the URL, title and text of every <a> link in an HTML fragment,
// keeping only links whose href starts with http:// or https://.
s = "<div><a href='http://www.baidu.com' title='tip'>aaa</a><a href='https://www.baidu.com?param=1' title='baiduSearch'>bbb</a><span><a href='file:///d:/test/Arr.txt'>ccc</a></span></div>";
// Pattern notes:
//  - "\\s+" requires whitespace after "<a", so "<ahref=..." is not accepted.
//  - "https?://" enforces the scheme explicitly; a bare "http" prefix such as
//    "httpfoo:" is rejected, and other schemes (e.g. file:) never match.
//  - The optional quotes sit outside the capture groups, so group 1 (URL) and
//    group 2 (title) hold the bare attribute values without quotes or padding.
//  - The pattern expects href to be the first attribute of the tag and title
//    to appear somewhere after it; group 3 is the link text.
mode = "<a\\s+href=['\"]?(https?://[^'\"\\s>]*)['\"]?[^>]*?\\stitle=['\"]?([^'\">]*)['\"]?[^>]*>(.*?)</a>";
// HTML tag/attribute names are case-insensitive, and link text may span lines.
p = Pattern.compile(mode, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
m = p.matcher(s);
while (m.find()) {
	System.out.println("find...");
	String url = m.group(1);
	String title = m.group(2);
	String text = m.group(3);
	System.out.println("title:" + title + ",src:" + url + ",text:" + text);
}



你可能感兴趣的:(java,正则表达式)