正则表达式过滤img标签中的src的内容

package com.xx.content.questions.xx.util;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

import com.eebbk.edu.common.util.json.JsonTool;

/**
 * HTML helpers: extracts resource URLs from markup and reduces markup to plain text.
 */
public class HtmlUtil {

    /**
     * Tokenizer used to find URL candidates. Group 2 is the text that runs (on one line) up to
     * the next {@code '}, {@code "}, {@code >} or {@code )}; group 1 is an optional leading
     * single quote. An earlier revision of this expression required the token to start with
     * "http"; that restriction is no longer applied.
     */
    private static final String patternSrcStr = "(\'?|\"?)(.*?)(\'|\"|>|\\))";

    /** Compiled once; {@link Pattern} instances are immutable and safe to share. */
    private static final Pattern SRC_PATTERN =
            Pattern.compile(patternSrcStr, Pattern.CASE_INSENSITIVE);

    /** Default Xiaoyuan address. */
    private static final String DEFAULT_XIAOYUAN_DOMAIN = "http://xxx";

    /** File extensions (compared case-sensitively, without the dot) accepted by {@link #isReplace}. */
    private static final String[] SUFFIXES =
            {"png", "jpg", "jpeg", "mp3", "gif", "svg", "css", "js", "GIF"};

    /** Matches a whole {@code <script ...>...</script>} element, body included. */
    private static final Pattern SCRIPT_PATTERN =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?<\\/script>", Pattern.CASE_INSENSITIVE);

    /** Matches a whole {@code <style ...>...</style>} element, body included. */
    private static final Pattern STYLE_PATTERN =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?<\\/style>", Pattern.CASE_INSENSITIVE);

    /** Matches any single HTML tag. */
    private static final Pattern TAG_PATTERN =
            Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    /** Matches a line break followed by whitespace and one or more repeats of the same break. */
    private static final Pattern BLANK_LINES = Pattern.compile("((\r\n)|\n)[\\s\t ]*(\\1)+");

    /** Matches a line break at the very start of the text. */
    private static final Pattern LEADING_NEWLINE = Pattern.compile("^((\r\n)|\n)");

    /** Matches a single carriage return or line feed. */
    private static final Pattern LINE_BREAKS = Pattern.compile("[\r\n]");

    /** Matches a run of whitespace. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Extracts resource addresses (images and the other types accepted by
     * {@link #isReplace(String)}) from an HTML fragment.
     *
     * <p>Each accepted address has {@code +} rewritten to {@code %2B} and every {@code amp;}
     * removed (so {@code &amp;} becomes {@code &}). Duplicates are dropped and the addresses are
     * returned in the order they first appear in {@code content}.
     *
     * @param content HTML text to scan; may be {@code null}
     * @return the accepted addresses (possibly empty), or {@code null} when {@code content} is
     *         {@code null} or empty
     */
    public static List<String> extralImg(String content) {
        if (content == null || content.isEmpty()) {
            return null;
        }

        // LinkedHashSet de-duplicates while keeping document order, so the result is deterministic.
        Set<String> candidates = new LinkedHashSet<>();
        Matcher matcher = SRC_PATTERN.matcher(content);
        while (matcher.find()) {
            // Only group 2 can hold an address; group 1 is at most a lone quote character.
            String token = matcher.group(2);
            if (token != null && !token.isEmpty()) {
                candidates.add(token);
            }
        }

        List<String> result = new ArrayList<>();
        for (String url : candidates) {
            if (isReplace(url)) {
                result.add(url.replace("+", "%2B").replace("amp;", ""));
            }
        }
        return result;
    }

    /**
     * Tells whether a token should be treated as a resource address.
     *
     * @param url candidate token; may be {@code null}
     * @return {@code true} when {@code url} contains {@code "latext"} or ends with a dot followed
     *         by one of the known suffixes; {@code false} otherwise, including for {@code null}
     */
    public static boolean isReplace(String url) {
        if (url == null) {
            return false;
        }
        if (url.contains("latext")) {
            return true;
        }
        for (String suffix : SUFFIXES) {
            if (url.endsWith("." + suffix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reduces an HTML fragment to plain text.
     *
     * <p>Script and style elements are removed together with their bodies, remaining tags are
     * stripped, line breaks are removed, non-breaking spaces become ordinary spaces and every
     * run of whitespace is collapsed to a single space.
     *
     * @param htmlStr HTML text; may be {@code null}
     * @return the plain text, or an empty string when {@code htmlStr} is {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            return "";
        }

        // Script and style must go first: their bodies are not tags and would survive otherwise.
        String text = SCRIPT_PATTERN.matcher(htmlStr).replaceAll("");
        text = STYLE_PATTERN.matcher(text).replaceAll("");
        text = TAG_PATTERN.matcher(text).replaceAll("");

        // Drop the blank lines left behind by removed tags.
        text = BLANK_LINES.matcher(text).replaceAll("");
        text = LEADING_NEWLINE.matcher(text).replaceAll("");
        // "\\n" is the two-character sequence backslash + n, not a line feed.
        text = text.trim().replace("\\n", "").trim();
        text = LINE_BREAKS.matcher(text).replaceAll("");

        // Non-breaking spaces (entity or raw U+00A0) are not matched by \s, so normalise them
        // to plain spaces before collapsing whitespace.
        text = text.replace("&nbsp;", " ").replace("\u00a0", " ");
        return WHITESPACE.matcher(text).replaceAll(" ");
    }

    /**
     * Small manual demo: prints the addresses and the plain text extracted from a sample
     * fragment. Output uses {@code List.toString()} so the demo depends only on the JDK.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String sample = "<p>A.<img src=\"" + DEFAULT_XIAOYUAN_DOMAIN + "/a.png\"/>"
                + " B.<img src='" + DEFAULT_XIAOYUAN_DOMAIN + "/b+c.jpg'/></p>";
        System.out.println(extralImg(sample));
        System.out.println(delHTMLTag(sample));
    }
}

 

你可能感兴趣的:(正则表达式)