又见XML解析

package parseXML;

import java.io.File;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.List;

import org.jdom.Content;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;

import utils.NumberUtils;
import entity.SearchUnit;
import exception.ParseFileException;

/**
 * Parses an XML description file with JDOM and copies its fields into a
 * {@link SearchUnit}.
 *
 * <p>The root element is expected to carry the child elements read in
 * {@link #getSearchUnit(File)}. Some of them are required, others fall back
 * to a default value when they are missing or empty.
 */
public class JDomParse {

	/** Regular expression matching one markup tag, e.g. {@code <b>} or {@code </p>}. */
	private static final String TAG_REGEX = "<[^>]*>";

	/**
	 * Descriptions whose cleaned length reaches this limit are cut down to
	 * {@code DESCRIPTION_LIMIT - 1} characters.
	 */
	private static final int DESCRIPTION_LIMIT = 100;

	/** File parsed by {@link #main(String[])} when no path is given on the command line. */
	private static final String DEFAULT_FILE = "D:\\CEOM_01_M_20090805133924_0727_N.xml";

	/**
	 * Reads {@code file} and builds the corresponding {@link SearchUnit}.
	 *
	 * <p>Required children of the root element: {@code Title}, {@code Description},
	 * {@code Cost}, {@code Location}, {@code CreateTime}, {@code LastModifyTime},
	 * {@code Theme}, {@code Copyright/Owner} and {@code Contributor}.
	 * Optional children and their defaults: {@code MediaType} ("T"),
	 * {@code ThumbURL} ("0"), {@code ExpertComment} ("0"), {@code ConsumerComment} ("0"),
	 * {@code Recommend} ("1"), {@code SunFlower} ("0"), {@code Discount} ("1").
	 *
	 * @param file the XML file to parse; characters 5 and 6 (zero-based) of its
	 *             name are stored as the unit's "system" value, so the name must
	 *             be at least 7 characters long
	 * @return the populated search unit
	 * @throws Exception if the file cannot be opened or parsed
	 *             ({@code FileNotFoundException}, JDOM/IO errors), if a required
	 *             element is missing ({@code NullPointerException} naming the
	 *             element, or {@code IndexOutOfBoundsException} for a missing
	 *             {@code Copyright}/{@code Contributor}), or if {@code Cost} is
	 *             not a number ({@code NumberFormatException})
	 */
	public SearchUnit getSearchUnit(File file) throws Exception {

		SearchUnit unit = new SearchUnit();
		Element root = loadRoot(file);

		unit.setTitle(stripTags(requiredText(root, "Title")));

		// Clean the description once, then shorten it if it is too long.
		String description = stripTags(requiredText(root, "Description"));
		if (description.length() >= DESCRIPTION_LIMIT) {
			description = description.substring(0, DESCRIPTION_LIMIT - 1);
		}
		unit.setDescription(description);

		unit.setSystem(file.getName().substring(5, 7));

		// The cost is stored twice: as written, and padded for sorting.
		String cost = requiredText(root, "Cost").trim();
		unit.setCost(cost);
		unit.setCostBySorting(NumberUtils.pad(Double.parseDouble(cost)));

		unit.setLocation(requiredText(root, "Location").trim());
		unit.setCreateTime(requiredText(root, "CreateTime").trim());
		unit.setLastModifyTime(requiredText(root, "LastModifyTime").trim());

		// A missing or blank media type falls back to "T". The null check has
		// to happen before trim(), otherwise a missing element throws.
		String mediaType = root.getChildText("MediaType");
		if (mediaType == null || mediaType.trim().length() == 0) {
			unit.setMediaType("T");
		} else {
			unit.setMediaType(mediaType.trim());
		}

		// These three values are stored exactly as written (no trimming).
		unit.setThumbURL(optionalText(root, "ThumbURL", "0", false));
		unit.setExpertComment(optionalText(root, "ExpertComment", "0", false));
		unit.setConsumerCommenet(optionalText(root, "ConsumerComment", "0", false));

		// These three values are trimmed when present.
		unit.setRecommend(optionalText(root, "Recommend", "1", true));
		unit.setSunFlower(optionalText(root, "SunFlower", "0", true));
		unit.setDiscount(optionalText(root, "Discount", "1", true));

		unit.setTheme(requiredText(root, "Theme").trim());

		Element copyright = (Element) root.getChildren("Copyright").get(0);
		unit.setOwner(requiredText(copyright, "Owner").trim());

		Element contributor = (Element) root.getChildren("Contributor").get(0);
		unit.setContributor(joinChildValues(contributor));

		return unit;

	}

	/**
	 * Parses {@code file} without validation and returns its root element.
	 * The input stream is always closed, whether or not parsing succeeds.
	 */
	private static Element loadRoot(File file) throws Exception {
		// NOTE(review): if these files can come from untrusted sources, consider
		// disabling DTDs / external entities on the builder (XXE) - confirm.
		SAXBuilder builder = new SAXBuilder(false);
		FileInputStream in = new FileInputStream(file);
		try {
			Document doc = builder.build(in);
			return doc.getRootElement();
		} finally {
			in.close();
		}
	}

	/** Removes every markup tag from {@code text} and trims the result. */
	private static String stripTags(String text) {
		return text.replaceAll(TAG_REGEX, "").trim();
	}

	/**
	 * Returns the text of the child {@code name}, or throws a
	 * {@code NullPointerException} naming the element when it is missing.
	 */
	private static String requiredText(Element parent, String name) {
		String text = parent.getChildText(name);
		if (text == null) {
			throw new NullPointerException("Missing required element <" + name + ">");
		}
		return text;
	}

	/**
	 * Returns the text of the child {@code name}, or {@code fallback} when the
	 * child is missing or its text is empty. The text is trimmed only when
	 * {@code trim} is set.
	 */
	private static String optionalText(Element parent, String name, String fallback, boolean trim) {
		String text = parent.getChildText(name);
		if (text == null || text.length() == 0) {
			return fallback;
		}
		return trim ? text.trim() : text;
	}

	/**
	 * Joins the values of all child elements of {@code parent} with ';'.
	 * Returns an empty string when there are no children.
	 */
	private static String joinChildValues(Element parent) {
		StringBuilder joined = new StringBuilder();
		List children = parent.getChildren();
		for (Iterator iter = children.iterator(); iter.hasNext();) {
			Content child = (Content) iter.next();
			if (joined.length() > 0) {
				joined.append(';');
			}
			joined.append(child.getValue());
		}
		return joined.toString();
	}

	/**
	 * Small manual check: parses one file and prints the resulting unit.
	 *
	 * @param args optional; {@code args[0]} is the path of the file to parse,
	 *             otherwise a built-in default path is used
	 */
	public static void main(String[] args) {
		File file = new File(args.length > 0 ? args[0] : DEFAULT_FILE);
		SearchUnit su = null;
		try {
			su = new JDomParse().getSearchUnit(file);
		} catch (Exception e) {
			ParseFileException pfe = new ParseFileException();
			if (pfe.isFileNotFoundException(e)) {
				System.out.println("File Not Found !");
			} else {
				// Report other failures instead of dropping them silently.
				System.err.println("Failed to parse " + file + ": " + e);
			}
		}

		// Check for null directly instead of catching a NullPointerException.
		if (su == null) {
			System.out.println("Su is null !");
		} else {
			System.out.println(su.toString());
		}

	}

}


1.利用 replaceAll("<[^>]*>", "") 去除内容里面的html标签(不论是否成对,标签之间的文字会保留)
2.包含了xml解析中常遇到的情况
<root>
   root
</root>
----
<root>
<name>root</name>
</root>

记下..

你可能感兴趣的:(xml)