Java 百度ocr文字识别-发票识别,并在页面显示信息

最底下有全部代码链接

架构目录如下

Java 百度ocr文字识别-发票识别,并在页面显示信息_第1张图片

 

效果如下

Java 百度ocr文字识别-发票识别,并在页面显示信息_第2张图片

 

其中 access_token 的获取方法请参考官网文档:http://ai.baidu.com/docs#/Auth/top

我是用 Fiddler 发送请求来获取 access_token 的

Java 百度ocr文字识别-发票识别,并在页面显示信息_第3张图片

Java 百度ocr文字识别-发票识别,并在页面显示信息_第4张图片

 

Controller层

package ocr.controller;

import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest;

import com.alibaba.fastjson.JSONObject;

import ocr.pojo.Invoice;
import tools.OcrTools;

/**
 * Spring MVC controller for the invoice OCR demo.
 *
 * <p>Serves the upload page and accepts an uploaded invoice image, forwards it
 * to {@link OcrTools} for recognition and maps the {@code words_result} part
 * of the returned JSON onto an {@link Invoice}.
 */
@Controller
@RequestMapping("/invoice")
public class OcrController {

	@Autowired
	private OcrTools ocrTools;

	/**
	 * Returns the view path of the upload/test page.
	 *
	 * @param m the Spring model (not used by this handler)
	 * @return the JSP path to render
	 */
	@RequestMapping("/index")
	public String index(Model m) {
		return "/static/testOcr.jsp";
	}

	/**
	 * Runs OCR on the uploaded image and returns the recognised invoice fields.
	 *
	 * @param filesRequest multipart request; the image is read from the part named {@code "file"}
	 * @return the parsed invoice, or {@code null} when no file was uploaded, the OCR
	 *         call failed, or the response carries no {@code words_result}
	 */
	@RequestMapping(value="/getMsg", method=RequestMethod.POST)
	@ResponseBody
	public Invoice upload(MultipartHttpServletRequest filesRequest) {
		MultipartFile file = filesRequest.getFile("file");
		// getFile returns null when the request has no part with that name.
		if (file == null || file.isEmpty()) {
			return null;
		}
		// try-with-resources guarantees the upload stream is released even if OCR fails.
		try (InputStream in = file.getInputStream()) {
			String ocrResult = ocrTools.getocrByInputStream(in);
			// The OCR helper signals failure by returning null.
			if (ocrResult == null) {
				return null;
			}

			Map<?, ?> resultMap = JSONObject.parseObject(ocrResult, Map.class);
			if (resultMap == null) {
				return null;
			}
			// Round-trip through a JSON string so the parser maps the response keys
			// onto the Invoice properties; a missing words_result yields null.
			String invoiceString = JSONObject.toJSONString(resultMap.get("words_result"));
			return JSONObject.parseObject(invoiceString, Invoice.class);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return null;
	}
}

 

实体层

package ocr.pojo;

import java.math.BigDecimal;

import org.springframework.stereotype.Controller;

/**
 * Plain data holder for the invoice fields extracted by OCR.
 *
 * <p>This is a value object, not a web component, so it carries no Spring
 * stereotype annotation: instances are created by the JSON parser per request
 * rather than managed by the container.
 */
public class Invoice {

	private String invoiceCode; // invoice code
	private String invoiceDate; // date the invoice was issued
	private String purchaserName; // purchaser name
	private String purchaserRegisterNum; // purchaser taxpayer identification number
	// Total amount including tax.
	// NOTE(review): the spelling "Figuers" presumably mirrors the key used in the
	// OCR response; confirm against the API before renaming.
	private BigDecimal amountInFiguers;
	private String sellerName; // seller name
	private String sellerRegisterNum; // seller taxpayer identification number

	public String getInvoiceCode() {
		return invoiceCode;
	}

	public void setInvoiceCode(String invoiceCode) {
		this.invoiceCode = invoiceCode;
	}

	public String getInvoiceDate() {
		return invoiceDate;
	}

	public void setInvoiceDate(String invoiceDate) {
		this.invoiceDate = invoiceDate;
	}

	public String getPurchaserName() {
		return purchaserName;
	}

	public void setPurchaserName(String purchaserName) {
		this.purchaserName = purchaserName;
	}

	public String getPurchaserRegisterNum() {
		return purchaserRegisterNum;
	}

	public void setPurchaserRegisterNum(String purchaserRegisterNum) {
		this.purchaserRegisterNum = purchaserRegisterNum;
	}

	public BigDecimal getAmountInFiguers() {
		return amountInFiguers;
	}

	public void setAmountInFiguers(BigDecimal amountInFiguers) {
		this.amountInFiguers = amountInFiguers;
	}

	public String getSellerName() {
		return sellerName;
	}

	public void setSellerName(String sellerName) {
		this.sellerName = sellerName;
	}

	public String getSellerRegisterNum() {
		return sellerRegisterNum;
	}

	public void setSellerRegisterNum(String sellerRegisterNum) {
		this.sellerRegisterNum = sellerRegisterNum;
	}

	/** Returns all fields in a single bracketed, comma-separated line for debugging. */
	@Override
	public String toString() {
		return "Invoice [invoiceCode=" + invoiceCode + ", invoiceDate=" + invoiceDate + ", purchaserName="
				+ purchaserName + ", purchaserRegisterNum=" + purchaserRegisterNum + ", amountInFiguers="
				+ amountInFiguers + ", sellerName=" + sellerName + ", sellerRegisterNum=" + sellerRegisterNum + "]";
	}
}

 

获取识别文字tools层

package tools;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Base64;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.springframework.stereotype.Component;

/**
 * Helper that sends an image to the Baidu VAT-invoice OCR endpoint and returns
 * the raw JSON response.
 *
 * <p>All methods report failure by printing the stack trace and returning
 * {@code null} (or a partial result for {@link #convertStreamToString}).
 */
@Component
public class OcrTools {

	/**
	 * OCR endpoint. The {@code access_token} value is a placeholder and must be
	 * replaced with a real token before the request can succeed.
	 */
	private static final String OCR_URL =
			"https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice?access_token=填自己的!!!";

	/** Charset name used for URL-encoding the request and decoding text. */
	private static final String UTF8 = "utf-8";

	/**
	 * Recognises the image supplied as a stream.
	 *
	 * @param in image data; fully consumed and closed by this call
	 * @return the JSON text returned by the OCR service, or {@code null} if the
	 *         stream could not be read or the request failed
	 */
	public static String getocrByInputStream(InputStream in){
		byte[] fileByte = getFileBytes(in);// read the whole image into memory
		if (fileByte == null) {
			// Reading failed; nothing to send.
			return null;
		}
		String base64UrlencodedImg = base64Urlencode(fileByte);// base64 + URL encode
		if (base64UrlencodedImg == null) {
			return null;
		}
		return sendOcr(base64UrlencodedImg);// submit to the OCR service
	}

	/**
	 * Posts an already encoded image to the OCR endpoint.
	 *
	 * @param base64UrlencodedImg image bytes encoded as base64 and then URL-encoded
	 * @return the response body decoded as UTF-8, or {@code null} when the input is
	 *         {@code null}, the response has no body, or the request fails
	 */
	public static String sendOcr(String base64UrlencodedImg){
		if (base64UrlencodedImg == null) {
			return null;
		}
		HttpPost post = new HttpPost(OCR_URL);
		Header header = new BasicHeader("Content-Type","application/x-www-form-urlencoded");
		post.setHeader(header);
		// Client, response and body stream are all closed on every path.
		try (CloseableHttpClient httpclient = HttpClients.createMinimal()) {
			HttpEntity entity = new StringEntity("image=" + base64UrlencodedImg);
			post.setEntity(entity);
			try (CloseableHttpResponse response = httpclient.execute(post)) {
				HttpEntity body = response.getEntity();
				if (body == null) {
					return null;
				}
				try (InputStream in = body.getContent()) {
					return new String(readFully(in), UTF8);
				}
			}
		} catch (IOException | RuntimeException e) {
			// Best-effort contract: callers receive null instead of an exception.
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Reads the whole stream into a byte array and closes it.
	 *
	 * @param in stream to drain; may be {@code null}
	 * @return the bytes read, or {@code null} if {@code in} is {@code null} or reading fails
	 */
	private static byte[] getFileBytes(InputStream in){
		if (in == null) {
			return null;
		}
		try (InputStream src = in) {
			return readFully(src);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Copies every remaining byte of {@code in} into a new array. Does not close the stream.
	 *
	 * @param in stream to drain
	 * @return the bytes read
	 * @throws IOException if reading fails
	 */
	private static byte[] readFully(InputStream in) throws IOException {
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		byte[] chunk = new byte[4096];
		int n;
		while ((n = in.read(chunk)) != -1) {
			bos.write(chunk, 0, n);
		}
		return bos.toByteArray();
	}

	/**
	 * Encodes bytes as base64 and then URL-encodes the result.
	 *
	 * @param b raw bytes; may be {@code null}
	 * @return the encoded text, or {@code null} if {@code b} is {@code null} or encoding fails
	 */
	private static String base64Urlencode(byte[] b) {
		if (b == null) {
			return null;
		}
		// encodeToString avoids converting through the platform default charset.
		String base64Img = Base64.getEncoder().encodeToString(b);
		try {
			return URLEncoder.encode(base64Img, UTF8);
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Reads a stream as UTF-8 text, line by line, and closes it.
	 *
	 * @param is stream to read
	 * @return the text with each line terminated by a newline character; if reading
	 *         fails part-way, the text read so far
	 */
	public String convertStreamToString(InputStream is) {
		StringBuilder sb = new StringBuilder();
		// Closing the reader also closes the underlying stream.
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF8))) {
			String line;
			while ((line = reader.readLine()) != null) {
				sb.append(line).append('\n');
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return sb.toString();
	}
}

 

页面jsp

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>






Insert title here


	
发票代码 开票时间
购方名称 购方纳税人识别号
价税合计
售方名称 售方纳税人识别号

 

配置文件和pom.xml不详细贴了

 

完整代码

链接:https://pan.baidu.com/s/1uo8aFDtX5es4kkTIGEvXiA 
提取码:gb7x 
 

想了解更多,请参考官方文档:https://cloud.baidu.com/doc/OCR/s/ijwvxzq2g

(后续自己又改善了一下,加了service层,如有需要,可私信)

你可能感兴趣的:(文字识别)