最底下有全部代码链接
架构目录如下
效果如下
其中 access_token 的获取方法请参考官网文档:http://ai.baidu.com/docs#/Auth/top
我用的Fiddler
Controller层
package ocr.controller;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.util.Map;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest;
import com.alibaba.fastjson.JSONObject;
import ocr.pojo.Invoice;
import tools.OcrTools;
/**
 * Spring MVC controller for the invoice OCR demo.
 *
 * <p>Serves the upload page and accepts an uploaded invoice image, which is passed to
 * {@link OcrTools} for recognition; the {@code words_result} part of the returned JSON
 * is mapped onto an {@link Invoice}.
 */
@Controller
@RequestMapping("/invoice")
public class OcrController {

    @Autowired
    private OcrTools ocrTools;

    /**
     * Returns the view name of the upload test page.
     *
     * @param m the MVC model (not used by this handler)
     * @return the view name {@code /static/testOcr.jsp}
     */
    @RequestMapping("/index")
    public String index(Model m) {
        return "/static/testOcr.jsp";
    }

    /**
     * Runs OCR on the uploaded multipart part named {@code file} and returns the
     * recognised invoice fields.
     *
     * @param filesRequest the multipart request carrying the image under the name {@code file}
     * @return the parsed invoice, or {@code null} when no file was uploaded, the upload
     *         could not be read, the OCR call produced no result, or the response has
     *         no {@code words_result} entry
     */
    @RequestMapping(value = "/getMsg", method = RequestMethod.POST)
    @ResponseBody
    public Invoice upload(MultipartHttpServletRequest filesRequest) {
        MultipartFile file = filesRequest.getFile("file");
        // getFile returns null when the request has no part with that name.
        if (file == null || file.isEmpty()) {
            return null;
        }
        try (InputStream in = file.getInputStream()) {
            String ocrResult = ocrTools.getocrByInputStream(in);
            // The OCR helper signals failure with null; do not try to parse it.
            if (ocrResult == null) {
                return null;
            }
            Map<String, Object> resultMap = JSONObject.parseObject(ocrResult);
            if (resultMap == null) {
                return null;
            }
            Object wordsResult = resultMap.get("words_result");
            if (wordsResult == null) {
                return null;
            }
            // Round-trip through a JSON string so the fields are bound by the JSON parser.
            String invoiceString = JSONObject.toJSONString(wordsResult);
            return JSONObject.parseObject(invoiceString, Invoice.class);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }
}
实体层
package ocr.pojo;
import java.math.BigDecimal;
import org.springframework.stereotype.Controller;
/**
 * Plain data holder for the invoice fields taken from the OCR result.
 *
 * <p>This is a value object, not a web component, so it carries no Spring stereotype
 * annotation. Property names are used for JSON binding and must not be renamed.
 */
public class Invoice {

    /** Invoice code. */
    private String invoiceCode;
    /** Invoice issue date, kept as the recognised text. */
    private String invoiceDate;
    /** Purchaser name. */
    private String purchaserName;
    /** Purchaser taxpayer identification number. */
    private String purchaserRegisterNum;
    /**
     * Total amount including tax.
     * NOTE(review): the spelling "Figuers" presumably mirrors the key used in the OCR
     * response; confirm against the API before renaming.
     */
    private BigDecimal amountInFiguers;
    /** Seller name. */
    private String sellerName;
    /** Seller taxpayer identification number. */
    private String sellerRegisterNum;

    /** @return the invoice code */
    public String getInvoiceCode() {
        return invoiceCode;
    }

    /** @param invoiceCode the invoice code */
    public void setInvoiceCode(String invoiceCode) {
        this.invoiceCode = invoiceCode;
    }

    /** @return the invoice issue date text */
    public String getInvoiceDate() {
        return invoiceDate;
    }

    /** @param invoiceDate the invoice issue date text */
    public void setInvoiceDate(String invoiceDate) {
        this.invoiceDate = invoiceDate;
    }

    /** @return the purchaser name */
    public String getPurchaserName() {
        return purchaserName;
    }

    /** @param purchaserName the purchaser name */
    public void setPurchaserName(String purchaserName) {
        this.purchaserName = purchaserName;
    }

    /** @return the purchaser taxpayer identification number */
    public String getPurchaserRegisterNum() {
        return purchaserRegisterNum;
    }

    /** @param purchaserRegisterNum the purchaser taxpayer identification number */
    public void setPurchaserRegisterNum(String purchaserRegisterNum) {
        this.purchaserRegisterNum = purchaserRegisterNum;
    }

    /** @return the total amount including tax */
    public BigDecimal getAmountInFiguers() {
        return amountInFiguers;
    }

    /** @param amountInFiguers the total amount including tax */
    public void setAmountInFiguers(BigDecimal amountInFiguers) {
        this.amountInFiguers = amountInFiguers;
    }

    /** @return the seller name */
    public String getSellerName() {
        return sellerName;
    }

    /** @param sellerName the seller name */
    public void setSellerName(String sellerName) {
        this.sellerName = sellerName;
    }

    /** @return the seller taxpayer identification number */
    public String getSellerRegisterNum() {
        return sellerRegisterNum;
    }

    /** @param sellerRegisterNum the seller taxpayer identification number */
    public void setSellerRegisterNum(String sellerRegisterNum) {
        this.sellerRegisterNum = sellerRegisterNum;
    }

    /** Returns all fields in {@code Invoice [name=value, ...]} form. */
    @Override
    public String toString() {
        return "Invoice [invoiceCode=" + invoiceCode
                + ", invoiceDate=" + invoiceDate
                + ", purchaserName=" + purchaserName
                + ", purchaserRegisterNum=" + purchaserRegisterNum
                + ", amountInFiguers=" + amountInFiguers
                + ", sellerName=" + sellerName
                + ", sellerRegisterNum=" + sellerRegisterNum + "]";
    }
}
获取识别文字的 tools 层
package tools;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.springframework.stereotype.Component;
/**
 * Helper that posts an image to the VAT-invoice OCR HTTP endpoint and returns the raw
 * response text.
 *
 * <p>Failures are reported by printing the stack trace and returning {@code null};
 * callers must check for {@code null}.
 */
@Component
public class OcrTools {

    /**
     * OCR endpoint. The {@code access_token} query value is a placeholder and has to be
     * replaced with a real token before the call can succeed.
     */
    private static final String OCR_URL =
            "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice?access_token=填自己的!!!";

    /** Chunk size used when draining a stream into memory. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Recognises the image supplied as a stream.
     *
     * <p>The stream is read to the end and closed by this method.
     *
     * @param in stream with the image bytes
     * @return the response body returned by the OCR endpoint, or {@code null} if the
     *         image could not be read, encoded, or sent
     */
    public static String getocrByInputStream(InputStream in) {
        byte[] fileByte = getFileBytes(in);
        if (fileByte == null) {
            return null;
        }
        String base64UrlencodedImg = base64Urlencode(fileByte);
        if (base64UrlencodedImg == null) {
            return null;
        }
        return sendOcr(base64UrlencodedImg);
    }

    /**
     * Posts an already Base64- and URL-encoded image as the {@code image} form field
     * and returns the response body decoded as UTF-8.
     *
     * @param base64UrlencodedImg the image after Base64 encoding followed by URL encoding
     * @return the response body, or {@code null} if the argument is {@code null}, the
     *         response has no entity, or the request failed
     */
    public static String sendOcr(String base64UrlencodedImg) {
        if (base64UrlencodedImg == null) {
            return null;
        }
        HttpPost post = new HttpPost(OCR_URL);
        Header header = new BasicHeader("Content-Type", "application/x-www-form-urlencoded");
        post.setHeader(header);
        // try-with-resources releases the client, the response and the body stream on
        // every path, including failures.
        try (CloseableHttpClient httpclient = HttpClients.createMinimal()) {
            post.setEntity(new StringEntity("image=" + base64UrlencodedImg));
            try (CloseableHttpResponse response = httpclient.execute(post)) {
                HttpEntity responseEntity = response.getEntity();
                if (responseEntity == null) {
                    return null;
                }
                try (InputStream in = responseEntity.getContent()) {
                    return new String(readAll(in), StandardCharsets.UTF_8);
                }
            }
        } catch (Exception e) {
            // Deliberately broad: any failure is reported as a null result.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Reads the whole stream into a byte array and closes it.
     *
     * @param in stream to drain; may be {@code null}
     * @return the bytes read, or {@code null} if {@code in} is {@code null} or reading failed
     */
    private static byte[] getFileBytes(InputStream in) {
        if (in == null) {
            return null;
        }
        try (InputStream source = in) {
            return readAll(source);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Copies everything that remains in the stream into memory. Does not close the stream.
     *
     * @param in stream to drain
     * @return all remaining bytes
     * @throws IOException if reading fails
     */
    private static byte[] readAll(InputStream in) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        byte[] chunk = new byte[BUFFER_SIZE];
        int n;
        while ((n = in.read(chunk)) != -1) {
            bos.write(chunk, 0, n);
        }
        return bos.toByteArray();
    }

    /**
     * Base64-encodes the bytes and then URL-encodes the result so it can be used as a
     * form field value.
     *
     * @param b bytes to encode; may be {@code null}
     * @return the encoded text, or {@code null} if {@code b} is {@code null} or encoding failed
     */
    private static String base64Urlencode(byte[] b) {
        if (b == null) {
            return null;
        }
        String base64Img = Base64.getEncoder().encodeToString(b);
        try {
            return URLEncoder.encode(base64Img, StandardCharsets.UTF_8.name());
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Reads the stream as UTF-8 text, line by line, and closes it.
     *
     * <p>Every line read is followed by a newline character in the result. If reading
     * fails part-way, the text read so far is returned.
     *
     * @param is stream to read
     * @return the text read from the stream
     */
    public String convertStreamToString(InputStream is) {
        StringBuilder sb = new StringBuilder();
        // Closing the reader also closes the underlying stream.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sb.toString();
    }
}
页面jsp
<%@ page language="java" contentType="text/html; charset=UTF-8"
pageEncoding="UTF-8"%>
Insert title here
发票代码
开票时间
购方名称
购方纳税人识别号
价税合计
售方名称
售方纳税人识别号
配置文件和pom.xml不详细贴了
完整代码
链接:https://pan.baidu.com/s/1uo8aFDtX5es4kkTIGEvXiA
提取码:gb7x
想了解更多,请参考官方文档:https://cloud.baidu.com/doc/OCR/s/ijwvxzq2g
(后续自己又改善了一下,加了service层,如有需要,可私信)