MyBatis高效同步百万级数据

   一.有一个需求,需要同步百万级的数据。看到要求首先想到的是用存储过程+Job定时批量同步,后来发现这个任务是一表同步多表,且一表上的字段难以满足多表的需求,在对数据的细节处理上使用Java同步更方便,故考虑使用MyBatis foreach list的同步方式。但使用这种同步方式需要考虑很多性能方面的问题。

  • 问题一:查询返回的List数据不能太大,不然会报java.lang.OutOfMemoryError: GC overhead limit exceeded;
  • 问题二:当表的列数较多(20+),以及一次性插入的行数较多(5000+)时,整个插入的耗时十分漫长,达到了10+分钟,这是不能忍的。
  • 问题三:后台的Java定时任务容易耗尽单体应用的cpu资源,导致系统访问变慢,与其他核心模块抢夺cpu资源,拉低系统的整体性能,严重可导致系统宕机
  1. 优化设计一:百万级数据不可能一次性同步完,毕竟不是ETL数据迁移,使用MyBatis得考虑性能方面的要求,因此这里使用SpringBoot Task分批处理,每次5000条,2分钟执行一次任务;
  2. 优化设计二:在batchAddList(List List) 同步的方法里对List进行切割,分批操作。按经验来说,一次性插500~1000行数量是比较合适的,时间消耗也能接受。
  3. 优化设计三:对于查询需要同步的数据加ROWNUM <= 5000 ,返回的List再分批操作处理,根据插入的性能,一次性插入500条,已同步的数据设置状态为status = 1,未同步的数据status = 0。下次再跑定时任务时同步status = 0的数据,避免同步的数据重复;
  4. 优化设计四:加入LOG ERRORS记录DML错误日志:如果某一行数据遇到了错误(如:违反唯一性约束条件),该行会被写入错误日志表(ERR$_MATERIAL)而不会插入目标表,语句中其余的行照常执行,避免同步异常数据;若不加LOG ERRORS,则整条语句会因为这一行出错而整个回滚
  5. 优化设计五:如果是微服务项目,可以放在Task模块里,不是微服务项目,就单独建一个工程,不要放到单体工程应用里运行,特别是用户并发量大,有许多长连接访问需求(例如短信接口,地址解析等第三方接口)的工程;

二.具体实现

import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Lazy;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import com.google.common.collect.Lists;
import com.haige.material.entity.ImtOiScan;
import com.haige.material.service.ImtOiScanService;
import com.haige.material.service.InOutProductService;
import com.haige.material.service.MaterialService;

/**
 * Scheduled job that pushes pending IMT_OI_SCAN rows into the downstream tables.
 *
 * <p>Each run performs four steps in order: load the rows to synchronise, hand them to
 * {@code MaterialService}, hand the same rows to {@code InOutProductService}, and finally ask
 * {@code ImtOiScanService} to update the {@code imt_status} flag on IMT_OI_SCAN.
 *
 * <p>NOTE(review): the four steps are not wrapped in a single transaction here; a failure in a
 * later step leaves the earlier inserts in place. Confirm this is acceptable.
 */
@Component
@Lazy(value=false)
public class SynMaterielTask {
	private static final Logger logger = LoggerFactory.getLogger(SynMaterielTask.class);

	@Autowired
	private ImtOiScanService imtOiScanService;

	@Autowired
	private MaterialService materialService;

	@Autowired
	private InOutProductService inOutProductService;

	/**
	 * Runs one synchronisation pass. The next pass starts 545000 ms after this one finishes
	 * ({@code fixedDelay} semantics).
	 */
	@Scheduled(fixedDelay = 545000)
	public void doMaterielTask() {
		// Routine start banner: logged at DEBUG like the end banner, not at ERROR.
		logger.debug("----------------------doMaterielTask---------------------------");

		// 1. Fetch the rows to synchronise from IMT_OI_SCAN.
		List imtOiScanList = imtOiScanService.getInOutProductListUnion();
		int pending = (imtOiScanList == null) ? 0 : imtOiScanList.size();
		logger.debug("要同步的数据条数:{}", pending);

		if (pending > 0) {
			logger.debug("更新的第一条数据 : {}", imtOiScanList.get(0));
			// 2. Copy the rows into the order (MATERIAL) table.
			materialService.batchAddMaterial(imtOiScanList);
			// 3. Copy the rows into the association table.
			inOutProductService.batchAddInOutProduct(imtOiScanList);
			// 4. Update imt_status on IMT_OI_SCAN so the rows are not picked up again.
			imtOiScanService.updateImtOiScanImtStatusMerge();
		}
		logger.debug("------------------end----doMaterielTask---------------------------");
	}
}
package com.haige.material.service;

import java.util.Date;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import com.google.common.collect.Lists;
import com.haige.material.dao.ImtOiScanDao;
import com.haige.material.entity.ImtOiScan;

/**
 * Data synchronisation service: reads the IMT_OI_SCAN rows to synchronise and updates their
 * status flag afterwards. Both operations delegate directly to {@code ImtOiScanDao}.
 *
 * @author linhaiy
 */
@Service
@Transactional(readOnly = false)
public class ImtOiScanService {

	private static final Logger logger = LoggerFactory.getLogger(ImtOiScanService.class);

	@Autowired
	private ImtOiScanDao imtOiScanDao;

	/**
	 * Loads the rows to synchronise via {@code ImtOiScanDao.selectImtOiScanList()} and logs the
	 * wall-clock time before and after the query at DEBUG level.
	 *
	 * @return whatever list the DAO returns
	 */
	public List getInOutProductListUnion(){
		// Parameterized logging: the message is only built when DEBUG is enabled.
		logger.debug("开始查询IMT_OI_SCAN表时间:  {}", new Date());
		List imtOiScanList = imtOiScanDao.selectImtOiScanList();
		logger.debug("结束查询IMT_OI_SCAN表时间:  {}", new Date());
		return imtOiScanList;
	}

	/**
	 * Runs {@code ImtOiScanDao.updateImtOiScanImtStatusMerge()} and logs the wall-clock time
	 * before and after the update at DEBUG level.
	 */
	public void updateImtOiScanImtStatusMerge(){
		logger.debug("更新IMT_OI_SCAN表imt_status状态开始时间 :{}", new Date());
		imtOiScanDao.updateImtOiScanImtStatusMerge();
		logger.debug("更新IMT_OI_SCAN表imt_status状态结束时间 :{}", new Date());
	}
}
import java.util.List;

import com.haige.material.entity.ImtOiScan;

/**
 * Data synchronisation DAO layer for IMT_OI_SCAN.
 *
 * @author linhaiy
 * @date 2019.06.26
 */
public interface ImtOiScanDao {

	/**
	 * Returns the IMT_OI_SCAN rows to synchronise.
	 * NOTE(review): the backing statement is not shown alongside this interface; the row limit
	 * and status filter it applies should be confirmed there.
	 *
	 * @return the selected rows
	 */
	List selectImtOiScanList();

	/**
	 * Updates the {@code imt_status} flag on IMT_OI_SCAN after a synchronisation pass.
	 */
	void updateImtOiScanImtStatusMerge();
}




	

	
		<!--
		  Flags IMT_OI_SCAN rows as synchronised: every row whose imt_status is '0' and whose id
		  also exists in MATERIAL gets imt_status = '1'.
		  The <update> wrapper is reconstructed from ImtOiScanDao.updateImtOiScanImtStatusMerge();
		  the original mapper tags were lost. The join is written as an inner JOIN because the
		  WHERE filter on b.imt_status already discarded any unmatched rows of a LEFT JOIN.
		-->
		<update id="updateImtOiScanImtStatusMerge">
			MERGE INTO IMT_OI_SCAN tgt
			USING (SELECT a.id
			         FROM MATERIAL a
			         JOIN IMT_OI_SCAN b ON a.id = b.id
			        WHERE b.imt_status = '0') src
			ON (tgt.id = src.id)
			WHEN MATCHED THEN
			UPDATE SET tgt.imt_status = '1'
		</update>
	
package com.haige.material.service;

import java.util.Date;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.google.common.collect.Lists;
import com.haige.material.dao.MaterialDao;
import com.haige.material.entity.ImtOiScan;

/**
 * Service that writes synchronised rows to the material table through {@code MaterialDao}
 * (original description: "dispatch service").
 *
 * @author linhaiy
 * @date 2019.06.26
 */
@Service
public class MaterialService {

	private static final Logger logger = LoggerFactory.getLogger(MaterialService.class);

	/** Maximum number of rows handed to {@code MaterialDao.insertMaterial} per call. */
	private static final int BATCH_SIZE = 500;

	@Autowired
	private MaterialDao materialDao;

	/**
	 * Inserts the given rows in consecutive batches of at most {@link #BATCH_SIZE} elements,
	 * preserving their order. Each batch is copied into a fresh list before it is passed to the
	 * DAO, so the DAO never sees a view of the caller's list.
	 *
	 * @param materialList rows to insert; {@code null} or empty results in no DAO call
	 */
	public void batchAddMaterial(List materialList) {
		logger.debug("同步物料开始时间 :{}", new Date());
		if (materialList != null) {
			final int total = materialList.size();
			int from = 0;
			while (from < total) {
				// Clamp the last batch to the end of the list.
				int to = from + Math.min(BATCH_SIZE, total - from);
				List batch = Lists.newArrayList();
				batch.addAll(materialList.subList(from, to));
				materialDao.insertMaterial(batch);
				from = to;
			}
		}
		logger.debug("同步物料结束时间 :{}", new Date());
	}
}
/**
 * DAO layer that synchronises inner/outer codes into the material table.
 *
 * @author linhaiy
 * @date 2019.06.26
 */
public interface MaterialDao {

	/**
	 * Inserts one batch of rows into the material table.
	 *
	 * @param inOutDetail the rows of the batch
	 */
	void insertMaterial(List inOutDetail);
}




	
		<!--
		  Multi-row insert into MATERIAL using Oracle INSERT ALL: one INTO clause is generated
		  per list element, followed by the mandatory trailing subquery (SELECT 1 FROM DUAL).
		  Rows that raise a DML error are written to ERR$_MATERIAL (tagged 'tag') instead of
		  aborting the statement (REJECT LIMIT UNLIMITED).
		  The <insert>/<foreach> tags are reconstructed from MaterialDao.insertMaterial(List) and
		  the item.* placeholders; the original mapper tags were lost.
		  NOTE(review): shippingTime is bound with jdbcType=DATE; if the time-of-day must be kept,
		  confirm whether jdbcType=TIMESTAMP is needed.
		-->
		<insert id="insertMaterial" parameterType="java.util.List">
			INSERT ALL
			<foreach collection="list" item="item" index="index">
				INTO MATERIAL
				(ID,PURCHASE_ORDER,SHIPPING_ORDER,CUSTOMER_CODE,CUSTOMER_NAME,
				SUPPLIER_NAME,RECEIVE_NAME,MATERIAL_CODE,MATERIAL_NAME,PRODUCT_ID,SECURITY,
				W_ID,SHIPPING_TIME,IMT_TIME,IMT_STATUS,PRODUCT_ID_NUM,W_ID_NUM,SECURITY_NUM,SERIAL,BUSI_TYPE)
				VALUES
				(
				#{item.id,jdbcType=VARCHAR},
				#{item.purchaseOrder,jdbcType=VARCHAR},
				#{item.shippingOrder,jdbcType=VARCHAR},
				#{item.customerCode,jdbcType=VARCHAR},
				#{item.customerName,jdbcType=VARCHAR},
				#{item.supplierName,jdbcType=VARCHAR},
				#{item.receiveName,jdbcType=VARCHAR},
				#{item.materialCode,jdbcType=VARCHAR},
				#{item.materialName,jdbcType=VARCHAR},
				#{item.productId,jdbcType=VARCHAR},
				#{item.security,jdbcType=VARCHAR},
				#{item.outCode,jdbcType=VARCHAR},
				#{item.shippingTime,jdbcType=DATE},
				SYSDATE,
				#{item.imtStatus,jdbcType=NUMERIC},
				#{item.productIdNum,jdbcType=NUMERIC},
				#{item.wIdNum,jdbcType=NUMERIC},
				#{item.securityNum,jdbcType=NUMERIC},
				#{item.serial,jdbcType=VARCHAR},
				#{item.busiType,jdbcType=VARCHAR}
				)
				log errors into ERR$_MATERIAL('tag') reject limit unlimited
			</foreach>
			SELECT 1 FROM DUAL
		</insert>
	
package com.haige.material.service;

import java.util.Date;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.google.common.collect.Lists;
import com.haige.material.dao.InOutProductDao;
import com.haige.material.entity.ImtOiScan;

/**
 * Service that synchronises rows into the association table through {@code InOutProductDao}.
 *
 * @author linhaiy
 * @date 2019.06.26
 */
@Service
public class InOutProductService {

	private static final Logger logger = LoggerFactory.getLogger(InOutProductService.class);

	/** Maximum number of rows handed to {@code InOutProductDao.insertInOutProduct} per call. */
	private static final int BATCH_SIZE = 1000;

	@Autowired
	private InOutProductDao inOutProductDao;

	/**
	 * Inserts the given rows in consecutive batches of at most {@link #BATCH_SIZE} elements,
	 * preserving their order. Each batch is copied into a fresh list before it is passed to the
	 * DAO.
	 *
	 * @param inOutProductList rows to insert; {@code null} or empty results in no DAO call
	 */
	public void batchAddInOutProduct(List inOutProductList) {
		logger.debug("同步关联表开始时间 :{}", new Date());
		if (inOutProductList != null) {
			final int total = inOutProductList.size();
			int from = 0;
			while (from < total) {
				// Clamp the last batch to the end of the list.
				int to = from + Math.min(BATCH_SIZE, total - from);
				List batch = Lists.newArrayList();
				batch.addAll(inOutProductList.subList(from, to));
				inOutProductDao.insertInOutProduct(batch);
				from = to;
			}
		}
		// Fixed: this message previously repeated the "start time" label.
		logger.debug("同步关联表结束时间 :{}", new Date());
	}
}
/**
 * DAO layer for synchronising data into the association table.
 *
 * @author linhaiy
 */
public interface InOutProductDao {

	/**
	 * Inserts one batch of rows into the association table.
	 *
	 * @param inOutProductList the rows of the batch
	 */
	void insertInOutProduct(List inOutProductList);
}



	
		/* Column list for table alias "a": each column is aliased to a camelCase name. */
		a.ID          as id,
		a.N_ID        as inCode,
		a.W_ID        as outCode,
		a.PRODUCT_ID  as productId,
		a.N_NUMBER    as inNum,
		a.W_NUMBER    as outNum,
		a.CREATE_BY   as createBy,
		a.CREATE_DATE as createDate,
		a.UPDATE_BY   as updateBy,
		a.UPDATE_DATE as updateDate,
		a.REMARKS     as remarks,
		a.STATUS      as status,
		a.SECURITY    as security,
		a.S_NUMBER    as securityNum,
		a.P_NUMBER    as productNum,
		a.BAOCAI_TYPE as baoCaiType
	

	

三.foreach用法介绍

1.foreach主要用在构建in条件中,它可以在SQL语句中迭代一个集合

2.可以传递一个 List 实例或者数组作为参数对象传给 MyBatis。当你这么做的时候,MyBatis 会自动将它包装在一个 Map 中,用名称作为键。List 实例将会以“list”作为键,而数组实例将会以“array”作为键

3.foreach的collection属性支持三种集合类型:list,array以及map

4.foreach元素的属性主要有 item,index,collection,open,separator,close

  •     item:集合中每一个元素进行迭代时的别名
  •     index:指定一个名字,用于表示在迭代过程中,每次迭代到的位置
  •     collection:若传递的是集合,则为list;若传递的是数组,则为array;若传入的参数有多个,就需要把它们封装成一个Map或者Object,此时collection的值为该集合在Map中的key(也就是参数名)
  •     open:该语句以什么开始
  •     close:以什么结束
  •     separator:在每次进行迭代之间以什么符号作为分隔符('union all',',')

 

你可能感兴趣的:(定时任务调度,学习笔记)