ZipOutputStream zipOutputStream = new ZipOutputStream(new FileOutputStream("E:/company/qiyi/bi2.0/temp1.zip")); zipOutputStream.putNextEntry(new ZipEntry("temp.csv"));
ExcelReader reader = EasyExcel.read("E:/company/qiyi/bi2.0/Job_Seeker_V15.xlsx").build();
List<ReadSheet> readSheets = reader.excelExecutor().sheetList();
List<ReadSheet> results = Lists.newArrayList();
if(!readSheets.isEmpty()){
for(int i = 0; i < readSheets.size(); i++){
ReadSheet init;
if(0 == i){
init = EasyExcel.readSheet(readSheets.get(i).getSheetNo()).registerReadListener(new NoModelDataExcelToCsvListener(zipOutputStream, true)).build();
}else{
init = EasyExcel.readSheet(readSheets.get(i).getSheetNo()).registerReadListener(new NoModelDataExcelToCsvListener(zipOutputStream, false)).build();
}
results.add(init);
}
}
reader.read(results);
reader.finish();
zipOutputStream.closeEntry();
zipOutputStream.close();
public class NoModelDataExcelToCsvListener extends AnalysisEventListener<Map<Integer, String>> {
/**
* 每隔5条存储数据库,实际使用中可以3000条,然后清理list ,方便内存回收
*/
private static final int BATCH_COUNT = 100;
List<Map<Integer, String>> list = new ArrayList<Map<Integer, String>>();
private ZipOutputStream zipOutputStream;
/**
* 是否读取表头开关
*/
private boolean headerOk;
public NoModelDataExcelToCsvListener(){
}
public NoModelDataExcelToCsvListener(ZipOutputStream zipOutputStream, boolean headerOk){
this.zipOutputStream = zipOutputStream;
this.headerOk = headerOk;
}
@SneakyThrows
@Override
public void invoke(Map<Integer, String> data, AnalysisContext context) {
log.info("解析到一条数据:id:{}, userId", data.get(0), data.get(1));
list.add(data);
if(list.size() >= BATCH_COUNT){
saveData();
list.clear();
}
}
@Override
public void doAfterAllAnalysed(AnalysisContext context) {
saveData();
log.info("====== 公共导出 excel 转 csv 完成! =====");
}
@Override
public void onException(Exception exception, AnalysisContext context) {
log.error("解析失败,但是继续解析下一行:{}", exception.getMessage());
// 如果是某一个单元格的转换异常 能获取到具体行号
// 如果要获取头的信息 配合invokeHeadMap使用
if (exception instanceof ExcelAnalysisException) {
ExcelDataConvertException excelDataConvertException = (ExcelDataConvertException) exception;
log.error("第{}行,第{}列解析异常", excelDataConvertException.getRowIndex(),
excelDataConvertException.getColumnIndex());
}
}
/**
* 加上存储数据库
*/
private void saveData() {
// log.info("{}条数据,开始存储数据库!", list.size());
// log.info("存储数据库成功!");
}
@Override
public void invokeHeadMap(Map<Integer, String> headMap, AnalysisContext context) {
log.info("head解析到一条数据:{}", JSON.toJSONString(headMap));
if(headerOk){
list.add(headMap);
}else{
log.info("=== 多个sheet,当前不需要表头信息 ====");
}
}
}
异常提示
com.alibaba.excel.exception.ExcelAnalysisException: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence.
at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.parseXmlSource(XlsxSaxAnalyser.java:183)
at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.execute(XlsxSaxAnalyser.java:201)
at com.alibaba.excel.analysis.ExcelAnalyserImpl.analysis(ExcelAnalyserImpl.java:115)
at com.alibaba.excel.ExcelReader.read(ExcelReader.java:182)
at com.qiyee.web.controller.common.dfdfdfdfd.dkehnt(dfdfdfdfd.java:89)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
at com.intellij.rt.execution.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:47)
at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:242)
at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:70)
Caused by: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence. at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source) at org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source) at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.parseXmlSource(XlsxSaxAnalyser.java:178) ... 26 more Caused by: org.apache.xerces.impl.io.MalformedByteSequenceException: Invalid byte 2 of 4-byte UTF-8 sequence. at org.apache.xerces.impl.io.UTF8Reader.invalidByte(Unknown Source) at org.apache.xerces.impl.io.UTF8Reader.read(Unknown Source) at org.apache.xerces.impl.XMLEntityScanner.load(Unknown Source) at org.apache.xerces.impl.XMLEntityScanner.scanLiteral(Unknown Source) at org.apache.xerces.impl.XMLScanner.scanAttributeValue(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanAttribute(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanStartElement(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispatch(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) ... 29 more
使用wps 打开excel 找到报错的那一行数据,查看 并无特殊字符,查看excel zip解压后的文件 并无异常.