[alibaba/easyexcel]读取 poi 生成的excel报错 com.alibaba.excel.exception.ExcelAnalysisException: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence.

2024-05-09 871 views
4

ZipOutputStream zipOutputStream = new ZipOutputStream(new FileOutputStream("E:/company/qiyi/bi2.0/temp1.zip")); zipOutputStream.putNextEntry(new ZipEntry("temp.csv"));

  ExcelReader reader = EasyExcel.read("E:/company/qiyi/bi2.0/Job_Seeker_V15.xlsx").build();
  List<ReadSheet> readSheets = reader.excelExecutor().sheetList();
  List<ReadSheet> results = Lists.newArrayList();

  if(!readSheets.isEmpty()){
      for(int i = 0; i < readSheets.size(); i++){
          ReadSheet init;
          if(0 == i){
              init = EasyExcel.readSheet(readSheets.get(i).getSheetNo()).registerReadListener(new NoModelDataExcelToCsvListener(zipOutputStream, true)).build();
          }else{
              init = EasyExcel.readSheet(readSheets.get(i).getSheetNo()).registerReadListener(new NoModelDataExcelToCsvListener(zipOutputStream, false)).build();
          }
          results.add(init);
      }
  }

  reader.read(results);
  reader.finish();

  zipOutputStream.closeEntry();
  zipOutputStream.close();
public class NoModelDataExcelToCsvListener extends AnalysisEventListener<Map<Integer, String>> {
    /**
     * 每隔5条存储数据库,实际使用中可以3000条,然后清理list ,方便内存回收
     */
    private static final int BATCH_COUNT = 100;
    List<Map<Integer, String>> list = new ArrayList<Map<Integer, String>>();
    private ZipOutputStream zipOutputStream;
    /**
     * 是否读取表头开关
     */
    private boolean headerOk;

    public NoModelDataExcelToCsvListener(){

    }

    public NoModelDataExcelToCsvListener(ZipOutputStream zipOutputStream, boolean headerOk){
        this.zipOutputStream = zipOutputStream;
        this.headerOk = headerOk;
    }

    @SneakyThrows
    @Override
    public void invoke(Map<Integer, String> data, AnalysisContext context) {
        log.info("解析到一条数据:id:{}, userId", data.get(0), data.get(1));
        list.add(data);
        if(list.size() >= BATCH_COUNT){
            saveData();
            list.clear();
        }
    }

    @Override
    public void doAfterAllAnalysed(AnalysisContext context) {
        saveData();
        log.info("====== 公共导出 excel 转 csv 完成! =====");
    }

    @Override
    public void onException(Exception exception, AnalysisContext context) {
        log.error("解析失败,但是继续解析下一行:{}", exception.getMessage());
        // 如果是某一个单元格的转换异常 能获取到具体行号
        // 如果要获取头的信息 配合invokeHeadMap使用
        if (exception instanceof ExcelAnalysisException) {
            ExcelDataConvertException excelDataConvertException = (ExcelDataConvertException) exception;
            log.error("第{}行,第{}列解析异常", excelDataConvertException.getRowIndex(),
                    excelDataConvertException.getColumnIndex());
        }
    }

    /**
     * 加上存储数据库
     */
    private void saveData() {
//        log.info("{}条数据,开始存储数据库!", list.size());

//        log.info("存储数据库成功!");
    }

    @Override
    public void invokeHeadMap(Map<Integer, String> headMap, AnalysisContext context) {
        log.info("head解析到一条数据:{}", JSON.toJSONString(headMap));
        if(headerOk){
            list.add(headMap);
        }else{
            log.info("=== 多个sheet,当前不需要表头信息 ====");
        }
    }
}

异常提示

com.alibaba.excel.exception.ExcelAnalysisException: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence.

at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.parseXmlSource(XlsxSaxAnalyser.java:183)
at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.execute(XlsxSaxAnalyser.java:201)
at com.alibaba.excel.analysis.ExcelAnalyserImpl.analysis(ExcelAnalyserImpl.java:115)
at com.alibaba.excel.ExcelReader.read(ExcelReader.java:182)
at com.qiyee.web.controller.common.dfdfdfdfd.dkehnt(dfdfdfdfd.java:89)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
at com.intellij.rt.execution.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:47)
at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:242)
at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:70)

Caused by: org.xml.sax.SAXParseException; lineNumber: 44476; columnNumber: 81; Invalid byte 2 of 4-byte UTF-8 sequence. at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source) at org.apache.xerces.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source) at com.alibaba.excel.analysis.v07.XlsxSaxAnalyser.parseXmlSource(XlsxSaxAnalyser.java:178) ... 26 more Caused by: org.apache.xerces.impl.io.MalformedByteSequenceException: Invalid byte 2 of 4-byte UTF-8 sequence. at org.apache.xerces.impl.io.UTF8Reader.invalidByte(Unknown Source) at org.apache.xerces.impl.io.UTF8Reader.read(Unknown Source) at org.apache.xerces.impl.XMLEntityScanner.load(Unknown Source) at org.apache.xerces.impl.XMLEntityScanner.scanLiteral(Unknown Source) at org.apache.xerces.impl.XMLScanner.scanAttributeValue(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanAttribute(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanStartElement(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispatch(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) ... 29 more

使用wps 打开excel 找到报错的那一行数据,查看 并无特殊字符,查看excel zip解压后的文件 并无异常.

回答

5

将多余的sheet页删除后 单独跑有问题的sheet 就是ok的

1

提供下excel

1

我也想看这个问题!!

4

@chenzhongyu11 我把poi 生成excel 的 cellStyle 设置为文本(之前都是默认常规) 尝试了下发现 easyExcel 3.1.0 读取无异常了。 十分费解