解决大批量数据导出Excel产生内存溢出的方案

r囧r小猫 2022-08-13 14:58 287阅读 0赞

解决大批量数据导出Excel产生内存溢出的方案

POI或者JXL在导出大量数据的时候,由于它们将每一个单元格生都成一个Cell对象,所以很容易导致内存溢出。解决这个问题,唯一的办法是弄清楚Excel的二进制格式(汗),并且用流的方式读写Excel。POI和JXL其实提供了二进制方式读写Excel的API,只是因为缺少文档和实例,所以使用的人不多。我编写了这个简单的合并Excel的类,它只适合合并结构相同的多个Excel文件。好在这个功能已经可以解决数据导出产生OOM的问题:将数据分批导出然后合并。
下面的代码使用POI3.1,合并11个3000多行的文档用时约6秒,我实在找不到更多的测试用的文档了。

Java代码 复制代码

  1. @SuppressWarnings(“unchecked”)
  2. public class XlsMergeUtil {
  3. private static Logger logger = LoggerFactory.getLogger(XlsMergeUtil.class);
  4. /**
  5. * 将多个Xls文件合并为一个,适用于只有一个sheet,并且格式相同的文档
  6. * @param inputs 输入的Xls文件
  7. * @param out 输出文件
  8. */
  9. public static void merge(InputStream[] inputs, OutputStream out) {
  10. if (inputs == null || inputs.length <= 1) {
  11. throw new IllegalArgumentException(“没有传入输入流数组,或只有一个输入流.”);
  12. }
  13. List rootRecords = getRecords(inputs[0]);
  14. Workbook workbook = Workbook.createWorkbook(rootRecords);
  15. List sheets = getSheets(workbook, rootRecords);
  16. if(sheets == null || sheets.size() == 0) {
  17. throw new IllegalArgumentException(“第一篇文档的格式错误,必须有至少一个sheet”);
  18. }
  19. //以第一篇文档的最后一个sheet为根,以后的数据都追加在这个sheet后面
  20. Sheet rootSheet = sheets.get(sheets.size() - 1);
  21. int rootRows = getRowsOfSheet(rootSheet); //记录第一篇文档的行数,以后的行数在此基础上增加
  22. rootSheet.setLoc(rootSheet.getDimsLoc());
  23. Map map = new HashMap(10000);
  24. for (int i = 1; i < inputs.length; i++) { //从第二篇开始遍历
  25. List records = getRecords(inputs[i]);
  26. int rowsOfCurXls = 0;
  27. //遍历当前文档的每一个record
  28. for (Iterator itr = records.iterator(); itr.hasNext();) {
  29. Record record = (Record) itr.next();
  30. if (record.getSid() == RowRecord.sid) { //如果是RowRecord
  31. RowRecord rowRecord = (RowRecord) record;
  32. //调整行号
  33. rowRecord.setRowNumber(rootRows + rowRecord.getRowNumber());
  34. rootSheet.addRow(rowRecord); //追加Row
  35. rowsOfCurXls++; //记录当前文档的行数
  36. }
  37. //SST记录,SST保存xls文件中唯一的String,各个String都是对应着SST记录的索引
  38. else if (record.getSid() == SSTRecord.sid) {
  39. SSTRecord sstRecord = (SSTRecord) record;
  40. for (int j = 0; j < sstRecord.getNumUniqueStrings(); j++) {
  41. int index = workbook.addSSTString(sstRecord.getString(j));
  42. //记录原来的索引和现在的索引的对应关系
  43. map.put(Integer.valueOf(j), Integer.valueOf(index));
  44. }
  45. } else if (record.getSid() == LabelSSTRecord.sid) {
  46. LabelSSTRecord label = (LabelSSTRecord) record;
  47. //调整SST索引的对应关系
  48. label.setSSTIndex(map.get(Integer.valueOf(label.getSSTIndex())));
  49. }
  50. //追加ValueCell
  51. if (record instanceof CellValueRecordInterface) {
  52. CellValueRecordInterface cell = (CellValueRecordInterface) record;
  53. int cellRow = cell.getRow() + rootRows;
  54. cell.setRow(cellRow);
  55. rootSheet.addValueRecord(cellRow, cell);
  56. }
  57. }
  58. rootRows += rowsOfCurXls;
  59. }
  60. byte[] data = getBytes(workbook, sheets.toArray(new Sheet[0]));
  61. write(out, data);
  62. }
  63. static void write(OutputStream out, byte[] data) {
  64. POIFSFileSystem fs = new POIFSFileSystem();
  65. // Write out the Workbook stream
  66. try {
  67. fs.createDocument(new ByteArrayInputStream(data), “Workbook”);
  68. fs.writeFilesystem(out);
  69. out.flush();
  70. } catch (IOException e) {
  71. e.printStackTrace();
  72. } finally {
  73. try {
  74. out.close();
  75. } catch (IOException e) {
  76. e.printStackTrace();
  77. }
  78. }
  79. }
  80. static List getSheets(Workbook workbook, List records) {
  81. int recOffset = workbook.getNumRecords();
  82. int sheetNum = 0;
  83. // convert all LabelRecord records to LabelSSTRecord
  84. convertLabelRecords(records, recOffset, workbook);
  85. List sheets = new ArrayList();
  86. while (recOffset < records.size()) {
  87. Sheet sh = Sheet.createSheet(records, sheetNum++, recOffset);
  88. recOffset = sh.getEofLoc() + 1;
  89. if (recOffset == 1) {
  90. break;
  91. }
  92. sheets.add(sh);
  93. }
  94. return sheets;
  95. }
  96. static int getRows(List records) {
  97. int row = 0;
  98. for (Iterator itr = records.iterator(); itr.hasNext();) {
  99. Record record = (Record) itr.next();
  100. if (record.getSid() == RowRecord.sid) {
  101. row++;
  102. }
  103. }
  104. return row;
  105. }
  106. static int getRowsOfSheet(Sheet sheet) {
  107. int rows = 0;
  108. sheet.setLoc(0);
  109. while(sheet.getNextRow() != null) {
  110. rows++;
  111. }
  112. return rows;
  113. }
  114. @SuppressWarnings(“deprecation”)
  115. static List getRecords(InputStream input) {
  116. try {
  117. POIFSFileSystem poifs = new POIFSFileSystem(input);
  118. InputStream stream = poifs.getRoot().createDocumentInputStream(“Workbook”);
  119. return org.apache.poi.hssf.record.RecordFactory.createRecords(stream);
  120. } catch (IOException e) {
  121. logger.error(“IO异常:{}“, e.getMessage());
  122. e.printStackTrace();
  123. }
  124. return Collections.EMPTY_LIST;
  125. }
  126. static void convertLabelRecords(List records, int offset, Workbook workbook) {
  127. for (int k = offset; k < records.size(); k++) {
  128. Record rec = (Record) records.get(k);
  129. if (rec.getSid() == LabelRecord.sid) {
  130. LabelRecord oldrec = (LabelRecord) rec;
  131. records.remove(k);
  132. LabelSSTRecord newrec = new LabelSSTRecord();
  133. int stringid = workbook.addSSTString(new UnicodeString(oldrec.getValue()));
  134. newrec.setRow(oldrec.getRow());
  135. newrec.setColumn(oldrec.getColumn());
  136. newrec.setXFIndex(oldrec.getXFIndex());
  137. newrec.setSSTIndex(stringid);
  138. records.add(k, newrec);
  139. }
  140. }
  141. }
  142. public static byte[] getBytes(Workbook workbook, Sheet[] sheets) {
  143. // HSSFSheet[] sheets = getSheets();
  144. int nSheets = sheets.length;
  145. // before getting the workbook size we must tell the sheets that
  146. // serialization is about to occur.
  147. for (int i = 0; i < nSheets; i++) {
  148. sheets[i].preSerialize();
  149. }
  150. int totalsize = workbook.getSize();
  151. // pre-calculate all the sheet sizes and set BOF indexes
  152. int[] estimatedSheetSizes = new int[nSheets];
  153. for (int k = 0; k < nSheets; k++) {
  154. workbook.setSheetBof(k, totalsize);
  155. int sheetSize = sheets[k].getSize();
  156. estimatedSheetSizes[k] = sheetSize;
  157. totalsize += sheetSize;
  158. }
  159. byte[] retval = new byte[totalsize];
  160. int pos = workbook.serialize(0, retval);
  161. for (int k = 0; k < nSheets; k++) {
  162. int serializedSize = sheets[k].serialize(pos, retval);
  163. if (serializedSize != estimatedSheetSizes[k]) {
  164. throw new IllegalStateException(“Actual serialized sheet size (“ + serializedSize
    • “) differs from pre-calculated size (“ + estimatedSheetSizes[k] + “) for sheet (“ + k
    • “)”);
  165. Sheet.serializeIndexRecord() does not
  166. }
  167. pos += serializedSize;
  168. }
  169. return retval;
  170. }
  171. public static void main(String[] args) throws Exception {
  172. final String PATH = “E://projects//java//ws_0//export//data//“;
  173. InputStream[] inputs = new InputStream[10];
  174. inputs[0] = new java.io.FileInputStream(PATH + “07_10.xls”);
  175. for(int i = 1; i <= 9; i++) {
  176. inputs[i] = new java.io.FileInputStream(PATH + “07_0” + i + “.xls”);
  177. }
  178. OutputStream out = new FileOutputStream(PATH + “xx.xls”);
  179. long t1 = System.currentTimeMillis();
  180. merge(inputs, out);
  181. System.out.println(System.currentTimeMillis() - t1);//简陋的测试一下时间
  182. }
  183. }

leasass 写道

讲讲我的解决方法,
一般导出的Excel用来做报表或者统计用的,也不要求图片或其它对象,就是数据,
我的做法是写文本格式的Excel文件,而不是用POI等生成二进制的文件,
第一种格式,CSV,最简单的,格式最差,最基本的行列,不能合并,不能设置着色,
第二种,HTML格式的,如:”

….
“这样的文本,后辍名改为XLS就可以了,可以设置跨行列的合并,可以着色,图片没试过,估计是可以的,还可以设置单元格对齐,单元格的格式等,

写文本的时候,根本不用担心 OOM的问题,我最大写过 500多M的一个excel文件,不过这已经没有意义了,excel一个Sheet最大6万多行,多了也显示不出来.

你的办法我也用过,的确没有溢出的问题,不过客户不同意的。我们现在的这个的客户很各色,以前其他的客户就可以用你的办法搞定。比如客户要求一次导出20w行数据,这就要求导出的文件必须带sheet,上面的代码也不支持sheet,下面的就可以了:

Java代码 复制代码

  1. @SuppressWarnings(“unchecked”)
  2. public class XlsMergeUtil {
  3. private static Logger logger = LoggerFactory.getLogger(XlsMergeUtil.class);
  4. /**
  5. * 将多个Xls文件合并为一个,适用于只有一个sheet,并且格式相同的文档
  6. * @param inputs 输入的Xls文件,第一个XLS文件必须给出足够sheet空间
  7. * 例如,总共200000行数据,第一个文件至少3个空白sheet
  8. * @param out 输出文件
  9. */
  10. public static void merge(InputStream[] inputs, OutputStream out) {
  11. if (inputs == null || inputs.length <= 1) {
  12. throw new IllegalArgumentException(“没有传入输入流数组,或只有一个输入流.”);
  13. }
  14. List rootRecords = getRecords(inputs[0]);
  15. Workbook workbook = Workbook.createWorkbook(rootRecords);
  16. List sheets = getSheets(workbook, rootRecords);
  17. if(sheets == null || sheets.size() == 0) {
  18. throw new IllegalArgumentException(“第一篇文档的格式错误,必须有至少一个sheet”);
  19. }
  20. //以第一篇文档的第一个sheet为根,以后的数据都追加在这个sheet后面
  21. Sheet rootSheet = sheets.get(0);
  22. int rootRows = getRowsOfSheet(rootSheet); //记录第一篇文档的行数,以后的行数在此基础上增加
  23. rootSheet.setLoc(rootSheet.getDimsLoc());
  24. Map map = new HashMap(10000);
  25. int sheetIndex = 0;
  26. for (int i = 1; i < inputs.length; i++) { //从第二篇开始遍历
  27. List records = getRecords(inputs[i]);
  28. //达到最大行数限制,换一个sheet
  29. if(getRows(records) + rootRows >= RowRecord.MAX_ROW_NUMBER) {
  30. if((++sheetIndex) > (sheets.size() - 1)) {
  31. logger.warn(“第一个文档给出的sheets小于需要的数量,部分数据未能合并.”);
  32. break;
  33. }
  34. rootSheet = sheets.get(sheetIndex);
  35. rootRows = getRowsOfSheet(rootSheet);
  36. rootSheet.setLoc(rootSheet.getDimsLoc());
  37. logger.debug(“切换Sheet{}“, sheetIndex);
  38. }
  39. int rowsOfCurXls = 0;
  40. //遍历当前文档的每一个record
  41. for (Iterator itr = records.iterator(); itr.hasNext();) {
  42. Record record = (Record) itr.next();
  43. if (record.getSid() == RowRecord.sid) { //如果是RowRecord
  44. RowRecord rowRecord = (RowRecord) record;
  45. //调整行号
  46. rowRecord.setRowNumber(rootRows + rowRecord.getRowNumber());
  47. rootSheet.addRow(rowRecord); //追加Row
  48. rowsOfCurXls++; //记录当前文档的行数
  49. }
  50. //SST记录,SST保存xls文件中唯一的String,各个String都是对应着SST记录的索引
  51. else if (record.getSid() == SSTRecord.sid) {
  52. SSTRecord sstRecord = (SSTRecord) record;
  53. for (int j = 0; j < sstRecord.getNumUniqueStrings(); j++) {
  54. int index = workbook.addSSTString(sstRecord.getString(j));
  55. //记录原来的索引和现在的索引的对应关系
  56. map.put(Integer.valueOf(j), Integer.valueOf(index));
  57. }
  58. } else if (record.getSid() == LabelSSTRecord.sid) {
  59. LabelSSTRecord label = (LabelSSTRecord) record;
  60. //调整SST索引的对应关系
  61. label.setSSTIndex(map.get(Integer.valueOf(label.getSSTIndex())));
  62. }
  63. //追加ValueCell
  64. if (record instanceof CellValueRecordInterface) {
  65. CellValueRecordInterface cell = (CellValueRecordInterface) record;
  66. int cellRow = cell.getRow() + rootRows;
  67. cell.setRow(cellRow);
  68. rootSheet.addValueRecord(cellRow, cell);
  69. }
  70. }
  71. rootRows += rowsOfCurXls;
  72. }
  73. byte[] data = getBytes(workbook, sheets.toArray(new Sheet[0]));
  74. write(out, data);
  75. }
  76. static void write(OutputStream out, byte[] data) {
  77. POIFSFileSystem fs = new POIFSFileSystem();
  78. // Write out the Workbook stream
  79. try {
  80. fs.createDocument(new ByteArrayInputStream(data), “Workbook”);
  81. fs.writeFilesystem(out);
  82. out.flush();
  83. } catch (IOException e) {
  84. e.printStackTrace();
  85. } finally {
  86. try {
  87. out.close();
  88. } catch (IOException e) {
  89. e.printStackTrace();
  90. }
  91. }
  92. }
  93. static List getSheets(Workbook workbook, List records) {
  94. int recOffset = workbook.getNumRecords();
  95. int sheetNum = 0;
  96. // convert all LabelRecord records to LabelSSTRecord
  97. convertLabelRecords(records, recOffset, workbook);
  98. List sheets = new ArrayList();
  99. while (recOffset < records.size()) {
  100. Sheet sh = Sheet.createSheet(records, sheetNum++, recOffset);
  101. recOffset = sh.getEofLoc() + 1;
  102. if (recOffset == 1) {
  103. break;
  104. }
  105. sheets.add(sh);
  106. }
  107. return sheets;
  108. }
  109. static int getRows(List records) {
  110. int row = 0;
  111. for (Iterator itr = records.iterator(); itr.hasNext();) {
  112. Record record = (Record) itr.next();
  113. if (record.getSid() == RowRecord.sid) {
  114. row++;
  115. }
  116. }
  117. return row;
  118. }
  119. static int getRowsOfSheet(Sheet sheet) {
  120. int rows = 0;
  121. sheet.setLoc(0);
  122. while(sheet.getNextRow() != null) {
  123. rows++;
  124. }
  125. return rows;
  126. }
  127. @SuppressWarnings(“deprecation”)
  128. static List getRecords(InputStream input) {
  129. try {
  130. POIFSFileSystem poifs = new POIFSFileSystem(input);
  131. InputStream stream = poifs.getRoot().createDocumentInputStream(“Workbook”);
  132. return org.apache.poi.hssf.record.RecordFactory.createRecords(stream);
  133. } catch (IOException e) {
  134. logger.error(“IO异常:{}“, e.getMessage());
  135. e.printStackTrace();
  136. }
  137. return Collections.EMPTY_LIST;
  138. }
  139. static void convertLabelRecords(List records, int offset, Workbook workbook) {
  140. for (int k = offset; k < records.size(); k++) {
  141. Record rec = (Record) records.get(k);
  142. if (rec.getSid() == LabelRecord.sid) {
  143. LabelRecord oldrec = (LabelRecord) rec;
  144. records.remove(k);
  145. LabelSSTRecord newrec = new LabelSSTRecord();
  146. int stringid = workbook.addSSTString(new UnicodeString(oldrec.getValue()));
  147. newrec.setRow(oldrec.getRow());
  148. newrec.setColumn(oldrec.getColumn());
  149. newrec.setXFIndex(oldrec.getXFIndex());
  150. newrec.setSSTIndex(stringid);
  151. records.add(k, newrec);
  152. }
  153. }
  154. }
  155. public static byte[] getBytes(Workbook workbook, Sheet[] sheets) {
  156. // HSSFSheet[] sheets = getSheets();
  157. int nSheets = sheets.length;
  158. // before getting the workbook size we must tell the sheets that
  159. // serialization is about to occur.
  160. for (int i = 0; i < nSheets; i++) {
  161. sheets[i].preSerialize();
  162. }
  163. int totalsize = workbook.getSize();
  164. // pre-calculate all the sheet sizes and set BOF indexes
  165. int[] estimatedSheetSizes = new int[nSheets];
  166. for (int k = 0; k < nSheets; k++) {
  167. workbook.setSheetBof(k, totalsize);
  168. int sheetSize = sheets[k].getSize();
  169. estimatedSheetSizes[k] = sheetSize;
  170. totalsize += sheetSize;
  171. }
  172. logger.debug(“分配内存{}bytes”, totalsize);
  173. byte[] retval = new byte[totalsize];
  174. int pos = workbook.serialize(0, retval);
  175. for (int k = 0; k < nSheets; k++) {
  176. int serializedSize = sheets[k].serialize(pos, retval);
  177. if (serializedSize != estimatedSheetSizes[k]) {
  178. // Wrong offset values have been passed in the call to setSheetBof() above.
  179. // For books with more than one sheet, this discrepancy would cause excel
  180. // to report errors and loose data while reading the workbook
  181. throw new IllegalStateException(“Actual serialized sheet size (“ + serializedSize
    • “) differs from pre-calculated size (“ + estimatedSheetSizes[k] + “) for sheet (“ + k
    • “)”);
  182. // TODO - add similar sanity check to ensure that Sheet.serializeIndexRecord() does not
  183. // write mis-aligned offsets either
  184. }
  185. pos += serializedSize;
  186. }
  187. return retval;
  188. }
  189. public static void main(String[] args) throws Exception {
  190. final String PATH = “E://projects//java//ws_0//export//data//“;
  191. InputStream[] inputs = new InputStream[25];
  192. inputs[0] = new java.io.FileInputStream(PATH + “07_10.xls”);
  193. for(int i = 1; i < 25 ; i++) {
  194. inputs[i] = new java.io.FileInputStream(PATH + “07_01.xls”);
  195. }
  196. OutputStream out = new FileOutputStream(PATH + “xx.xls”);
  197. long t1 = System.currentTimeMillis();
  198. merge(inputs, out);
  199. System.out.println(System.currentTimeMillis() - t1);
  200. }
  201. }

转自:http://www.javaeye.com/topic/240053

发表评论

表情:
评论列表 (有 0 条评论,287人围观)

还没有评论,来说两句吧...

相关阅读