mirror of
https://github.com/jmcnamara/libxlsxwriter.git
synced 2026-05-15 14:15:54 -06:00
Added constant_memory mode.
This commit is contained in:
parent
afa9305893
commit
63f1419b76
18 changed files with 194 additions and 39 deletions
|
|
@ -23,6 +23,7 @@ my @examples = (
|
|||
[ 'dates_and_times02.c', 'Writing dates and times with datetime' ],
|
||||
[ 'dates_and_times03.c', 'Dates and times with different formats' ],
|
||||
[ 'utf8.c', 'A example of some UTF-8 text' ],
|
||||
[ 'constant_memory.c', 'Write a large file with constant memory usage' ],
|
||||
);
|
||||
|
||||
# Convert the array refs to a hash for lookups.
|
||||
|
|
|
|||
BIN
docs/images/constant_memory.png
Normal file
BIN
docs/images/constant_memory.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 72 KiB |
|
|
@ -55,8 +55,18 @@ If required, it is also possible to specify the calculated result of the
|
|||
formula using the worksheet_write_formula_num() function.
|
||||
|
||||
|
||||
# Reporting Bugs
|
||||
## Strings aren't displayed in Apple Numbers in 'constant_memory' mode
|
||||
|
||||
In new_workbook_opt `constant_memory` mode libxlsxwriter uses an optimisation
|
||||
where cell strings aren't stored in an Excel structure called "shared strings"
|
||||
and instead are written "in-line".
|
||||
|
||||
This is a documented Excel feature that is supported by most spreadsheet
|
||||
applications. One known exception is Apple Numbers for Mac where the string
|
||||
data isn't displayed.
|
||||
|
||||
|
||||
# Reporting Bugs
|
||||
|
||||
Here are some tips on reporting bugs in `libxlsxwriter`.
|
||||
|
||||
|
|
|
|||
|
|
@ -89,7 +89,14 @@ Next example: @ref utf8.c
|
|||
@example utf8.c
|
||||
A simple Unicode UTF-8 example. Note, the source file is UTF-8 encoded.
|
||||
|
||||
|
||||
Next example: @ref constant_memory.c
|
||||
@image html utf8.png
|
||||
|
||||
@example constant_memory.c
|
||||
Example of using libxlsxwriter for writing large files in constant memory
|
||||
mode.
|
||||
|
||||
@image html constant_memory.png
|
||||
|
||||
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1,16 +1,73 @@
|
|||
/**
|
||||
@page working_with_memory Working with Memory and Performance
|
||||
|
||||
Constant Memory Mode
|
||||
--------------------
|
||||
|
||||
By default `libxlsxwriter` holds all cell data in memory. This is to allow
|
||||
future features where formatting is applied separately from the data.
|
||||
By default libxlsxwriter holds all cell data in memory. This is to allow
|
||||
non-sequential data storage and also to allow future features where formatting
|
||||
is applied separately from the data.
|
||||
|
||||
The effect of this is that for large files libxlsxwriter can consume a lot of
|
||||
memory.
|
||||
|
||||
This issue will be resolved in the next 1-2 releases to allow a small constant
|
||||
memory usage like in the Perl and Python modules that libxlsxwriter was ported
|
||||
from.
|
||||
Fortunately, this memory usage can be reduced almost completely by using
|
||||
new_workbook_opt() and the lxw_workbook_options `constant_memory` property:
|
||||
|
||||
@dontinclude constant_memory.c
|
||||
@skipline include
|
||||
@until return
|
||||
@skipline }
|
||||
|
||||
This optimisation works by flushing each row after a subsequent row is written.
|
||||
In this way the largest amount of data held in memory for a worksheet is the
|
||||
amount of data required to hold a single row of data.
|
||||
|
||||
If required, this memory usage can be reduced even more by setting
|
||||
`LXW_COL_MAX` in worksheet.c from 16384 down to a value that matches the
|
||||
maximum column that is likely to be encountered.
|
||||
|
||||
The trade-off when using `constant_memory` mode is that data must be added
|
||||
sequentially in row order and you won't be able to take advantage of any new
|
||||
features that manipulate cell data after it is written. Currently the only
|
||||
such feature is worksheet_set_row() which in `constant_memory` mode can only
|
||||
be used when writing data to the current row.
|
||||
|
||||
Row Column Order
|
||||
----------------
|
||||
|
||||
Since each new row flushes the previous row, data must be written in sequential
|
||||
row order when `constant_memory` mode is on:
|
||||
|
||||
@code
|
||||
lxw_workbook *workbook = new_workbook_opt("constant_memory.xlsx", &options);
|
||||
...
|
||||
|
||||
// !! Don't use "column x row" order in 'constant_memory' mode. Only
|
||||
// the first column of data will be written.
|
||||
for (col = 0; col < max_col; col++) {
|
||||
for (row = 0; row < max_row; row++) {
|
||||
worksheet_write_number(worksheet, row, col, 123.45, NULL);
|
||||
}
|
||||
}
|
||||
@endcode
|
||||
|
||||
Inline strings
|
||||
--------------
|
||||
|
||||
Another optimisation that is used to reduce memory usage in `constant_memory`
|
||||
mode is that cell strings aren't stored in an Excel structure called "shared
|
||||
strings" and instead are written "in-line". This is a documented Excel feature
|
||||
that is supported by most spreadsheet applications. One known exception is
|
||||
Apple Numbers for Mac where the string data isn't displayed.
|
||||
|
||||
Performance
|
||||
-----------
|
||||
|
||||
Currently the library isn't optimised. There are some known bottlenecks that
|
||||
will be reduced in upcoming versions. Also, the library is currently single
|
||||
threaded.
|
||||
|
||||
|
||||
Next: @ref examples
|
||||
|
||||
|
|
|
|||
33
examples/constant_memory.c
Normal file
33
examples/constant_memory.c
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Example of using libxlsxwriter for writing large files in constant memory
|
||||
* mode.
|
||||
*
|
||||
* Copyright 2014, John McNamara, jmcnamara@cpan.org
|
||||
*
|
||||
*/
|
||||
|
||||
#include "xlsxwriter.h"
|
||||
|
||||
int main() {
|
||||
|
||||
lxw_row_t row;
|
||||
lxw_col_t col;
|
||||
lxw_row_t max_row = 1000;
|
||||
lxw_col_t max_col = 50;
|
||||
|
||||
/* Set the worksheet options. */
|
||||
lxw_workbook_options options;
|
||||
options.constant_memory = 1;
|
||||
|
||||
/* Create a new workbook with options. */
|
||||
lxw_workbook *workbook = new_workbook_opt("constant_memory.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
|
||||
for (row = 0; row < max_row; row++) {
|
||||
for (col = 0; col < max_col; col++) {
|
||||
worksheet_write_number(worksheet, row, col, 123.45, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
return workbook_close(workbook);
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* libxlsxwriter
|
||||
*
|
||||
*
|
||||
* Copyright 2014, John McNamara, jmcnamara@cpan.org. See LICENSE.txt.
|
||||
*/
|
||||
|
||||
|
|
@ -12,7 +12,7 @@
|
|||
* represents the Excel file as it is written on disk.
|
||||
*
|
||||
* See @ref workbook.h for full details of the functionality.
|
||||
*
|
||||
*
|
||||
* @file workbook.h
|
||||
*
|
||||
* @brief Functions related to creating an Excel xlsx workbook.
|
||||
|
|
@ -64,6 +64,21 @@ enum lxw_close_error {
|
|||
/* TODO. Need to add/document more. */
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Workbook options.
|
||||
*
|
||||
* Optional parameters when creating a new Workbook object via
|
||||
* new_workbook_opt().
|
||||
*
|
||||
* Currently only the `constant_memory` property is supported:
|
||||
*
|
||||
* * `constant_memory`
|
||||
*/
|
||||
typedef struct lxw_workbook_options {
|
||||
/** Optimise the workbook to use constant memory for worksheets */
|
||||
uint8_t constant_memory;
|
||||
} lxw_workbook_options;
|
||||
|
||||
/**
|
||||
* @brief Struct to represent an Excel workbook.
|
||||
*
|
||||
|
|
@ -79,6 +94,7 @@ typedef struct lxw_workbook {
|
|||
lxw_sst *sst;
|
||||
lxw_doc_properties *properties;
|
||||
const char *filename;
|
||||
lxw_workbook_options options;
|
||||
|
||||
uint16_t num_sheets;
|
||||
uint16_t first_sheet;
|
||||
|
|
@ -122,6 +138,33 @@ extern "C" {
|
|||
*/
|
||||
lxw_workbook *new_workbook(const char *filename);
|
||||
|
||||
/**
|
||||
* @brief Create a new workbook object, and set the workbook options.
|
||||
*
|
||||
* @param filename The name of the new Excel file to create.
|
||||
* @param options Workbook options.
|
||||
*
|
||||
* @return A lxw_workbook instance.
|
||||
*
|
||||
* This method is the same as the `new_workbook()` constructor but allows
|
||||
* additional options to be set.
|
||||
*
|
||||
* @code
|
||||
* lxw_workbook_options options = {.constant_memory = 1};
|
||||
*
|
||||
* lxw_workbook *workbook = new_workbook_opt("filename.xlsx", &options);
|
||||
* @endcode
|
||||
*
|
||||
* Note, in this mode a row of data is written and then discarded when a cell
|
||||
* in a new row is added via one of the worksheet `worksheet_write_*()`
|
||||
* methods. Therefore, once this mode is active, data should be written in
|
||||
* sequential row order.
|
||||
*
|
||||
* See @ref working_with_memory for more details.
|
||||
*
|
||||
*/
|
||||
lxw_workbook *new_workbook_opt(const char *filename, lxw_workbook_options *options);
|
||||
|
||||
/**
|
||||
* @brief Add a new worksheet to a workbook:
|
||||
*
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ INC_DIR = ../include
|
|||
MINIZIP_DIR = ../third_party/minizip
|
||||
|
||||
# Flags passed to compiler.
|
||||
CFLAGS += -g -O0 -Wall -Wextra -pedantic -ansi
|
||||
CFLAGS += -g -O3 -Wall -Wextra -pedantic -ansi
|
||||
|
||||
# Library names.
|
||||
LIBXLSXWRITER_A = libxlsxwriter.a
|
||||
|
|
@ -81,7 +81,7 @@ test_compile : $(OBJS)
|
|||
$(Q)$(CC) -fPIC -I$(INC_DIR) $(CFLAGS) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
%.to : %.c $(HDRS)
|
||||
$(Q)$(CC) -DTESTING -I$(INC_DIR) $(CFLAGS) $(CXXFLAGS) -c $< -o $@
|
||||
$(Q)$(CC) -g -O0 -DTESTING -I$(INC_DIR) $(CFLAGS) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
|
||||
# Clean up any temp/build files.
|
||||
|
|
|
|||
|
|
@ -584,6 +584,15 @@ _workbook_assemble_xml_file(lxw_workbook *self)
|
|||
*/
|
||||
lxw_workbook *
|
||||
new_workbook(const char *filename)
|
||||
{
|
||||
return new_workbook_opt(filename, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a new workbook object with options.
|
||||
*/
|
||||
lxw_workbook *
|
||||
new_workbook_opt(const char *filename, lxw_workbook_options *options)
|
||||
{
|
||||
lxw_format *format;
|
||||
lxw_workbook *workbook;
|
||||
|
|
@ -622,6 +631,9 @@ new_workbook(const char *filename)
|
|||
/* Initialise its index. */
|
||||
_get_xf_index(format);
|
||||
|
||||
if (options)
|
||||
workbook->options.constant_memory = options->constant_memory;
|
||||
|
||||
return workbook;
|
||||
|
||||
mem_error:
|
||||
|
|
@ -658,7 +670,7 @@ workbook_add_worksheet(lxw_workbook *self, const char *sheetname)
|
|||
init_data.hidden = 0;
|
||||
init_data.index = self->num_sheets;
|
||||
init_data.sst = self->sst;
|
||||
init_data.optimize = self->optimize;
|
||||
init_data.optimize = self->options.constant_memory;
|
||||
|
||||
/* Create a new worksheet object. */
|
||||
worksheet = _new_worksheet(&init_data);
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
#define LXW_ROW_MAX 1048576
|
||||
#define LXW_COL_MAX 16384
|
||||
#define LXW_STR_MAX 32767
|
||||
#define BUFFER_SIZE 4096
|
||||
|
||||
/*
|
||||
* Forward declarations.
|
||||
|
|
@ -625,9 +626,8 @@ _worksheet_write_sheet_data(lxw_worksheet *self)
|
|||
STATIC void
|
||||
_worksheet_write_optimized_sheet_data(lxw_worksheet *self)
|
||||
{
|
||||
#define buffer_size 4096
|
||||
uint16_t read_size = 1;
|
||||
char buffer[buffer_size];
|
||||
char buffer[BUFFER_SIZE];
|
||||
|
||||
if (self->dim_rowmin == LXW_ROW_MAX) {
|
||||
/* If the dimensions aren't defined then there is no data to write. */
|
||||
|
|
@ -642,7 +642,7 @@ _worksheet_write_optimized_sheet_data(lxw_worksheet *self)
|
|||
rewind(self->optimize_tmpfile);
|
||||
|
||||
while (read_size) {
|
||||
read_size = fread(buffer, 1, buffer_size, self->optimize_tmpfile);
|
||||
read_size = fread(buffer, 1, BUFFER_SIZE, self->optimize_tmpfile);
|
||||
fwrite(buffer, 1, read_size, self->file);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = new_workbook("test_optimize01.xlsx");
|
||||
|
||||
workbook->optimize = 1;
|
||||
lxw_workbook_options options = {1};
|
||||
|
||||
lxw_workbook *workbook = new_workbook_opt("test_optimize01.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
|
||||
worksheet_write_string(worksheet, 0, 0, "Hello", NULL);
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = new_workbook("test_optimize02.xlsx");
|
||||
|
||||
workbook->optimize = 1;
|
||||
lxw_workbook_options options = {1};
|
||||
|
||||
lxw_workbook *workbook = new_workbook_opt("test_optimize02.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
|
||||
worksheet_write_string(worksheet, 0, 0, "Hello", NULL);
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = new_workbook("test_optimize21.xlsx");
|
||||
|
||||
workbook->optimize = 1;
|
||||
lxw_workbook_options options = {1};
|
||||
|
||||
lxw_workbook *workbook = new_workbook_opt("test_optimize21.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
|
||||
worksheet_write_string(worksheet, CELL("A1"), "Foo", NULL);
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = new_workbook("test_optimize22.xlsx");
|
||||
|
||||
workbook->optimize = 1;
|
||||
lxw_workbook_options options = {1};
|
||||
|
||||
lxw_workbook *workbook = new_workbook_opt("test_optimize22.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
lxw_format *bold = workbook_add_format(workbook);
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = new_workbook("test_optimize23.xlsx");
|
||||
|
||||
workbook->optimize = 1;
|
||||
lxw_workbook_options options = {1};
|
||||
|
||||
lxw_workbook *workbook = new_workbook_opt("test_optimize23.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
lxw_format *bold = workbook_add_format(workbook);
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = new_workbook("test_optimize24.xlsx");
|
||||
|
||||
workbook->optimize = 1;
|
||||
lxw_workbook_options options = {1};
|
||||
|
||||
lxw_workbook *workbook = new_workbook_opt("test_optimize24.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
lxw_format *bold = workbook_add_format(workbook);
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = new_workbook("test_optimize25.xlsx");
|
||||
|
||||
workbook->optimize = 1;
|
||||
lxw_workbook_options options = {1};
|
||||
|
||||
lxw_workbook *workbook = new_workbook_opt("test_optimize25.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
lxw_format *bold = workbook_add_format(workbook);
|
||||
|
||||
|
|
|
|||
|
|
@ -11,10 +11,9 @@
|
|||
|
||||
int main() {
|
||||
|
||||
lxw_workbook *workbook = new_workbook("test_optimize26.xlsx");
|
||||
|
||||
workbook->optimize = 1;
|
||||
lxw_workbook_options options = {1};
|
||||
|
||||
lxw_workbook *workbook = new_workbook_opt("test_optimize26.xlsx", &options);
|
||||
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
|
||||
|
||||
worksheet_write_string(worksheet, 2, 2, "café", NULL);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue