Added constant_memory mode.

This commit is contained in:
John McNamara 2014-06-26 00:49:41 +01:00
parent afa9305893
commit 63f1419b76
18 changed files with 194 additions and 39 deletions

View file

@ -23,6 +23,7 @@ my @examples = (
[ 'dates_and_times02.c', 'Writing dates and times with datetime' ],
[ 'dates_and_times03.c', 'Dates and times with different formats' ],
[ 'utf8.c', 'A example of some UTF-8 text' ],
[ 'constant_memory.c', 'Write a large file with constant memory usage' ],
);
# Convert the array refs to a hash for lookups.

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

View file

@ -55,8 +55,18 @@ If required, it is also possible to specify the calculated result of the
formula using the worksheet_write_formula_num() function.
# Reporting Bugs
## Strings aren't displayed in Apple Numbers in 'constant_memory' mode
In `constant_memory` mode (enabled via new_workbook_opt()) libxlsxwriter uses an
optimisation where cell strings aren't stored in an Excel structure called
"shared strings" and instead are written "in-line".
This is a documented Excel feature that is supported by most spreadsheet
applications. One known exception is Apple Numbers for Mac where the string
data isn't displayed.
# Reporting Bugs
Here are some tips on reporting bugs in `libxlsxwriter`.

View file

@ -89,7 +89,14 @@ Next example: @ref utf8.c
@example utf8.c
A simple Unicode UTF-8 example. Note, the source file is UTF-8 encoded.
Next example: @ref constant_memory.c
@image html utf8.png
@example constant_memory.c
Example of using libxlsxwriter for writing large files in constant memory
mode.
@image html constant_memory.png
*/

View file

@ -1,16 +1,73 @@
/**
@page working_with_memory Working with Memory and Performance
Constant Memory Mode
--------------------
By default `libxlsxwriter` holds all cell data in memory. This is to allow
future features where formatting is applied separately from the data.
By default libxlsxwriter holds all cell data in memory. This is to allow
non-sequential data storage and also to allow future features where formatting
is applied separately from the data.
The effect of this is that for large files libxlsxwriter can consume a lot of
memory.
This issue will be resolved in the next 1-2 releases to allow a small constant
memory usage like in the Perl and Python modules that libxlsxwriter was ported
from.
Fortunately, this memory usage can be reduced almost completely by using
new_workbook_opt() and the lxw_workbook_options `constant_memory` property:
@dontinclude constant_memory.c
@skipline include
@until return
@skipline }
This optimisation works by flushing each row after a subsequent row is written.
In this way the largest amount of data held in memory for a worksheet is the
amount of data required to hold a single row of data.
If required, this memory usage can be reduced even more by setting
`LXW_COL_MAX` in worksheet.c from 16384 down to a value that matches the
maximum column that is likely to be encountered.
The trade-off when using `constant_memory` mode is that data must be added
sequentially in row order and you won't be able to take advantage of any new
features that manipulate cell data after it is written. Currently the only
such feature is worksheet_set_row() which in `constant_memory` mode can only
be used when writing data to the current row.
Row Column Order
----------------
Since each new row flushes the previous row, data must be written in sequential
row order when `constant_memory` mode is on:
@code
lxw_workbook *workbook = new_workbook_opt("constant_memory.xlsx", &options);
...
// !! Don't use "column x row" order in 'constant_memory' mode. Only
// the first column of data will be written.
for (col = 0; col < max_col; col++) {
for (row = 0; row < max_row; row++) {
worksheet_write_number(worksheet, row, col, 123.45, NULL);
}
}
@endcode
Inline strings
--------------
Another optimisation that is used to reduce memory usage in `constant_memory`
mode is that cell strings aren't stored in an Excel structure called "shared
strings" and instead are written "in-line". This is a documented Excel feature
that is supported by most spreadsheet applications. One known exception is
Apple Numbers for Mac where the string data isn't displayed.
Performance
-----------
Currently the library isn't optimised. There are some known bottlenecks that
will be reduced in upcoming versions. Also, the library is currently single
threaded.
Next: @ref examples

View file

@ -0,0 +1,33 @@
/*
* Example of using libxlsxwriter for writing large files in constant memory
* mode.
*
* Copyright 2014, John McNamara, jmcnamara@cpan.org
*
*/
#include "xlsxwriter.h"
int main(void) {

    lxw_row_t row;
    lxw_col_t col;
    lxw_row_t max_row = 1000;
    lxw_col_t max_col = 50;
    lxw_workbook *workbook;
    lxw_worksheet *worksheet;

    /* Set the workbook options. The struct is zero-initialised first so that
     * every option not set explicitly below has a defined value. */
    lxw_workbook_options options = {0};

    options.constant_memory = 1;

    /* Create a new workbook with options. */
    workbook = new_workbook_opt("constant_memory.xlsx", &options);
    worksheet = workbook_add_worksheet(workbook, NULL);

    /* In constant_memory mode data must be written in sequential row order,
     * so the row loop is the outer loop and the column loop the inner one. */
    for (row = 0; row < max_row; row++) {
        for (col = 0; col < max_col; col++) {
            worksheet_write_number(worksheet, row, col, 123.45, NULL);
        }
    }

    /* Close the workbook and hand its result back as the exit status. */
    return workbook_close(workbook);
}

View file

@ -1,6 +1,6 @@
/*
* libxlsxwriter
*
*
* Copyright 2014, John McNamara, jmcnamara@cpan.org. See LICENSE.txt.
*/
@ -12,7 +12,7 @@
* represents the Excel file as it is written on disk.
*
* See @ref workbook.h for full details of the functionality.
*
*
* @file workbook.h
*
* @brief Functions related to creating an Excel xlsx workbook.
@ -64,6 +64,21 @@ enum lxw_close_error {
/* TODO. Need to add/document more. */
};
/**
* @brief Workbook options.
*
* Optional parameters when creating a new Workbook object via
* new_workbook_opt().
*
* Currently only the `constant_memory` property is supported:
*
* * `constant_memory`: set to 1 to turn on constant memory mode. In this
*   mode a row of data is written and then discarded when a cell in a new
*   row is added, so data should be written in sequential row order.
*/
typedef struct lxw_workbook_options {
/** Optimise the workbook to use constant memory for worksheets. Set to 1
*  to enable. The value is copied into the workbook by new_workbook_opt()
*  and handed to each worksheet created by workbook_add_worksheet(). */
uint8_t constant_memory;
} lxw_workbook_options;
/**
* @brief Struct to represent an Excel workbook.
*
@ -79,6 +94,7 @@ typedef struct lxw_workbook {
lxw_sst *sst;
lxw_doc_properties *properties;
const char *filename;
lxw_workbook_options options;
uint16_t num_sheets;
uint16_t first_sheet;
@ -122,6 +138,33 @@ extern "C" {
*/
lxw_workbook *new_workbook(const char *filename);
/**
* @brief Create a new workbook object, and set the workbook options.
*
* @param filename The name of the new Excel file to create.
* @param options Workbook options.
*
* @return A lxw_workbook instance.
*
* This method is the same as the `new_workbook()` constructor but allows
* additional options to be set.
*
* @code
* lxw_workbook_options options = {.constant_memory = 1};
*
* lxw_workbook *workbook = new_workbook_opt("filename.xlsx", &options);
* @endcode
*
* Note, in this mode a row of data is written and then discarded when a cell
* in a new row is added via one of the worksheet `worksheet_write_*()`
* methods. Therefore, once this mode is active, data should be written in
* sequential row order.
*
* See @ref working_with_memory for more details.
*
*/
lxw_workbook *new_workbook_opt(const char *filename, lxw_workbook_options *options);
/**
* @brief Add a new worksheet to a workbook:
*

View file

@ -19,7 +19,7 @@ INC_DIR = ../include
MINIZIP_DIR = ../third_party/minizip
# Flags passed to compiler.
CFLAGS += -g -O0 -Wall -Wextra -pedantic -ansi
CFLAGS += -g -O3 -Wall -Wextra -pedantic -ansi
# Library names.
LIBXLSXWRITER_A = libxlsxwriter.a
@ -81,7 +81,7 @@ test_compile : $(OBJS)
$(Q)$(CC) -fPIC -I$(INC_DIR) $(CFLAGS) $(CXXFLAGS) -c $< -o $@
%.to : %.c $(HDRS)
$(Q)$(CC) -DTESTING -I$(INC_DIR) $(CFLAGS) $(CXXFLAGS) -c $< -o $@
$(Q)$(CC) -g -O0 -DTESTING -I$(INC_DIR) $(CFLAGS) $(CXXFLAGS) -c $< -o $@
# Clean up any temp/build files.

View file

@ -584,6 +584,15 @@ _workbook_assemble_xml_file(lxw_workbook *self)
*/
lxw_workbook *
new_workbook(const char *filename)
{
    /* A plain workbook is simply a workbook created without an options
     * struct: new_workbook_opt() skips copying options when given NULL. */
    lxw_workbook *workbook = new_workbook_opt(filename, NULL);

    return workbook;
}
/*
* Create a new workbook object with options.
*/
lxw_workbook *
new_workbook_opt(const char *filename, lxw_workbook_options *options)
{
lxw_format *format;
lxw_workbook *workbook;
@ -622,6 +631,9 @@ new_workbook(const char *filename)
/* Initialise its index. */
_get_xf_index(format);
if (options)
workbook->options.constant_memory = options->constant_memory;
return workbook;
mem_error:
@ -658,7 +670,7 @@ workbook_add_worksheet(lxw_workbook *self, const char *sheetname)
init_data.hidden = 0;
init_data.index = self->num_sheets;
init_data.sst = self->sst;
init_data.optimize = self->optimize;
init_data.optimize = self->options.constant_memory;
/* Create a new worksheet object. */
worksheet = _new_worksheet(&init_data);

View file

@ -17,6 +17,7 @@
#define LXW_ROW_MAX 1048576
#define LXW_COL_MAX 16384
#define LXW_STR_MAX 32767
#define BUFFER_SIZE 4096
/*
* Forward declarations.
@ -625,9 +626,8 @@ _worksheet_write_sheet_data(lxw_worksheet *self)
STATIC void
_worksheet_write_optimized_sheet_data(lxw_worksheet *self)
{
#define buffer_size 4096
uint16_t read_size = 1;
char buffer[buffer_size];
char buffer[BUFFER_SIZE];
if (self->dim_rowmin == LXW_ROW_MAX) {
/* If the dimensions aren't defined then there is no data to write. */
@ -642,7 +642,7 @@ _worksheet_write_optimized_sheet_data(lxw_worksheet *self)
rewind(self->optimize_tmpfile);
while (read_size) {
read_size = fread(buffer, 1, buffer_size, self->optimize_tmpfile);
read_size = fread(buffer, 1, BUFFER_SIZE, self->optimize_tmpfile);
fwrite(buffer, 1, read_size, self->file);
}

View file

@ -11,10 +11,9 @@
int main() {
lxw_workbook *workbook = new_workbook("test_optimize01.xlsx");
workbook->optimize = 1;
lxw_workbook_options options = {1};
lxw_workbook *workbook = new_workbook_opt("test_optimize01.xlsx", &options);
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
worksheet_write_string(worksheet, 0, 0, "Hello", NULL);

View file

@ -11,10 +11,9 @@
int main() {
lxw_workbook *workbook = new_workbook("test_optimize02.xlsx");
workbook->optimize = 1;
lxw_workbook_options options = {1};
lxw_workbook *workbook = new_workbook_opt("test_optimize02.xlsx", &options);
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
worksheet_write_string(worksheet, 0, 0, "Hello", NULL);

View file

@ -11,10 +11,9 @@
int main() {
lxw_workbook *workbook = new_workbook("test_optimize21.xlsx");
workbook->optimize = 1;
lxw_workbook_options options = {1};
lxw_workbook *workbook = new_workbook_opt("test_optimize21.xlsx", &options);
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
worksheet_write_string(worksheet, CELL("A1"), "Foo", NULL);

View file

@ -11,10 +11,9 @@
int main() {
lxw_workbook *workbook = new_workbook("test_optimize22.xlsx");
workbook->optimize = 1;
lxw_workbook_options options = {1};
lxw_workbook *workbook = new_workbook_opt("test_optimize22.xlsx", &options);
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
lxw_format *bold = workbook_add_format(workbook);

View file

@ -11,10 +11,9 @@
int main() {
lxw_workbook *workbook = new_workbook("test_optimize23.xlsx");
workbook->optimize = 1;
lxw_workbook_options options = {1};
lxw_workbook *workbook = new_workbook_opt("test_optimize23.xlsx", &options);
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
lxw_format *bold = workbook_add_format(workbook);

View file

@ -11,10 +11,9 @@
int main() {
lxw_workbook *workbook = new_workbook("test_optimize24.xlsx");
workbook->optimize = 1;
lxw_workbook_options options = {1};
lxw_workbook *workbook = new_workbook_opt("test_optimize24.xlsx", &options);
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
lxw_format *bold = workbook_add_format(workbook);

View file

@ -11,10 +11,9 @@
int main() {
lxw_workbook *workbook = new_workbook("test_optimize25.xlsx");
workbook->optimize = 1;
lxw_workbook_options options = {1};
lxw_workbook *workbook = new_workbook_opt("test_optimize25.xlsx", &options);
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
lxw_format *bold = workbook_add_format(workbook);

View file

@ -11,10 +11,9 @@
int main() {
lxw_workbook *workbook = new_workbook("test_optimize26.xlsx");
workbook->optimize = 1;
lxw_workbook_options options = {1};
lxw_workbook *workbook = new_workbook_opt("test_optimize26.xlsx", &options);
lxw_worksheet *worksheet = workbook_add_worksheet(workbook, NULL);
worksheet_write_string(worksheet, 2, 2, "café", NULL);