summaryrefslogtreecommitdiff
path: root/stream.go
diff options
context:
space:
mode:
authorCameron Howey <cameronh@emailline.net>2019-12-28 20:45:10 -0800
committerxuri <xuri.me@gmail.com>2019-12-29 12:45:10 +0800
commit5c87effc7e6c97fff36a56dea1afac8a2f06fb37 (patch)
tree35153a2d36aed19c142bc54c6e8d632eff270109 /stream.go
parent8b960ee1e624bd2776a351a4a3b2ad04c29bae9a (diff)
Stream to Excel table (#530)
* Support all datatypes for StreamWriter * Support setting styles with StreamWriter **NOTE:** This is a breaking change. Values are now explicitly passed as a []interface{} for simplicity. We also let styles to be set at the same time. * Create function to write stream into a table * Write rows directly to buffer Avoiding the xml.Encoder makes the streamer faster and use less memory. Using the included benchmark, the results went from: > BenchmarkStreamWriter-4 514 2576155 ns/op 454918 B/op 6592 allocs/op down to: > BenchmarkStreamWriter-4 1614 777480 ns/op 147608 B/op 5570 allocs/op * Use AddTable instead of SetTable This requires reading the cells after they have been written, which requires additional structure for the temp file. As a bonus, we now efficiently allocate only one buffer when reading the file back into memory, using the same approach as ioutil.ReadFile. * Use an exported Cell type to handle inline styles for StreamWriter
Diffstat (limited to 'stream.go')
-rw-r--r--stream.go495
1 files changed, 387 insertions, 108 deletions
diff --git a/stream.go b/stream.go
index 1b1bbe3..e981f78 100644
--- a/stream.go
+++ b/stream.go
@@ -12,20 +12,23 @@ package excelize
import (
"bytes"
"encoding/xml"
- "errors"
"fmt"
+ "io"
"io/ioutil"
"os"
"reflect"
+ "strconv"
+ "strings"
+ "time"
)
// StreamWriter defined the type of stream writer.
type StreamWriter struct {
- tmpFile *os.File
- File *File
- Sheet string
- SheetID int
- SheetData bytes.Buffer
+ File *File
+ Sheet string
+ SheetID int
+ rawData bufferedWriter
+ tableParts string
}
// NewStreamWriter return stream writer struct by given worksheet name for
@@ -46,7 +49,7 @@ type StreamWriter struct {
// row[colID] = rand.Intn(640000)
// }
// cell, _ := excelize.CoordinatesToCellName(1, rowID)
-// if err := streamWriter.SetRow(cell, &row); err != nil {
+// if err := streamWriter.SetRow(cell, row, nil); err != nil {
// panic(err)
// }
// }
@@ -62,157 +65,433 @@ func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) {
if sheetID == 0 {
return nil, fmt.Errorf("sheet %s is not exist", sheet)
}
- rsw := &StreamWriter{
+ sw := &StreamWriter{
File: f,
Sheet: sheet,
SheetID: sheetID,
}
- rsw.SheetData.WriteString("<sheetData>")
- return rsw, nil
+
+ ws, err := f.workSheetReader(sheet)
+ if err != nil {
+ return nil, err
+ }
+ sw.rawData.WriteString(XMLHeader + `<worksheet` + templateNamespaceIDMap)
+ bulkAppendOtherFields(&sw.rawData, ws, "XMLName", "SheetData", "TableParts")
+ sw.rawData.WriteString(`<sheetData>`)
+ return sw, nil
}
-// SetRow writes an array to streaming row by given worksheet name, starting
-// coordinate and a pointer to array type 'slice'. Note that, cell settings
-// with styles are not supported currently and after set rows, you must call the
-// 'Flush' method to end the streaming writing process. The following
-// shows the supported data types:
+// AddTable creates an Excel table for the StreamWriter using the given
+// coordinate area and format set. For example, create a table of A1:D5:
//
-// int
-// string
+// err := sw.AddTable("A1", "D5", ``)
//
-func (sw *StreamWriter) SetRow(axis string, slice interface{}) error {
- col, row, err := CellNameToCoordinates(axis)
+// Create a table of F2:H6 with format set:
+//
+// err := sw.AddTable("F2", "H6", `{"table_name":"table","table_style":"TableStyleMedium2","show_first_column":true,"show_last_column":true,"show_row_stripes":false,"show_column_stripes":true}`)
+//
+// Note that the table must be at least two lines including the header. The
+// header cells must contain strings and must be unique.
+//
+// Currently only one table is allowed for a StreamWriter. AddTable must be
+// called after the rows are written but before Flush.
+//
+// See File.AddTable for details on the table format.
+func (sw *StreamWriter) AddTable(hcell, vcell, format string) error {
+ formatSet, err := parseFormatTableSet(format)
if err != nil {
return err
}
- // Make sure 'slice' is a Ptr to Slice
- v := reflect.ValueOf(slice)
- if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Slice {
- return errors.New("pointer to slice expected")
+
+ coordinates, err := areaRangeToCoordinates(hcell, vcell)
+ if err != nil {
+ return err
}
- v = v.Elem()
- sw.SheetData.WriteString(fmt.Sprintf(`<row r="%d">`, row))
- for i := 0; i < v.Len(); i++ {
- axis, err := CoordinatesToCellName(col+i, row)
+ sortCoordinates(coordinates)
+
+ // Correct the minimum number of rows, the table at least two lines.
+ if coordinates[1] == coordinates[3] {
+ coordinates[3]++
+ }
+
+ // Correct table reference coordinate area, such correct C1:B3 to B1:C3.
+ ref, err := sw.File.coordinatesToAreaRef(coordinates)
+ if err != nil {
+ return err
+ }
+
+ // create table columns using the first row
+ tableHeaders, err := sw.getRowValues(coordinates[1], coordinates[0], coordinates[2])
+ if err != nil {
+ return err
+ }
+ tableColumn := make([]*xlsxTableColumn, len(tableHeaders))
+ for i, name := range tableHeaders {
+ tableColumn[i] = &xlsxTableColumn{
+ ID: i + 1,
+ Name: name,
+ }
+ }
+
+ tableID := sw.File.countTables() + 1
+
+ name := formatSet.TableName
+ if name == "" {
+ name = "Table" + strconv.Itoa(tableID)
+ }
+
+ table := xlsxTable{
+ XMLNS: NameSpaceSpreadSheet,
+ ID: tableID,
+ Name: name,
+ DisplayName: name,
+ Ref: ref,
+ AutoFilter: &xlsxAutoFilter{
+ Ref: ref,
+ },
+ TableColumns: &xlsxTableColumns{
+ Count: len(tableColumn),
+ TableColumn: tableColumn,
+ },
+ TableStyleInfo: &xlsxTableStyleInfo{
+ Name: formatSet.TableStyle,
+ ShowFirstColumn: formatSet.ShowFirstColumn,
+ ShowLastColumn: formatSet.ShowLastColumn,
+ ShowRowStripes: formatSet.ShowRowStripes,
+ ShowColumnStripes: formatSet.ShowColumnStripes,
+ },
+ }
+
+ sheetRelationshipsTableXML := "../tables/table" + strconv.Itoa(tableID) + ".xml"
+ tableXML := strings.Replace(sheetRelationshipsTableXML, "..", "xl", -1)
+
+ // Add first table for given sheet.
+ sheetPath, _ := sw.File.sheetMap[trimSheetName(sw.Sheet)]
+ sheetRels := "xl/worksheets/_rels/" + strings.TrimPrefix(sheetPath, "xl/worksheets/") + ".rels"
+ rID := sw.File.addRels(sheetRels, SourceRelationshipTable, sheetRelationshipsTableXML, "")
+
+ sw.tableParts = fmt.Sprintf(`<tableParts count="1"><tablePart r:id="rId%d"></tablePart></tableParts>`, rID)
+
+ sw.File.addContentTypePart(tableID, "table")
+
+ b, _ := xml.Marshal(table)
+ sw.File.saveFileList(tableXML, b)
+ return nil
+}
+
+// Extract values from a row in the StreamWriter.
+func (sw *StreamWriter) getRowValues(hrow, hcol, vcol int) (res []string, err error) {
+ res = make([]string, vcol-hcol+1)
+
+ r, err := sw.rawData.Reader()
+ if err != nil {
+ return nil, err
+ }
+
+ dec := xml.NewDecoder(r)
+ for {
+ token, err := dec.Token()
+ if err == io.EOF {
+ return res, nil
+ }
if err != nil {
- return err
+ return nil, err
}
- switch val := v.Index(i).Interface().(type) {
- case int:
- sw.SheetData.WriteString(fmt.Sprintf(`<c r="%s"><v>%d</v></c>`, axis, val))
- case string:
- sw.SheetData.WriteString(sw.setCellStr(axis, val))
- default:
- sw.SheetData.WriteString(sw.setCellStr(axis, fmt.Sprint(val)))
+ startElement, ok := getRowElement(token, hrow)
+ if !ok {
+ continue
}
- }
- sw.SheetData.WriteString(`</row>`)
- // Try to use local storage
- chunk := 1 << 24
- if sw.SheetData.Len() >= chunk {
- if sw.tmpFile == nil {
- err := sw.createTmp()
+ // decode cells
+ var row xlsxRow
+ if err := dec.DecodeElement(&row, &startElement); err != nil {
+ return nil, err
+ }
+ for _, c := range row.C {
+ col, _, err := CellNameToCoordinates(c.R)
if err != nil {
- // can not use local storage
- return nil
+ return nil, err
}
+ if col < hcol || col > vcol {
+ continue
+ }
+ res[col-hcol] = c.V
}
- // use local storage
- _, err := sw.tmpFile.Write(sw.SheetData.Bytes())
- if err != nil {
- return nil
+ return res, nil
+ }
+}
+
+// Check if the token is an XLSX row with the matching row number.
+func getRowElement(token xml.Token, hrow int) (startElement xml.StartElement, ok bool) {
+ startElement, ok = token.(xml.StartElement)
+ if !ok {
+ return
+ }
+ ok = startElement.Name.Local == "row"
+ if !ok {
+ return
+ }
+ ok = false
+ for _, attr := range startElement.Attr {
+ if attr.Name.Local != "r" {
+ continue
+ }
+ row, _ := strconv.Atoi(attr.Value)
+ if row == hrow {
+ ok = true
+ return
}
- sw.SheetData.Reset()
}
- return err
+ return
}
-// Flush ending the streaming writing process.
-func (sw *StreamWriter) Flush() error {
- sw.SheetData.WriteString(`</sheetData>`)
+// Cell can be used directly in StreamWriter.SetRow to specify a style and
+// a value.
+type Cell struct {
+ StyleID int
+ Value interface{}
+}
- ws, err := sw.File.workSheetReader(sw.Sheet)
+// SetRow writes an array to stream rows by giving a worksheet name, starting
+// coordinate and a pointer to an array of values. Note that you must call the
+// 'Flush' method to end the streaming writing process.
+//
+// As a special case, if Cell is used as a value, then the Cell.StyleID will be
+// applied to that cell.
+func (sw *StreamWriter) SetRow(axis string, values []interface{}) error {
+ col, row, err := CellNameToCoordinates(axis)
if err != nil {
return err
}
- sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID)
- delete(sw.File.Sheet, sheetXML)
- delete(sw.File.checked, sheetXML)
- var sheetDataByte []byte
- if sw.tmpFile != nil {
- // close the local storage file
- if err = sw.tmpFile.Close(); err != nil {
- return err
- }
- file, err := os.Open(sw.tmpFile.Name())
+ fmt.Fprintf(&sw.rawData, `<row r="%d">`, row)
+ for i, val := range values {
+ axis, err := CoordinatesToCellName(col+i, row)
if err != nil {
return err
}
-
- sheetDataByte, err = ioutil.ReadAll(file)
- if err != nil {
- return err
+ c := xlsxC{R: axis}
+ if v, ok := val.(Cell); ok {
+ c.S = v.StyleID
+ val = v.Value
+ } else if v, ok := val.(*Cell); ok && v != nil {
+ c.S = v.StyleID
+ val = v.Value
}
-
- if err := file.Close(); err != nil {
- return err
+ switch val := val.(type) {
+ case int:
+ c.T, c.V = setCellInt(val)
+ case int8:
+ c.T, c.V = setCellInt(int(val))
+ case int16:
+ c.T, c.V = setCellInt(int(val))
+ case int32:
+ c.T, c.V = setCellInt(int(val))
+ case int64:
+ c.T, c.V = setCellInt(int(val))
+ case uint:
+ c.T, c.V = setCellInt(int(val))
+ case uint8:
+ c.T, c.V = setCellInt(int(val))
+ case uint16:
+ c.T, c.V = setCellInt(int(val))
+ case uint32:
+ c.T, c.V = setCellInt(int(val))
+ case uint64:
+ c.T, c.V = setCellInt(int(val))
+ case float32:
+ c.T, c.V = setCellFloat(float64(val), -1, 32)
+ case float64:
+ c.T, c.V = setCellFloat(val, -1, 64)
+ case string:
+ c.T, c.V, c.XMLSpace = setCellStr(val)
+ case []byte:
+ c.T, c.V, c.XMLSpace = setCellStr(string(val))
+ case time.Duration:
+ c.T, c.V = setCellDuration(val)
+ case time.Time:
+ c.T, c.V, _, err = setCellTime(val)
+ case bool:
+ c.T, c.V = setCellBool(val)
+ case nil:
+ c.T, c.V, c.XMLSpace = setCellStr("")
+ default:
+ c.T, c.V, c.XMLSpace = setCellStr(fmt.Sprint(val))
}
-
- err = os.Remove(sw.tmpFile.Name())
if err != nil {
return err
}
+ writeCell(&sw.rawData, c)
}
+ sw.rawData.WriteString(`</row>`)
+ return sw.rawData.Sync()
+}
- sheetDataByte = append(sheetDataByte, sw.SheetData.Bytes()...)
- replaceMap := map[string][]byte{
- "XMLName": {},
- "SheetData": sheetDataByte,
+func writeCell(buf *bufferedWriter, c xlsxC) {
+ buf.WriteString(`<c`)
+ if c.XMLSpace.Value != "" {
+ fmt.Fprintf(buf, ` xml:%s="%s"`, c.XMLSpace.Name.Local, c.XMLSpace.Value)
}
- sw.SheetData.Reset()
- sw.File.XLSX[fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID)] =
- StreamMarshalSheet(ws, replaceMap)
- return err
+ fmt.Fprintf(buf, ` r="%s"`, c.R)
+ if c.S != 0 {
+ fmt.Fprintf(buf, ` s="%d"`, c.S)
+ }
+ if c.T != "" {
+ fmt.Fprintf(buf, ` t="%s"`, c.T)
+ }
+ buf.WriteString(`>`)
+ if c.V != "" {
+ buf.WriteString(`<v>`)
+ xml.EscapeText(buf, []byte(c.V))
+ buf.WriteString(`</v>`)
+ }
+ buf.WriteString(`</c>`)
}
-// createTmp creates a temporary file in the operating system default
-// temporary directory.
-func (sw *StreamWriter) createTmp() (err error) {
- sw.tmpFile, err = ioutil.TempFile(os.TempDir(), "excelize-")
- return err
+// Flush ending the streaming writing process.
+func (sw *StreamWriter) Flush() error {
+ sw.rawData.WriteString(`</sheetData>`)
+ sw.rawData.WriteString(sw.tableParts)
+ sw.rawData.WriteString(`</worksheet>`)
+ if err := sw.rawData.Flush(); err != nil {
+ return err
+ }
+
+ sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID)
+ delete(sw.File.Sheet, sheetXML)
+ delete(sw.File.checked, sheetXML)
+
+ defer sw.rawData.Close()
+ b, err := sw.rawData.Bytes()
+ if err != nil {
+ return err
+ }
+ sw.File.XLSX[sheetXML] = b
+ return nil
}
-// StreamMarshalSheet provides method to serialization worksheets by field as
-// streaming.
-func StreamMarshalSheet(ws *xlsxWorksheet, replaceMap map[string][]byte) []byte {
+// bulkAppendOtherFields bulk-appends fields in a worksheet, skipping the
+// specified field names.
+func bulkAppendOtherFields(w io.Writer, ws *xlsxWorksheet, skip ...string) {
+ skipMap := make(map[string]struct{})
+ for _, name := range skip {
+ skipMap[name] = struct{}{}
+ }
+
s := reflect.ValueOf(ws).Elem()
typeOfT := s.Type()
- var marshalResult []byte
- marshalResult = append(marshalResult, []byte(XMLHeader+`<worksheet`+templateNamespaceIDMap)...)
+ enc := xml.NewEncoder(w)
for i := 0; i < s.NumField(); i++ {
- content, ok := replaceMap[typeOfT.Field(i).Name]
- if ok {
- marshalResult = append(marshalResult, content...)
+ f := s.Field(i)
+ if _, ok := skipMap[typeOfT.Field(i).Name]; ok {
continue
}
- out, _ := xml.Marshal(s.Field(i).Interface())
- marshalResult = append(marshalResult, out...)
+ enc.Encode(f.Interface())
}
- marshalResult = append(marshalResult, []byte(`</worksheet>`)...)
- return marshalResult
}
-// setCellStr provides a function to set string type value of a cell as
-// streaming. Total number of characters that a cell can contain 32767
-// characters.
-func (sw *StreamWriter) setCellStr(axis, value string) string {
- if len(value) > 32767 {
- value = value[0:32767]
+// bufferedWriter uses a temp file to store an extended buffer. Writes are
+// always made to an in-memory buffer, which will always succeed. The buffer
+// is written to the temp file with Sync, which may return an error. Therefore,
+// Sync should be periodically called and the error checked.
+type bufferedWriter struct {
+ tmp *os.File
+ buf bytes.Buffer
+}
+
+// Write to the in-memory buffer. The err is always nil.
+func (bw *bufferedWriter) Write(p []byte) (n int, err error) {
+ return bw.buf.Write(p)
+}
+
+// WriteString wites to the in-memory buffer. The err is always nil.
+func (bw *bufferedWriter) WriteString(p string) (n int, err error) {
+ return bw.buf.WriteString(p)
+}
+
+// Reader provides read-access to the underlying buffer/file.
+func (bw *bufferedWriter) Reader() (io.Reader, error) {
+ if bw.tmp == nil {
+ return bytes.NewReader(bw.buf.Bytes()), nil
+ }
+ if err := bw.Flush(); err != nil {
+ return nil, err
+ }
+ fi, err := bw.tmp.Stat()
+ if err != nil {
+ return nil, err
}
- // Leading and ending space(s) character detection.
- if len(value) > 0 && (value[0] == 32 || value[len(value)-1] == 32) {
- return fmt.Sprintf(`<c xml:space="preserve" r="%s" t="str"><v>%s</v></c>`, axis, value)
+ // os.File.ReadAt does not affect the cursor position and is safe to use here
+ return io.NewSectionReader(bw.tmp, 0, fi.Size()), nil
+}
+
+// Bytes returns the entire content of the bufferedWriter. If a temp file is
+// used, Bytes will efficiently allocate a buffer to prevent re-allocations.
+func (bw *bufferedWriter) Bytes() ([]byte, error) {
+ if bw.tmp == nil {
+ return bw.buf.Bytes(), nil
+ }
+
+ if err := bw.Flush(); err != nil {
+ return nil, err
+ }
+
+ var buf bytes.Buffer
+ if fi, err := bw.tmp.Stat(); err == nil {
+ if size := fi.Size() + bytes.MinRead; size > bytes.MinRead {
+ if int64(int(size)) == size {
+ buf.Grow(int(size))
+ } else {
+ return nil, bytes.ErrTooLarge
+ }
+ }
+ }
+
+ if _, err := bw.tmp.Seek(0, 0); err != nil {
+ return nil, err
+ }
+
+ _, err := buf.ReadFrom(bw.tmp)
+ return buf.Bytes(), err
+}
+
+// Sync will write the in-memory buffer to a temp file, if the in-memory buffer
+// has grown large enough. Any error will be returned.
+func (bw *bufferedWriter) Sync() (err error) {
+ // Try to use local storage
+ const chunk = 1 << 24
+ if bw.buf.Len() < chunk {
+ return nil
+ }
+ if bw.tmp == nil {
+ bw.tmp, err = ioutil.TempFile(os.TempDir(), "excelize-")
+ if err != nil {
+ // can not use local storage
+ return nil
+ }
+ }
+ return bw.Flush()
+}
+
+// Flush the entire in-memory buffer to the temp file, if a temp file is being
+// used.
+func (bw *bufferedWriter) Flush() error {
+ if bw.tmp == nil {
+ return nil
+ }
+ _, err := bw.buf.WriteTo(bw.tmp)
+ if err != nil {
+ return err
+ }
+ bw.buf.Reset()
+ return nil
+}
+
+// Close the underlying temp file and reset the in-memory buffer.
+func (bw *bufferedWriter) Close() error {
+ bw.buf.Reset()
+ if bw.tmp == nil {
+ return nil
}
- return fmt.Sprintf(`<c r="%s" t="str"><v>%s</v></c>`, axis, value)
+ defer os.Remove(bw.tmp.Name())
+ return bw.tmp.Close()
}