diff options
author | Cameron Howey <cameronh@emailline.net> | 2019-12-28 20:45:10 -0800 |
---|---|---|
committer | xuri <xuri.me@gmail.com> | 2019-12-29 12:45:10 +0800 |
commit | 5c87effc7e6c97fff36a56dea1afac8a2f06fb37 (patch) | |
tree | 35153a2d36aed19c142bc54c6e8d632eff270109 /stream.go | |
parent | 8b960ee1e624bd2776a351a4a3b2ad04c29bae9a (diff) |
Stream to Excel table (#530)
* Support all datatypes for StreamWriter
* Support setting styles with StreamWriter
**NOTE:** This is a breaking change. Values are now explicitly
passed as a []interface{} for simplicity. We also let styles to be
set at the same time.
* Create function to write stream into a table
* Write rows directly to buffer
Avoiding the xml.Encoder makes the streamer faster and use less
memory.
Using the included benchmark, the results went from:
> BenchmarkStreamWriter-4 514 2576155 ns/op 454918 B/op 6592 allocs/op
down to:
> BenchmarkStreamWriter-4 1614 777480 ns/op 147608 B/op 5570 allocs/op
* Use AddTable instead of SetTable
This requires reading the cells after they have been written,
which requires additional structure for the temp file.
As a bonus, we now efficiently allocate only one buffer when
reading the file back into memory, using the same approach
as ioutil.ReadFile.
* Use an exported Cell type to handle inline styles for StreamWriter
Diffstat (limited to 'stream.go')
-rw-r--r-- | stream.go | 495 |
1 files changed, 387 insertions, 108 deletions
@@ -12,20 +12,23 @@ package excelize import ( "bytes" "encoding/xml" - "errors" "fmt" + "io" "io/ioutil" "os" "reflect" + "strconv" + "strings" + "time" ) // StreamWriter defined the type of stream writer. type StreamWriter struct { - tmpFile *os.File - File *File - Sheet string - SheetID int - SheetData bytes.Buffer + File *File + Sheet string + SheetID int + rawData bufferedWriter + tableParts string } // NewStreamWriter return stream writer struct by given worksheet name for @@ -46,7 +49,7 @@ type StreamWriter struct { // row[colID] = rand.Intn(640000) // } // cell, _ := excelize.CoordinatesToCellName(1, rowID) -// if err := streamWriter.SetRow(cell, &row); err != nil { +// if err := streamWriter.SetRow(cell, row, nil); err != nil { // panic(err) // } // } @@ -62,157 +65,433 @@ func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) { if sheetID == 0 { return nil, fmt.Errorf("sheet %s is not exist", sheet) } - rsw := &StreamWriter{ + sw := &StreamWriter{ File: f, Sheet: sheet, SheetID: sheetID, } - rsw.SheetData.WriteString("<sheetData>") - return rsw, nil + + ws, err := f.workSheetReader(sheet) + if err != nil { + return nil, err + } + sw.rawData.WriteString(XMLHeader + `<worksheet` + templateNamespaceIDMap) + bulkAppendOtherFields(&sw.rawData, ws, "XMLName", "SheetData", "TableParts") + sw.rawData.WriteString(`<sheetData>`) + return sw, nil } -// SetRow writes an array to streaming row by given worksheet name, starting -// coordinate and a pointer to array type 'slice'. Note that, cell settings -// with styles are not supported currently and after set rows, you must call the -// 'Flush' method to end the streaming writing process. The following -// shows the supported data types: +// AddTable creates an Excel table for the StreamWriter using the given +// coordinate area and format set. For example, create a table of A1:D5: // -// int -// string +// err := sw.AddTable("A1", "D5", ``) // -func (sw *StreamWriter) SetRow(axis string, slice interface{}) error { - col, row, err := CellNameToCoordinates(axis) +// Create a table of F2:H6 with format set: +// +// err := sw.AddTable("F2", "H6", `{"table_name":"table","table_style":"TableStyleMedium2","show_first_column":true,"show_last_column":true,"show_row_stripes":false,"show_column_stripes":true}`) +// +// Note that the table must be at least two lines including the header. The +// header cells must contain strings and must be unique. +// +// Currently only one table is allowed for a StreamWriter. AddTable must be +// called after the rows are written but before Flush. +// +// See File.AddTable for details on the table format. +func (sw *StreamWriter) AddTable(hcell, vcell, format string) error { + formatSet, err := parseFormatTableSet(format) if err != nil { return err } - // Make sure 'slice' is a Ptr to Slice - v := reflect.ValueOf(slice) - if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Slice { - return errors.New("pointer to slice expected") + + coordinates, err := areaRangeToCoordinates(hcell, vcell) + if err != nil { + return err } - v = v.Elem() - sw.SheetData.WriteString(fmt.Sprintf(`<row r="%d">`, row)) - for i := 0; i < v.Len(); i++ { - axis, err := CoordinatesToCellName(col+i, row) + sortCoordinates(coordinates) + + // Correct the minimum number of rows, the table at least two lines. + if coordinates[1] == coordinates[3] { + coordinates[3]++ + } + + // Correct table reference coordinate area, such correct C1:B3 to B1:C3. + ref, err := sw.File.coordinatesToAreaRef(coordinates) + if err != nil { + return err + } + + // create table columns using the first row + tableHeaders, err := sw.getRowValues(coordinates[1], coordinates[0], coordinates[2]) + if err != nil { + return err + } + tableColumn := make([]*xlsxTableColumn, len(tableHeaders)) + for i, name := range tableHeaders { + tableColumn[i] = &xlsxTableColumn{ + ID: i + 1, + Name: name, + } + } + + tableID := sw.File.countTables() + 1 + + name := formatSet.TableName + if name == "" { + name = "Table" + strconv.Itoa(tableID) + } + + table := xlsxTable{ + XMLNS: NameSpaceSpreadSheet, + ID: tableID, + Name: name, + DisplayName: name, + Ref: ref, + AutoFilter: &xlsxAutoFilter{ + Ref: ref, + }, + TableColumns: &xlsxTableColumns{ + Count: len(tableColumn), + TableColumn: tableColumn, + }, + TableStyleInfo: &xlsxTableStyleInfo{ + Name: formatSet.TableStyle, + ShowFirstColumn: formatSet.ShowFirstColumn, + ShowLastColumn: formatSet.ShowLastColumn, + ShowRowStripes: formatSet.ShowRowStripes, + ShowColumnStripes: formatSet.ShowColumnStripes, + }, + } + + sheetRelationshipsTableXML := "../tables/table" + strconv.Itoa(tableID) + ".xml" + tableXML := strings.Replace(sheetRelationshipsTableXML, "..", "xl", -1) + + // Add first table for given sheet. + sheetPath, _ := sw.File.sheetMap[trimSheetName(sw.Sheet)] + sheetRels := "xl/worksheets/_rels/" + strings.TrimPrefix(sheetPath, "xl/worksheets/") + ".rels" + rID := sw.File.addRels(sheetRels, SourceRelationshipTable, sheetRelationshipsTableXML, "") + + sw.tableParts = fmt.Sprintf(`<tableParts count="1"><tablePart r:id="rId%d"></tablePart></tableParts>`, rID) + + sw.File.addContentTypePart(tableID, "table") + + b, _ := xml.Marshal(table) + sw.File.saveFileList(tableXML, b) + return nil +} + +// Extract values from a row in the StreamWriter. +func (sw *StreamWriter) getRowValues(hrow, hcol, vcol int) (res []string, err error) { + res = make([]string, vcol-hcol+1) + + r, err := sw.rawData.Reader() + if err != nil { + return nil, err + } + + dec := xml.NewDecoder(r) + for { + token, err := dec.Token() + if err == io.EOF { + return res, nil + } if err != nil { - return err + return nil, err } - switch val := v.Index(i).Interface().(type) { - case int: - sw.SheetData.WriteString(fmt.Sprintf(`<c r="%s"><v>%d</v></c>`, axis, val)) - case string: - sw.SheetData.WriteString(sw.setCellStr(axis, val)) - default: - sw.SheetData.WriteString(sw.setCellStr(axis, fmt.Sprint(val))) + startElement, ok := getRowElement(token, hrow) + if !ok { + continue } - } - sw.SheetData.WriteString(`</row>`) - // Try to use local storage - chunk := 1 << 24 - if sw.SheetData.Len() >= chunk { - if sw.tmpFile == nil { - err := sw.createTmp() + // decode cells + var row xlsxRow + if err := dec.DecodeElement(&row, &startElement); err != nil { + return nil, err + } + for _, c := range row.C { + col, _, err := CellNameToCoordinates(c.R) if err != nil { - // can not use local storage - return nil + return nil, err } + if col < hcol || col > vcol { + continue + } + res[col-hcol] = c.V } - // use local storage - _, err := sw.tmpFile.Write(sw.SheetData.Bytes()) - if err != nil { - return nil + return res, nil + } +} + +// Check if the token is an XLSX row with the matching row number. +func getRowElement(token xml.Token, hrow int) (startElement xml.StartElement, ok bool) { + startElement, ok = token.(xml.StartElement) + if !ok { + return + } + ok = startElement.Name.Local == "row" + if !ok { + return + } + ok = false + for _, attr := range startElement.Attr { + if attr.Name.Local != "r" { + continue + } + row, _ := strconv.Atoi(attr.Value) + if row == hrow { + ok = true + return } - sw.SheetData.Reset() } - return err + return } -// Flush ending the streaming writing process. -func (sw *StreamWriter) Flush() error { - sw.SheetData.WriteString(`</sheetData>`) +// Cell can be used directly in StreamWriter.SetRow to specify a style and +// a value. +type Cell struct { + StyleID int + Value interface{} +} - ws, err := sw.File.workSheetReader(sw.Sheet) +// SetRow writes an array to stream rows by giving a worksheet name, starting +// coordinate and a pointer to an array of values. Note that you must call the +// 'Flush' method to end the streaming writing process. +// +// As a special case, if Cell is used as a value, then the Cell.StyleID will be +// applied to that cell. +func (sw *StreamWriter) SetRow(axis string, values []interface{}) error { + col, row, err := CellNameToCoordinates(axis) if err != nil { return err } - sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID) - delete(sw.File.Sheet, sheetXML) - delete(sw.File.checked, sheetXML) - var sheetDataByte []byte - if sw.tmpFile != nil { - // close the local storage file - if err = sw.tmpFile.Close(); err != nil { - return err - } - file, err := os.Open(sw.tmpFile.Name()) + fmt.Fprintf(&sw.rawData, `<row r="%d">`, row) + for i, val := range values { + axis, err := CoordinatesToCellName(col+i, row) if err != nil { return err } - - sheetDataByte, err = ioutil.ReadAll(file) - if err != nil { - return err + c := xlsxC{R: axis} + if v, ok := val.(Cell); ok { + c.S = v.StyleID + val = v.Value + } else if v, ok := val.(*Cell); ok && v != nil { + c.S = v.StyleID + val = v.Value } - - if err := file.Close(); err != nil { - return err + switch val := val.(type) { + case int: + c.T, c.V = setCellInt(val) + case int8: + c.T, c.V = setCellInt(int(val)) + case int16: + c.T, c.V = setCellInt(int(val)) + case int32: + c.T, c.V = setCellInt(int(val)) + case int64: + c.T, c.V = setCellInt(int(val)) + case uint: + c.T, c.V = setCellInt(int(val)) + case uint8: + c.T, c.V = setCellInt(int(val)) + case uint16: + c.T, c.V = setCellInt(int(val)) + case uint32: + c.T, c.V = setCellInt(int(val)) + case uint64: + c.T, c.V = setCellInt(int(val)) + case float32: + c.T, c.V = setCellFloat(float64(val), -1, 32) + case float64: + c.T, c.V = setCellFloat(val, -1, 64) + case string: + c.T, c.V, c.XMLSpace = setCellStr(val) + case []byte: + c.T, c.V, c.XMLSpace = setCellStr(string(val)) + case time.Duration: + c.T, c.V = setCellDuration(val) + case time.Time: + c.T, c.V, _, err = setCellTime(val) + case bool: + c.T, c.V = setCellBool(val) + case nil: + c.T, c.V, c.XMLSpace = setCellStr("") + default: + c.T, c.V, c.XMLSpace = setCellStr(fmt.Sprint(val)) } - - err = os.Remove(sw.tmpFile.Name()) if err != nil { return err } + writeCell(&sw.rawData, c) } + sw.rawData.WriteString(`</row>`) + return sw.rawData.Sync() +} - sheetDataByte = append(sheetDataByte, sw.SheetData.Bytes()...) - replaceMap := map[string][]byte{ - "XMLName": {}, - "SheetData": sheetDataByte, +func writeCell(buf *bufferedWriter, c xlsxC) { + buf.WriteString(`<c`) + if c.XMLSpace.Value != "" { + fmt.Fprintf(buf, ` xml:%s="%s"`, c.XMLSpace.Name.Local, c.XMLSpace.Value) } - sw.SheetData.Reset() - sw.File.XLSX[fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID)] = - StreamMarshalSheet(ws, replaceMap) - return err + fmt.Fprintf(buf, ` r="%s"`, c.R) + if c.S != 0 { + fmt.Fprintf(buf, ` s="%d"`, c.S) + } + if c.T != "" { + fmt.Fprintf(buf, ` t="%s"`, c.T) + } + buf.WriteString(`>`) + if c.V != "" { + buf.WriteString(`<v>`) + xml.EscapeText(buf, []byte(c.V)) + buf.WriteString(`</v>`) + } + buf.WriteString(`</c>`) } -// createTmp creates a temporary file in the operating system default -// temporary directory. -func (sw *StreamWriter) createTmp() (err error) { - sw.tmpFile, err = ioutil.TempFile(os.TempDir(), "excelize-") - return err +// Flush ending the streaming writing process. +func (sw *StreamWriter) Flush() error { + sw.rawData.WriteString(`</sheetData>`) + sw.rawData.WriteString(sw.tableParts) + sw.rawData.WriteString(`</worksheet>`) + if err := sw.rawData.Flush(); err != nil { + return err + } + + sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID) + delete(sw.File.Sheet, sheetXML) + delete(sw.File.checked, sheetXML) + + defer sw.rawData.Close() + b, err := sw.rawData.Bytes() + if err != nil { + return err + } + sw.File.XLSX[sheetXML] = b + return nil } -// StreamMarshalSheet provides method to serialization worksheets by field as -// streaming. -func StreamMarshalSheet(ws *xlsxWorksheet, replaceMap map[string][]byte) []byte { +// bulkAppendOtherFields bulk-appends fields in a worksheet, skipping the +// specified field names. +func bulkAppendOtherFields(w io.Writer, ws *xlsxWorksheet, skip ...string) { + skipMap := make(map[string]struct{}) + for _, name := range skip { + skipMap[name] = struct{}{} + } + s := reflect.ValueOf(ws).Elem() typeOfT := s.Type() - var marshalResult []byte - marshalResult = append(marshalResult, []byte(XMLHeader+`<worksheet`+templateNamespaceIDMap)...) + enc := xml.NewEncoder(w) for i := 0; i < s.NumField(); i++ { - content, ok := replaceMap[typeOfT.Field(i).Name] - if ok { - marshalResult = append(marshalResult, content...) + f := s.Field(i) + if _, ok := skipMap[typeOfT.Field(i).Name]; ok { continue } - out, _ := xml.Marshal(s.Field(i).Interface()) - marshalResult = append(marshalResult, out...) + enc.Encode(f.Interface()) } - marshalResult = append(marshalResult, []byte(`</worksheet>`)...) - return marshalResult } -// setCellStr provides a function to set string type value of a cell as -// streaming. Total number of characters that a cell can contain 32767 -// characters. -func (sw *StreamWriter) setCellStr(axis, value string) string { - if len(value) > 32767 { - value = value[0:32767] +// bufferedWriter uses a temp file to store an extended buffer. Writes are +// always made to an in-memory buffer, which will always succeed. The buffer +// is written to the temp file with Sync, which may return an error. Therefore, +// Sync should be periodically called and the error checked. +type bufferedWriter struct { + tmp *os.File + buf bytes.Buffer +} + +// Write to the in-memory buffer. The err is always nil. +func (bw *bufferedWriter) Write(p []byte) (n int, err error) { + return bw.buf.Write(p) +} + +// WriteString wites to the in-memory buffer. The err is always nil. +func (bw *bufferedWriter) WriteString(p string) (n int, err error) { + return bw.buf.WriteString(p) +} + +// Reader provides read-access to the underlying buffer/file. +func (bw *bufferedWriter) Reader() (io.Reader, error) { + if bw.tmp == nil { + return bytes.NewReader(bw.buf.Bytes()), nil + } + if err := bw.Flush(); err != nil { + return nil, err + } + fi, err := bw.tmp.Stat() + if err != nil { + return nil, err } - // Leading and ending space(s) character detection. - if len(value) > 0 && (value[0] == 32 || value[len(value)-1] == 32) { - return fmt.Sprintf(`<c xml:space="preserve" r="%s" t="str"><v>%s</v></c>`, axis, value) + // os.File.ReadAt does not affect the cursor position and is safe to use here + return io.NewSectionReader(bw.tmp, 0, fi.Size()), nil +} + +// Bytes returns the entire content of the bufferedWriter. If a temp file is +// used, Bytes will efficiently allocate a buffer to prevent re-allocations. +func (bw *bufferedWriter) Bytes() ([]byte, error) { + if bw.tmp == nil { + return bw.buf.Bytes(), nil + } + + if err := bw.Flush(); err != nil { + return nil, err + } + + var buf bytes.Buffer + if fi, err := bw.tmp.Stat(); err == nil { + if size := fi.Size() + bytes.MinRead; size > bytes.MinRead { + if int64(int(size)) == size { + buf.Grow(int(size)) + } else { + return nil, bytes.ErrTooLarge + } + } + } + + if _, err := bw.tmp.Seek(0, 0); err != nil { + return nil, err + } + + _, err := buf.ReadFrom(bw.tmp) + return buf.Bytes(), err +} + +// Sync will write the in-memory buffer to a temp file, if the in-memory buffer +// has grown large enough. Any error will be returned. +func (bw *bufferedWriter) Sync() (err error) { + // Try to use local storage + const chunk = 1 << 24 + if bw.buf.Len() < chunk { + return nil + } + if bw.tmp == nil { + bw.tmp, err = ioutil.TempFile(os.TempDir(), "excelize-") + if err != nil { + // can not use local storage + return nil + } + } + return bw.Flush() +} + +// Flush the entire in-memory buffer to the temp file, if a temp file is being +// used. +func (bw *bufferedWriter) Flush() error { + if bw.tmp == nil { + return nil + } + _, err := bw.buf.WriteTo(bw.tmp) + if err != nil { + return err + } + bw.buf.Reset() + return nil +} + +// Close the underlying temp file and reset the in-memory buffer. +func (bw *bufferedWriter) Close() error { + bw.buf.Reset() + if bw.tmp == nil { + return nil } - return fmt.Sprintf(`<c r="%s" t="str"><v>%s</v></c>`, axis, value) + defer os.Remove(bw.tmp.Name()) + return bw.tmp.Close() } |