From 5c87effc7e6c97fff36a56dea1afac8a2f06fb37 Mon Sep 17 00:00:00 2001 From: Cameron Howey <cameronh@emailline.net> Date: Sat, 28 Dec 2019 20:45:10 -0800 Subject: Stream to Excel table (#530) * Support all datatypes for StreamWriter * Support setting styles with StreamWriter **NOTE:** This is a breaking change. Values are now explicitly passed as a []interface{} for simplicity. We also let styles to be set at the same time. * Create function to write stream into a table * Write rows directly to buffer Avoiding the xml.Encoder makes the streamer faster and use less memory. Using the included benchmark, the results went from: > BenchmarkStreamWriter-4 514 2576155 ns/op 454918 B/op 6592 allocs/op down to: > BenchmarkStreamWriter-4 1614 777480 ns/op 147608 B/op 5570 allocs/op * Use AddTable instead of SetTable This requires reading the cells after they have been written, which requires additional structure for the temp file. As a bonus, we now efficiently allocate only one buffer when reading the file back into memory, using the same approach as ioutil.ReadFile. * Use an exported Cell type to handle inline styles for StreamWriter --- adjust.go | 23 ++- cell.go | 100 ++++++++---- stream.go | 495 ++++++++++++++++++++++++++++++++++++++++++++------------- stream_test.go | 82 +++++++--- 4 files changed, 540 insertions(+), 160 deletions(-) diff --git a/adjust.go b/adjust.go index c15d4b4..69ded1b 100644 --- a/adjust.go +++ b/adjust.go @@ -196,10 +196,14 @@ func (f *File) adjustAutoFilterHelper(dir adjustDirection, coordinates []int, nu // areaRefToCoordinates provides a function to convert area reference to a // pair of coordinates. func (f *File) areaRefToCoordinates(ref string) ([]int, error) { - coordinates := make([]int, 4) rng := strings.Split(ref, ":") - firstCell := rng[0] - lastCell := rng[1] + return areaRangeToCoordinates(rng[0], rng[1]) +} + +// areaRangeToCoordinates provides a function to convert cell range to a +// pair of coordinates. +func areaRangeToCoordinates(firstCell, lastCell string) ([]int, error) { + coordinates := make([]int, 4) var err error coordinates[0], coordinates[1], err = CellNameToCoordinates(firstCell) if err != nil { @@ -209,6 +213,19 @@ func (f *File) areaRefToCoordinates(ref string) ([]int, error) { return coordinates, err } +func sortCoordinates(coordinates []int) error { + if len(coordinates) != 4 { + return errors.New("coordinates length must be 4") + } + if coordinates[2] < coordinates[0] { + coordinates[2], coordinates[0] = coordinates[0], coordinates[2] + } + if coordinates[3] < coordinates[1] { + coordinates[3], coordinates[1] = coordinates[1], coordinates[3] + } + return nil +} + // coordinatesToAreaRef provides a function to convert a pair of coordinates // to area reference. func (f *File) coordinatesToAreaRef(coordinates []int) (string, error) { diff --git a/cell.go b/cell.go index da091ee..1aeddc1 100644 --- a/cell.go +++ b/cell.go @@ -83,7 +83,8 @@ func (f *File) SetCellValue(sheet, axis string, value interface{}) error { case []byte: err = f.SetCellStr(sheet, axis, string(v)) case time.Duration: - err = f.SetCellDefault(sheet, axis, strconv.FormatFloat(v.Seconds()/86400.0, 'f', -1, 32)) + _, d := setCellDuration(v) + err = f.SetCellDefault(sheet, axis, d) if err != nil { return err } @@ -131,28 +132,50 @@ func (f *File) setCellIntFunc(sheet, axis string, value interface{}) error { // setCellTimeFunc provides a method to process time type of value for // SetCellValue. func (f *File) setCellTimeFunc(sheet, axis string, value time.Time) error { - excelTime, err := timeToExcelTime(value) + xlsx, err := f.workSheetReader(sheet) if err != nil { return err } - if excelTime > 0 { - err = f.SetCellDefault(sheet, axis, strconv.FormatFloat(excelTime, 'f', -1, 64)) - if err != nil { - return err - } + cellData, col, _, err := f.prepareCell(xlsx, sheet, axis) + if err != nil { + return err + } + cellData.S = f.prepareCellStyle(xlsx, col, cellData.S) + + var isNum bool + cellData.T, cellData.V, isNum, err = setCellTime(value) + if err != nil { + return err + } + if isNum { err = f.setDefaultTimeStyle(sheet, axis, 22) if err != nil { return err } - } else { - err = f.SetCellStr(sheet, axis, value.Format(time.RFC3339Nano)) - if err != nil { - return err - } } return err } +func setCellTime(value time.Time) (t string, b string, isNum bool, err error) { + var excelTime float64 + excelTime, err = timeToExcelTime(value) + if err != nil { + return + } + isNum = excelTime > 0 + if isNum { + t, b = setCellDefault(strconv.FormatFloat(excelTime, 'f', -1, 64)) + } else { + t, b = setCellDefault(value.Format(time.RFC3339Nano)) + } + return +} + +func setCellDuration(value time.Duration) (t string, v string) { + v = strconv.FormatFloat(value.Seconds()/86400.0, 'f', -1, 32) + return +} + // SetCellInt provides a function to set int type value of a cell by given // worksheet name, cell coordinates and cell value. func (f *File) SetCellInt(sheet, axis string, value int) error { @@ -165,11 +188,15 @@ func (f *File) SetCellInt(sheet, axis string, value int) error { return err } cellData.S = f.prepareCellStyle(xlsx, col, cellData.S) - cellData.T = "" - cellData.V = strconv.Itoa(value) + cellData.T, cellData.V = setCellInt(value) return err } +func setCellInt(value int) (t string, v string) { + v = strconv.Itoa(value) + return +} + // SetCellBool provides a function to set bool type value of a cell by given // worksheet name, cell name and cell value. func (f *File) SetCellBool(sheet, axis string, value bool) error { @@ -182,13 +209,18 @@ func (f *File) SetCellBool(sheet, axis string, value bool) error { return err } cellData.S = f.prepareCellStyle(xlsx, col, cellData.S) - cellData.T = "b" + cellData.T, cellData.V = setCellBool(value) + return err +} + +func setCellBool(value bool) (t string, v string) { + t = "b" if value { - cellData.V = "1" + v = "1" } else { - cellData.V = "0" + v = "0" } - return err + return } // SetCellFloat sets a floating point value into a cell. The prec parameter @@ -210,11 +242,15 @@ func (f *File) SetCellFloat(sheet, axis string, value float64, prec, bitSize int return err } cellData.S = f.prepareCellStyle(xlsx, col, cellData.S) - cellData.T = "" - cellData.V = strconv.FormatFloat(value, 'f', prec, bitSize) + cellData.T, cellData.V = setCellFloat(value, prec, bitSize) return err } +func setCellFloat(value float64, prec, bitSize int) (t string, v string) { + v = strconv.FormatFloat(value, 'f', prec, bitSize) + return +} + // SetCellStr provides a function to set string type value of a cell. Total // number of characters that a cell can contain 32767 characters. func (f *File) SetCellStr(sheet, axis, value string) error { @@ -226,21 +262,25 @@ func (f *File) SetCellStr(sheet, axis, value string) error { if err != nil { return err } + cellData.S = f.prepareCellStyle(xlsx, col, cellData.S) + cellData.T, cellData.V, cellData.XMLSpace = setCellStr(value) + return err +} + +func setCellStr(value string) (t string, v string, ns xml.Attr) { if len(value) > 32767 { value = value[0:32767] } // Leading and ending space(s) character detection. if len(value) > 0 && (value[0] == 32 || value[len(value)-1] == 32) { - cellData.XMLSpace = xml.Attr{ + ns = xml.Attr{ Name: xml.Name{Space: NameSpaceXML, Local: "space"}, Value: "preserve", } } - - cellData.S = f.prepareCellStyle(xlsx, col, cellData.S) - cellData.T = "str" - cellData.V = value - return err + t = "str" + v = value + return } // SetCellDefault provides a function to set string type value of a cell as @@ -255,11 +295,15 @@ func (f *File) SetCellDefault(sheet, axis, value string) error { return err } cellData.S = f.prepareCellStyle(xlsx, col, cellData.S) - cellData.T = "" - cellData.V = value + cellData.T, cellData.V = setCellDefault(value) return err } +func setCellDefault(value string) (t string, v string) { + v = value + return +} + // GetCellFormula provides a function to get formula from cell by given // worksheet name and axis in XLSX file. func (f *File) GetCellFormula(sheet, axis string) (string, error) { diff --git a/stream.go b/stream.go index 1b1bbe3..e981f78 100644 --- a/stream.go +++ b/stream.go @@ -12,20 +12,23 @@ package excelize import ( "bytes" "encoding/xml" - "errors" "fmt" + "io" "io/ioutil" "os" "reflect" + "strconv" + "strings" + "time" ) // StreamWriter defined the type of stream writer. type StreamWriter struct { - tmpFile *os.File - File *File - Sheet string - SheetID int - SheetData bytes.Buffer + File *File + Sheet string + SheetID int + rawData bufferedWriter + tableParts string } // NewStreamWriter return stream writer struct by given worksheet name for @@ -46,7 +49,7 @@ type StreamWriter struct { // row[colID] = rand.Intn(640000) // } // cell, _ := excelize.CoordinatesToCellName(1, rowID) -// if err := streamWriter.SetRow(cell, &row); err != nil { +// if err := streamWriter.SetRow(cell, row, nil); err != nil { // panic(err) // } // } @@ -62,157 +65,433 @@ func (f *File) NewStreamWriter(sheet string) (*StreamWriter, error) { if sheetID == 0 { return nil, fmt.Errorf("sheet %s is not exist", sheet) } - rsw := &StreamWriter{ + sw := &StreamWriter{ File: f, Sheet: sheet, SheetID: sheetID, } - rsw.SheetData.WriteString("<sheetData>") - return rsw, nil + + ws, err := f.workSheetReader(sheet) + if err != nil { + return nil, err + } + sw.rawData.WriteString(XMLHeader + `<worksheet` + templateNamespaceIDMap) + bulkAppendOtherFields(&sw.rawData, ws, "XMLName", "SheetData", "TableParts") + sw.rawData.WriteString(`<sheetData>`) + return sw, nil } -// SetRow writes an array to streaming row by given worksheet name, starting -// coordinate and a pointer to array type 'slice'. Note that, cell settings -// with styles are not supported currently and after set rows, you must call the -// 'Flush' method to end the streaming writing process. The following -// shows the supported data types: +// AddTable creates an Excel table for the StreamWriter using the given +// coordinate area and format set. For example, create a table of A1:D5: // -// int -// string +// err := sw.AddTable("A1", "D5", ``) // -func (sw *StreamWriter) SetRow(axis string, slice interface{}) error { - col, row, err := CellNameToCoordinates(axis) +// Create a table of F2:H6 with format set: +// +// err := sw.AddTable("F2", "H6", `{"table_name":"table","table_style":"TableStyleMedium2","show_first_column":true,"show_last_column":true,"show_row_stripes":false,"show_column_stripes":true}`) +// +// Note that the table must be at least two lines including the header. The +// header cells must contain strings and must be unique. +// +// Currently only one table is allowed for a StreamWriter. AddTable must be +// called after the rows are written but before Flush. +// +// See File.AddTable for details on the table format. +func (sw *StreamWriter) AddTable(hcell, vcell, format string) error { + formatSet, err := parseFormatTableSet(format) if err != nil { return err } - // Make sure 'slice' is a Ptr to Slice - v := reflect.ValueOf(slice) - if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Slice { - return errors.New("pointer to slice expected") + + coordinates, err := areaRangeToCoordinates(hcell, vcell) + if err != nil { + return err } - v = v.Elem() - sw.SheetData.WriteString(fmt.Sprintf(`<row r="%d">`, row)) - for i := 0; i < v.Len(); i++ { - axis, err := CoordinatesToCellName(col+i, row) + sortCoordinates(coordinates) + + // Correct the minimum number of rows, the table at least two lines. + if coordinates[1] == coordinates[3] { + coordinates[3]++ + } + + // Correct table reference coordinate area, such correct C1:B3 to B1:C3. + ref, err := sw.File.coordinatesToAreaRef(coordinates) + if err != nil { + return err + } + + // create table columns using the first row + tableHeaders, err := sw.getRowValues(coordinates[1], coordinates[0], coordinates[2]) + if err != nil { + return err + } + tableColumn := make([]*xlsxTableColumn, len(tableHeaders)) + for i, name := range tableHeaders { + tableColumn[i] = &xlsxTableColumn{ + ID: i + 1, + Name: name, + } + } + + tableID := sw.File.countTables() + 1 + + name := formatSet.TableName + if name == "" { + name = "Table" + strconv.Itoa(tableID) + } + + table := xlsxTable{ + XMLNS: NameSpaceSpreadSheet, + ID: tableID, + Name: name, + DisplayName: name, + Ref: ref, + AutoFilter: &xlsxAutoFilter{ + Ref: ref, + }, + TableColumns: &xlsxTableColumns{ + Count: len(tableColumn), + TableColumn: tableColumn, + }, + TableStyleInfo: &xlsxTableStyleInfo{ + Name: formatSet.TableStyle, + ShowFirstColumn: formatSet.ShowFirstColumn, + ShowLastColumn: formatSet.ShowLastColumn, + ShowRowStripes: formatSet.ShowRowStripes, + ShowColumnStripes: formatSet.ShowColumnStripes, + }, + } + + sheetRelationshipsTableXML := "../tables/table" + strconv.Itoa(tableID) + ".xml" + tableXML := strings.Replace(sheetRelationshipsTableXML, "..", "xl", -1) + + // Add first table for given sheet. + sheetPath, _ := sw.File.sheetMap[trimSheetName(sw.Sheet)] + sheetRels := "xl/worksheets/_rels/" + strings.TrimPrefix(sheetPath, "xl/worksheets/") + ".rels" + rID := sw.File.addRels(sheetRels, SourceRelationshipTable, sheetRelationshipsTableXML, "") + + sw.tableParts = fmt.Sprintf(`<tableParts count="1"><tablePart r:id="rId%d"></tablePart></tableParts>`, rID) + + sw.File.addContentTypePart(tableID, "table") + + b, _ := xml.Marshal(table) + sw.File.saveFileList(tableXML, b) + return nil +} + +// Extract values from a row in the StreamWriter. +func (sw *StreamWriter) getRowValues(hrow, hcol, vcol int) (res []string, err error) { + res = make([]string, vcol-hcol+1) + + r, err := sw.rawData.Reader() + if err != nil { + return nil, err + } + + dec := xml.NewDecoder(r) + for { + token, err := dec.Token() + if err == io.EOF { + return res, nil + } if err != nil { - return err + return nil, err } - switch val := v.Index(i).Interface().(type) { - case int: - sw.SheetData.WriteString(fmt.Sprintf(`<c r="%s"><v>%d</v></c>`, axis, val)) - case string: - sw.SheetData.WriteString(sw.setCellStr(axis, val)) - default: - sw.SheetData.WriteString(sw.setCellStr(axis, fmt.Sprint(val))) + startElement, ok := getRowElement(token, hrow) + if !ok { + continue } - } - sw.SheetData.WriteString(`</row>`) - // Try to use local storage - chunk := 1 << 24 - if sw.SheetData.Len() >= chunk { - if sw.tmpFile == nil { - err := sw.createTmp() + // decode cells + var row xlsxRow + if err := dec.DecodeElement(&row, &startElement); err != nil { + return nil, err + } + for _, c := range row.C { + col, _, err := CellNameToCoordinates(c.R) if err != nil { - // can not use local storage - return nil + return nil, err } + if col < hcol || col > vcol { + continue + } + res[col-hcol] = c.V } - // use local storage - _, err := sw.tmpFile.Write(sw.SheetData.Bytes()) - if err != nil { - return nil + return res, nil + } +} + +// Check if the token is an XLSX row with the matching row number. +func getRowElement(token xml.Token, hrow int) (startElement xml.StartElement, ok bool) { + startElement, ok = token.(xml.StartElement) + if !ok { + return + } + ok = startElement.Name.Local == "row" + if !ok { + return + } + ok = false + for _, attr := range startElement.Attr { + if attr.Name.Local != "r" { + continue + } + row, _ := strconv.Atoi(attr.Value) + if row == hrow { + ok = true + return } - sw.SheetData.Reset() } - return err + return } -// Flush ending the streaming writing process. -func (sw *StreamWriter) Flush() error { - sw.SheetData.WriteString(`</sheetData>`) +// Cell can be used directly in StreamWriter.SetRow to specify a style and +// a value. +type Cell struct { + StyleID int + Value interface{} +} - ws, err := sw.File.workSheetReader(sw.Sheet) +// SetRow writes an array to stream rows by giving a worksheet name, starting +// coordinate and a pointer to an array of values. Note that you must call the +// 'Flush' method to end the streaming writing process. +// +// As a special case, if Cell is used as a value, then the Cell.StyleID will be +// applied to that cell. +func (sw *StreamWriter) SetRow(axis string, values []interface{}) error { + col, row, err := CellNameToCoordinates(axis) if err != nil { return err } - sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID) - delete(sw.File.Sheet, sheetXML) - delete(sw.File.checked, sheetXML) - var sheetDataByte []byte - if sw.tmpFile != nil { - // close the local storage file - if err = sw.tmpFile.Close(); err != nil { - return err - } - file, err := os.Open(sw.tmpFile.Name()) + fmt.Fprintf(&sw.rawData, `<row r="%d">`, row) + for i, val := range values { + axis, err := CoordinatesToCellName(col+i, row) if err != nil { return err } - - sheetDataByte, err = ioutil.ReadAll(file) - if err != nil { - return err + c := xlsxC{R: axis} + if v, ok := val.(Cell); ok { + c.S = v.StyleID + val = v.Value + } else if v, ok := val.(*Cell); ok && v != nil { + c.S = v.StyleID + val = v.Value } - - if err := file.Close(); err != nil { - return err + switch val := val.(type) { + case int: + c.T, c.V = setCellInt(val) + case int8: + c.T, c.V = setCellInt(int(val)) + case int16: + c.T, c.V = setCellInt(int(val)) + case int32: + c.T, c.V = setCellInt(int(val)) + case int64: + c.T, c.V = setCellInt(int(val)) + case uint: + c.T, c.V = setCellInt(int(val)) + case uint8: + c.T, c.V = setCellInt(int(val)) + case uint16: + c.T, c.V = setCellInt(int(val)) + case uint32: + c.T, c.V = setCellInt(int(val)) + case uint64: + c.T, c.V = setCellInt(int(val)) + case float32: + c.T, c.V = setCellFloat(float64(val), -1, 32) + case float64: + c.T, c.V = setCellFloat(val, -1, 64) + case string: + c.T, c.V, c.XMLSpace = setCellStr(val) + case []byte: + c.T, c.V, c.XMLSpace = setCellStr(string(val)) + case time.Duration: + c.T, c.V = setCellDuration(val) + case time.Time: + c.T, c.V, _, err = setCellTime(val) + case bool: + c.T, c.V = setCellBool(val) + case nil: + c.T, c.V, c.XMLSpace = setCellStr("") + default: + c.T, c.V, c.XMLSpace = setCellStr(fmt.Sprint(val)) } - - err = os.Remove(sw.tmpFile.Name()) if err != nil { return err } + writeCell(&sw.rawData, c) } + sw.rawData.WriteString(`</row>`) + return sw.rawData.Sync() +} - sheetDataByte = append(sheetDataByte, sw.SheetData.Bytes()...) - replaceMap := map[string][]byte{ - "XMLName": {}, - "SheetData": sheetDataByte, +func writeCell(buf *bufferedWriter, c xlsxC) { + buf.WriteString(`<c`) + if c.XMLSpace.Value != "" { + fmt.Fprintf(buf, ` xml:%s="%s"`, c.XMLSpace.Name.Local, c.XMLSpace.Value) } - sw.SheetData.Reset() - sw.File.XLSX[fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID)] = - StreamMarshalSheet(ws, replaceMap) - return err + fmt.Fprintf(buf, ` r="%s"`, c.R) + if c.S != 0 { + fmt.Fprintf(buf, ` s="%d"`, c.S) + } + if c.T != "" { + fmt.Fprintf(buf, ` t="%s"`, c.T) + } + buf.WriteString(`>`) + if c.V != "" { + buf.WriteString(`<v>`) + xml.EscapeText(buf, []byte(c.V)) + buf.WriteString(`</v>`) + } + buf.WriteString(`</c>`) } -// createTmp creates a temporary file in the operating system default -// temporary directory. -func (sw *StreamWriter) createTmp() (err error) { - sw.tmpFile, err = ioutil.TempFile(os.TempDir(), "excelize-") - return err +// Flush ending the streaming writing process. +func (sw *StreamWriter) Flush() error { + sw.rawData.WriteString(`</sheetData>`) + sw.rawData.WriteString(sw.tableParts) + sw.rawData.WriteString(`</worksheet>`) + if err := sw.rawData.Flush(); err != nil { + return err + } + + sheetXML := fmt.Sprintf("xl/worksheets/sheet%d.xml", sw.SheetID) + delete(sw.File.Sheet, sheetXML) + delete(sw.File.checked, sheetXML) + + defer sw.rawData.Close() + b, err := sw.rawData.Bytes() + if err != nil { + return err + } + sw.File.XLSX[sheetXML] = b + return nil } -// StreamMarshalSheet provides method to serialization worksheets by field as -// streaming. -func StreamMarshalSheet(ws *xlsxWorksheet, replaceMap map[string][]byte) []byte { +// bulkAppendOtherFields bulk-appends fields in a worksheet, skipping the +// specified field names. +func bulkAppendOtherFields(w io.Writer, ws *xlsxWorksheet, skip ...string) { + skipMap := make(map[string]struct{}) + for _, name := range skip { + skipMap[name] = struct{}{} + } + s := reflect.ValueOf(ws).Elem() typeOfT := s.Type() - var marshalResult []byte - marshalResult = append(marshalResult, []byte(XMLHeader+`<worksheet`+templateNamespaceIDMap)...) + enc := xml.NewEncoder(w) for i := 0; i < s.NumField(); i++ { - content, ok := replaceMap[typeOfT.Field(i).Name] - if ok { - marshalResult = append(marshalResult, content...) + f := s.Field(i) + if _, ok := skipMap[typeOfT.Field(i).Name]; ok { continue } - out, _ := xml.Marshal(s.Field(i).Interface()) - marshalResult = append(marshalResult, out...) + enc.Encode(f.Interface()) } - marshalResult = append(marshalResult, []byte(`</worksheet>`)...) - return marshalResult } -// setCellStr provides a function to set string type value of a cell as -// streaming. Total number of characters that a cell can contain 32767 -// characters. -func (sw *StreamWriter) setCellStr(axis, value string) string { - if len(value) > 32767 { - value = value[0:32767] +// bufferedWriter uses a temp file to store an extended buffer. Writes are +// always made to an in-memory buffer, which will always succeed. The buffer +// is written to the temp file with Sync, which may return an error. Therefore, +// Sync should be periodically called and the error checked. +type bufferedWriter struct { + tmp *os.File + buf bytes.Buffer +} + +// Write to the in-memory buffer. The err is always nil. +func (bw *bufferedWriter) Write(p []byte) (n int, err error) { + return bw.buf.Write(p) +} + +// WriteString wites to the in-memory buffer. The err is always nil. +func (bw *bufferedWriter) WriteString(p string) (n int, err error) { + return bw.buf.WriteString(p) +} + +// Reader provides read-access to the underlying buffer/file. +func (bw *bufferedWriter) Reader() (io.Reader, error) { + if bw.tmp == nil { + return bytes.NewReader(bw.buf.Bytes()), nil + } + if err := bw.Flush(); err != nil { + return nil, err + } + fi, err := bw.tmp.Stat() + if err != nil { + return nil, err } - // Leading and ending space(s) character detection. - if len(value) > 0 && (value[0] == 32 || value[len(value)-1] == 32) { - return fmt.Sprintf(`<c xml:space="preserve" r="%s" t="str"><v>%s</v></c>`, axis, value) + // os.File.ReadAt does not affect the cursor position and is safe to use here + return io.NewSectionReader(bw.tmp, 0, fi.Size()), nil +} + +// Bytes returns the entire content of the bufferedWriter. If a temp file is +// used, Bytes will efficiently allocate a buffer to prevent re-allocations. +func (bw *bufferedWriter) Bytes() ([]byte, error) { + if bw.tmp == nil { + return bw.buf.Bytes(), nil + } + + if err := bw.Flush(); err != nil { + return nil, err + } + + var buf bytes.Buffer + if fi, err := bw.tmp.Stat(); err == nil { + if size := fi.Size() + bytes.MinRead; size > bytes.MinRead { + if int64(int(size)) == size { + buf.Grow(int(size)) + } else { + return nil, bytes.ErrTooLarge + } + } + } + + if _, err := bw.tmp.Seek(0, 0); err != nil { + return nil, err + } + + _, err := buf.ReadFrom(bw.tmp) + return buf.Bytes(), err +} + +// Sync will write the in-memory buffer to a temp file, if the in-memory buffer +// has grown large enough. Any error will be returned. +func (bw *bufferedWriter) Sync() (err error) { + // Try to use local storage + const chunk = 1 << 24 + if bw.buf.Len() < chunk { + return nil + } + if bw.tmp == nil { + bw.tmp, err = ioutil.TempFile(os.TempDir(), "excelize-") + if err != nil { + // can not use local storage + return nil + } + } + return bw.Flush() +} + +// Flush the entire in-memory buffer to the temp file, if a temp file is being +// used. +func (bw *bufferedWriter) Flush() error { + if bw.tmp == nil { + return nil + } + _, err := bw.buf.WriteTo(bw.tmp) + if err != nil { + return err + } + bw.buf.Reset() + return nil +} + +// Close the underlying temp file and reset the in-memory buffer. +func (bw *bufferedWriter) Close() error { + bw.buf.Reset() + if bw.tmp == nil { + return nil } - return fmt.Sprintf(`<c r="%s" t="str"><v>%s</v></c>`, axis, value) + defer os.Remove(bw.tmp.Name()) + return bw.tmp.Close() } diff --git a/stream_test.go b/stream_test.go index 4482bd1..015f64b 100644 --- a/stream_test.go +++ b/stream_test.go @@ -1,6 +1,8 @@ package excelize import ( + "encoding/xml" + "fmt" "math/rand" "path/filepath" "strings" @@ -9,6 +11,25 @@ import ( "github.com/stretchr/testify/assert" ) +func BenchmarkStreamWriter(b *testing.B) { + file := NewFile() + + row := make([]interface{}, 10) + for colID := 0; colID < 10; colID++ { + row[colID] = colID + } + + for n := 0; n < b.N; n++ { + streamWriter, _ := file.NewStreamWriter("Sheet1") + for rowID := 10; rowID <= 110; rowID++ { + cell, _ := CoordinatesToCellName(1, rowID) + streamWriter.SetRow(cell, row) + } + } + + b.ReportAllocs() +} + func TestStreamWriter(t *testing.T) { file := NewFile() streamWriter, err := file.NewStreamWriter("Sheet1") @@ -17,16 +38,16 @@ func TestStreamWriter(t *testing.T) { // Test max characters in a cell. row := make([]interface{}, 1) row[0] = strings.Repeat("c", 32769) - assert.NoError(t, streamWriter.SetRow("A1", &row)) + assert.NoError(t, streamWriter.SetRow("A1", row)) // Test leading and ending space(s) character characters in a cell. row = make([]interface{}, 1) row[0] = " characters" - assert.NoError(t, streamWriter.SetRow("A2", &row)) + assert.NoError(t, streamWriter.SetRow("A2", row)) row = make([]interface{}, 1) row[0] = []byte("Word") - assert.NoError(t, streamWriter.SetRow("A3", &row)) + assert.NoError(t, streamWriter.SetRow("A3", row)) for rowID := 10; rowID <= 51200; rowID++ { row := make([]interface{}, 50) @@ -34,26 +55,13 @@ func TestStreamWriter(t *testing.T) { row[colID] = rand.Intn(640000) } cell, _ := CoordinatesToCellName(1, rowID) - assert.NoError(t, streamWriter.SetRow(cell, &row)) + assert.NoError(t, streamWriter.SetRow(cell, row)) } assert.NoError(t, streamWriter.Flush()) // Save xlsx file by the given path. assert.NoError(t, file.SaveAs(filepath.Join("test", "TestStreamWriter.xlsx"))) - // Test error exceptions - _, err = file.NewStreamWriter("SheetN") - assert.EqualError(t, err, "sheet SheetN is not exist") -} - -func TestFlush(t *testing.T) { - // Test error exceptions - file := NewFile() - streamWriter, err := file.NewStreamWriter("Sheet1") - assert.NoError(t, err) - streamWriter.Sheet = "SheetN" - assert.EqualError(t, streamWriter.Flush(), "sheet SheetN is not exist") - // Test close temporary file error file = NewFile() streamWriter, err = file.NewStreamWriter("Sheet1") @@ -64,17 +72,49 @@ func TestFlush(t *testing.T) { row[colID] = rand.Intn(640000) } cell, _ := CoordinatesToCellName(1, rowID) - assert.NoError(t, streamWriter.SetRow(cell, &row)) + assert.NoError(t, streamWriter.SetRow(cell, row)) } - assert.NoError(t, streamWriter.tmpFile.Close()) + assert.NoError(t, streamWriter.rawData.Close()) assert.Error(t, streamWriter.Flush()) } +func TestStreamTable(t *testing.T) { + file := NewFile() + streamWriter, err := file.NewStreamWriter("Sheet1") + assert.NoError(t, err) + + // Write some rows. We want enough rows to force a temp file (>16MB). + assert.NoError(t, streamWriter.SetRow("A1", []interface{}{"A", "B", "C"})) + row := []interface{}{1, 2, 3} + for r := 2; r < 10000; r++ { + assert.NoError(t, streamWriter.SetRow(fmt.Sprintf("A%d", r), row)) + } + + // Write a table. + assert.NoError(t, streamWriter.AddTable("A1", "C2", ``)) + assert.NoError(t, streamWriter.Flush()) + + // Verify the table has names. + var table xlsxTable + assert.NoError(t, xml.Unmarshal(file.XLSX["xl/tables/table1.xml"], &table)) + assert.Equal(t, "A", table.TableColumns.TableColumn[0].Name) + assert.Equal(t, "B", table.TableColumns.TableColumn[1].Name) + assert.Equal(t, "C", table.TableColumns.TableColumn[2].Name) +} + +func TestNewStreamWriter(t *testing.T) { + // Test error exceptions + file := NewFile() + _, err := file.NewStreamWriter("Sheet1") + assert.NoError(t, err) + _, err = file.NewStreamWriter("SheetN") + assert.EqualError(t, err, "sheet SheetN is not exist") +} + func TestSetRow(t *testing.T) { // Test error exceptions file := NewFile() streamWriter, err := file.NewStreamWriter("Sheet1") assert.NoError(t, err) - assert.EqualError(t, streamWriter.SetRow("A", &[]interface{}{}), `cannot convert cell "A" to coordinates: invalid cell name "A"`) - assert.EqualError(t, streamWriter.SetRow("A1", []interface{}{}), `pointer to slice expected`) + assert.EqualError(t, streamWriter.SetRow("A", []interface{}{}), `cannot convert cell "A" to coordinates: invalid cell name "A"`) } -- cgit v1.2.1