From 7965e1231b736f8507f93f6383b76332eb15ff5f Mon Sep 17 00:00:00 2001 From: xuri Date: Sat, 23 Nov 2019 04:13:59 +0800 Subject: Resolve #146, make the GetRow function read data as streaming. Ref: #382, #515 --- rows.go | 122 ++++++++++++++++++++++++++++++++++++++++++++++------------- rows_test.go | 23 ++++++++--- sheet.go | 63 ++++++++++++++++-------------- 3 files changed, 146 insertions(+), 62 deletions(-) diff --git a/rows.go b/rows.go index c8ad2b1..69a9846 100644 --- a/rows.go +++ b/rows.go @@ -10,6 +10,7 @@ package excelize import ( + "bytes" "encoding/xml" "errors" "fmt" @@ -49,16 +50,19 @@ func (f *File) GetRows(sheet string) ([][]string, error) { // Rows defines an iterator to a sheet type Rows struct { - err error - f *File - rows []xlsxRow - curRow int + err error + f *File + rows []xlsxRow + sheet string + curRow int + totalRow int + decoder *xml.Decoder } // Next will return true if find the next row element. func (rows *Rows) Next() bool { rows.curRow++ - return rows.curRow <= len(rows.rows) + return rows.curRow <= rows.totalRow } // Error will return the error when the find next row element @@ -68,19 +72,57 @@ func (rows *Rows) Error() error { // Columns return the current row's column values func (rows *Rows) Columns() ([]string, error) { - curRow := rows.rows[rows.curRow-1] - - columns := make([]string, len(curRow.C)) + var ( + err error + inElement string + row, cellCol int + columns []string + ) d := rows.f.sharedStringsReader() - for _, colCell := range curRow.C { - col, _, err := CellNameToCoordinates(colCell.R) - if err != nil { - return columns, err + for { + token, _ := rows.decoder.Token() + if token == nil { + break + } + switch startElement := token.(type) { + case xml.StartElement: + inElement = startElement.Name.Local + if inElement == "row" { + for _, attr := range startElement.Attr { + if attr.Name.Local == "r" { + row, err = strconv.Atoi(attr.Value) + if err != nil { + return columns, err + } + if row > rows.curRow { + return columns, err + } + } + } + } + if inElement == "c" { + colCell := xlsxC{} + _ = rows.decoder.DecodeElement(&colCell, &startElement) + cellCol, _, err = CellNameToCoordinates(colCell.R) + if err != nil { + return columns, err + } + blank := cellCol - len(columns) + for i := 1; i < blank; i++ { + columns = append(columns, "") + } + val, _ := colCell.getValueFrom(rows.f, d) + columns = append(columns, val) + } + case xml.EndElement: + inElement = startElement.Name.Local + if inElement == "row" { + return columns, err + } + default: } - val, _ := colCell.getValueFrom(rows.f, d) - columns[col-1] = val } - return columns, nil + return columns, err } // ErrSheetNotExist defines an error of sheet is not exist @@ -89,7 +131,7 @@ type ErrSheetNotExist struct { } func (err ErrSheetNotExist) Error() string { - return fmt.Sprintf("Sheet %s is not exist", string(err.SheetName)) + return fmt.Sprintf("sheet %s is not exist", string(err.SheetName)) } // Rows return a rows iterator. For example: @@ -104,22 +146,48 @@ func (err ErrSheetNotExist) Error() string { // } // func (f *File) Rows(sheet string) (*Rows, error) { - xlsx, err := f.workSheetReader(sheet) - if err != nil { - return nil, err - } name, ok := f.sheetMap[trimSheetName(sheet)] if !ok { return nil, ErrSheetNotExist{sheet} } - if xlsx != nil { - data := f.readXML(name) - f.saveFileList(name, replaceWorkSheetsRelationshipsNameSpaceBytes(namespaceStrictToTransitional(data))) + if f.Sheet[name] != nil { + // flush data + output, _ := xml.Marshal(f.Sheet[name]) + f.saveFileList(name, replaceWorkSheetsRelationshipsNameSpaceBytes(output)) + } + var ( + err error + inElement string + row int + rows Rows + ) + decoder := xml.NewDecoder(bytes.NewReader(f.readXML(name))) + for { + token, _ := decoder.Token() + if token == nil { + break + } + switch startElement := token.(type) { + case xml.StartElement: + inElement = startElement.Name.Local + if inElement == "row" { + for _, attr := range startElement.Attr { + if attr.Name.Local == "r" { + row, err = strconv.Atoi(attr.Value) + if err != nil { + return &rows, err + } + } + } + rows.totalRow = row + } + default: + } } - return &Rows{ - f: f, - rows: xlsx.SheetData.Row, - }, nil + rows.f = f + rows.sheet = name + rows.decoder = xml.NewDecoder(bytes.NewReader(f.readXML(name))) + return &rows, nil } // SetRowHeight provides a function to set the height of a single row. For diff --git a/rows_test.go b/rows_test.go index ff70118..47c9d96 100644 --- a/rows_test.go +++ b/rows_test.go @@ -47,10 +47,10 @@ func TestRowsIterator(t *testing.T) { sheet2 = "Sheet2" expectedNumRow = 11 ) - xlsx, err := OpenFile(filepath.Join("test", "Book1.xlsx")) + f, err := OpenFile(filepath.Join("test", "Book1.xlsx")) require.NoError(t, err) - rows, err := xlsx.Rows(sheet2) + rows, err := f.Rows(sheet2) require.NoError(t, err) var rowCount int for rows.Next() { @@ -59,9 +59,20 @@ func TestRowsIterator(t *testing.T) { } assert.Equal(t, expectedNumRow, rowCount) - rows = &Rows{f: xlsx, rows: []xlsxRow{{C: []xlsxC{{R: "A"}}}}, curRow: 1} - _, err = rows.Columns() - assert.EqualError(t, err, `cannot convert cell "A" to coordinates: invalid cell name "A"`) + // Valued cell sparse distribution test + f = NewFile() + cells := []string{"C1", "E1", "A3", "B3", "C3", "D3", "E3"} + for _, cell := range cells { + f.SetCellValue("Sheet1", cell, 1) + } + rows, err = f.Rows("Sheet1") + require.NoError(t, err) + rowCount = 0 + for rows.Next() { + rowCount++ + require.True(t, rowCount <= 3, "rowCount is greater than expected") + } + assert.Equal(t, 3, rowCount) } func TestRowsError(t *testing.T) { @@ -697,7 +708,7 @@ func TestDuplicateRowInvalidRownum(t *testing.T) { func TestErrSheetNotExistError(t *testing.T) { err := ErrSheetNotExist{SheetName: "Sheet1"} - assert.EqualValues(t, err.Error(), "Sheet Sheet1 is not exist") + assert.EqualValues(t, err.Error(), "sheet Sheet1 is not exist") } func BenchmarkRows(b *testing.B) { diff --git a/sheet.go b/sheet.go index 43c7cc0..c2e6bf6 100644 --- a/sheet.go +++ b/sheet.go @@ -699,15 +699,12 @@ func (f *File) SearchSheet(sheet, value string, reg ...bool) ([]string, error) { for _, r := range reg { regSearch = r } - xlsx, err := f.workSheetReader(sheet) - if err != nil { - return result, err - } name, ok := f.sheetMap[trimSheetName(sheet)] if !ok { - return result, nil + return result, ErrSheetNotExist{sheet} } - if xlsx != nil { + if f.Sheet[name] != nil { + // flush data output, _ := xml.Marshal(f.Sheet[name]) f.saveFileList(name, replaceWorkSheetsRelationshipsNameSpaceBytes(output)) } @@ -718,9 +715,10 @@ func (f *File) SearchSheet(sheet, value string, reg ...bool) ([]string, error) { // cell value, and regular expression. func (f *File) searchSheet(name, value string, regSearch bool) ([]string, error) { var ( - inElement string - result []string - r xlsxRow + err error + cellName, inElement string + result []string + cellCol, row int ) d := f.sharedStringsReader() decoder := xml.NewDecoder(bytes.NewReader(f.readXML(name))) @@ -733,31 +731,38 @@ func (f *File) searchSheet(name, value string, regSearch bool) ([]string, error) case xml.StartElement: inElement = startElement.Name.Local if inElement == "row" { - r = xlsxRow{} - _ = decoder.DecodeElement(&r, &startElement) - for _, colCell := range r.C { - val, _ := colCell.getValueFrom(f, d) - if regSearch { - regex := regexp.MustCompile(value) - if !regex.MatchString(val) { - continue - } - } else { - if val != value { - continue + for _, attr := range startElement.Attr { + if attr.Name.Local == "r" { + row, err = strconv.Atoi(attr.Value) + if err != nil { + return result, err } } - - cellCol, _, err := CellNameToCoordinates(colCell.R) - if err != nil { - return result, err + } + } + if inElement == "c" { + colCell := xlsxC{} + _ = decoder.DecodeElement(&colCell, &startElement) + val, _ := colCell.getValueFrom(f, d) + if regSearch { + regex := regexp.MustCompile(value) + if !regex.MatchString(val) { + continue } - cellName, err := CoordinatesToCellName(cellCol, r.R) - if err != nil { - return result, err + } else { + if val != value { + continue } - result = append(result, cellName) } + cellCol, _, err = CellNameToCoordinates(colCell.R) + if err != nil { + return result, err + } + cellName, err = CoordinatesToCellName(cellCol, row) + if err != nil { + return result, err + } + result = append(result, cellName) } default: } -- cgit v1.2.1