From 4daa6ed0b46fdd994e46403feb049b162eca19b8 Mon Sep 17 00:00:00 2001
From: xuri <xuri.me@gmail.com>
Date: Mon, 17 Jan 2022 08:05:52 +0800
Subject: Breaking change: remove `TotalRows` of row iterator and performance
 optimization

Reduce allocated memory by 20% and GC times by 80% for the rows iterator
---
 cell_test.go |   8 +++
 rows.go      | 156 +++++++++++++++++++++++++----------------------------------
 rows_test.go |  12 +----
 3 files changed, 75 insertions(+), 101 deletions(-)

diff --git a/cell_test.go b/cell_test.go
index 21e5a44..b3bb997 100644
--- a/cell_test.go
+++ b/cell_test.go
@@ -340,6 +340,14 @@ func TestGetCellType(t *testing.T) {
 	assert.EqualError(t, err, newCellNameToCoordinatesError("A", newInvalidCellNameError("A")).Error())
 }
 
+func TestGetValueFrom(t *testing.T) {
+	f := NewFile()
+	c := xlsxC{T: "s"}
+	value, err := c.getValueFrom(f, f.sharedStringsReader(), false)
+	assert.NoError(t, err)
+	assert.Equal(t, "", value)
+}
+
 func TestGetCellFormula(t *testing.T) {
 	// Test get cell formula on not exist worksheet.
 	f := NewFile()
diff --git a/rows.go b/rows.go
index 0ced386..8072079 100644
--- a/rows.go
+++ b/rows.go
@@ -68,29 +68,49 @@ func (f *File) GetRows(sheet string, opts ...Options) ([][]string, error) {
 
 // Rows defines an iterator to a sheet.
 type Rows struct {
-	err                         error
-	curRow, totalRows, stashRow int
-	rawCellValue                bool
-	sheet                       string
-	f                           *File
-	tempFile                    *os.File
-	decoder                     *xml.Decoder
+	err                     error
+	curRow, seekRow         int
+	needClose, rawCellValue bool
+	sheet                   string
+	f                       *File
+	tempFile                *os.File
+	sst                     *xlsxSST
+	decoder                 *xml.Decoder
+	token                   xml.Token
 }
 
 // CurrentRow returns the row number that represents the current row.
 func (rows *Rows) CurrentRow() int {
-	return rows.curRow
-}
-
-// TotalRows returns the total rows count in the worksheet.
-func (rows *Rows) TotalRows() int {
-	return rows.totalRows
+	return rows.seekRow
 }
 
 // Next will return true if find the next row element.
 func (rows *Rows) Next() bool {
-	rows.curRow++
-	return rows.curRow <= rows.totalRows
+	rows.seekRow++
+	if rows.curRow >= rows.seekRow {
+		return true
+	}
+	for {
+		token, _ := rows.decoder.Token()
+		if token == nil {
+			return false
+		}
+		switch xmlElement := token.(type) {
+		case xml.StartElement:
+			if xmlElement.Name.Local == "row" {
+				rows.curRow++
+				if rowNum, _ := attrValToInt("r", xmlElement.Attr); rowNum != 0 {
+					rows.curRow = rowNum
+				}
+				rows.token = token
+				return true
+			}
+		case xml.EndElement:
+			if xmlElement.Name.Local == "sheetData" {
+				return false
+			}
+		}
+	}
 }
 
 // Error will return the error when the error occurs.
@@ -109,44 +129,40 @@ func (rows *Rows) Close() error {
 
 // Columns return the current row's column values.
 func (rows *Rows) Columns(opts ...Options) ([]string, error) {
-	var rowIterator rowXMLIterator
-	if rows.stashRow >= rows.curRow {
-		return rowIterator.columns, rowIterator.err
+	if rows.curRow > rows.seekRow {
+		return nil, nil
 	}
-	rows.rawCellValue = parseOptions(opts...).RawCellValue
-	rowIterator.rows = rows
-	rowIterator.d = rows.f.sharedStringsReader()
+	var rowIterator rowXMLIterator
+	var token xml.Token
+	rows.rawCellValue, rows.sst = parseOptions(opts...).RawCellValue, rows.f.sharedStringsReader()
 	for {
-		token, _ := rows.decoder.Token()
-		if token == nil {
+		if rows.token != nil {
+			token = rows.token
+		} else if token, _ = rows.decoder.Token(); token == nil {
 			break
 		}
 		switch xmlElement := token.(type) {
 		case xml.StartElement:
 			rowIterator.inElement = xmlElement.Name.Local
 			if rowIterator.inElement == "row" {
-				rowIterator.row++
-				if rowIterator.attrR, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowIterator.attrR != 0 {
-					rowIterator.row = rowIterator.attrR
+				rowNum := 0
+				if rowNum, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowNum != 0 {
+					rows.curRow = rowNum
+				} else if rows.token == nil {
+					rows.curRow++
 				}
-				if rowIterator.row > rowIterator.rows.curRow {
-					rowIterator.rows.stashRow = rowIterator.row - 1
+				if rows.curRow > rows.seekRow {
+					rows.token = nil
 					return rowIterator.columns, rowIterator.err
 				}
 			}
-			rowXMLHandler(&rowIterator, &xmlElement, rows.rawCellValue)
-			if rowIterator.err != nil {
+			if rows.rowXMLHandler(&rowIterator, &xmlElement, rows.rawCellValue); rowIterator.err != nil {
+				rows.token = nil
 				return rowIterator.columns, rowIterator.err
 			}
+			rows.token = nil
 		case xml.EndElement:
-			rowIterator.inElement = xmlElement.Name.Local
-			if rowIterator.row == 0 && rowIterator.rows.curRow > 1 {
-				rowIterator.row = rowIterator.rows.curRow
-			}
-			if rowIterator.inElement == "row" && rowIterator.row+1 < rowIterator.rows.curRow {
-				return rowIterator.columns, rowIterator.err
-			}
-			if rowIterator.inElement == "sheetData" {
+			if xmlElement.Name.Local == "sheetData" {
 				return rowIterator.columns, rowIterator.err
 			}
 		}
@@ -173,29 +189,25 @@ func (err ErrSheetNotExist) Error() string {
 
 // rowXMLIterator defined runtime use field for the worksheet row SAX parser.
 type rowXMLIterator struct {
-	err                 error
-	inElement           string
-	attrR, cellCol, row int
-	columns             []string
-	rows                *Rows
-	d                   *xlsxSST
+	err       error
+	inElement string
+	cellCol   int
+	columns   []string
 }
 
 // rowXMLHandler parse the row XML element of the worksheet.
-func rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement, raw bool) {
-	rowIterator.err = nil
+func (rows *Rows) rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement, raw bool) {
 	if rowIterator.inElement == "c" {
 		rowIterator.cellCol++
 		colCell := xlsxC{}
-		_ = rowIterator.rows.decoder.DecodeElement(&colCell, xmlElement)
+		_ = rows.decoder.DecodeElement(&colCell, xmlElement)
 		if colCell.R != "" {
 			if rowIterator.cellCol, _, rowIterator.err = CellNameToCoordinates(colCell.R); rowIterator.err != nil {
 				return
 			}
 		}
 		blank := rowIterator.cellCol - len(rowIterator.columns)
-		val, _ := colCell.getValueFrom(rowIterator.rows.f, rowIterator.d, raw)
-		if val != "" || colCell.F != nil {
+		if val, _ := colCell.getValueFrom(rows.f, rows.sst, raw); val != "" || colCell.F != nil {
 			rowIterator.columns = append(appendSpace(blank, rowIterator.columns), val)
 		}
 	}
@@ -236,48 +248,10 @@ func (f *File) Rows(sheet string) (*Rows, error) {
 		output, _ := xml.Marshal(worksheet)
 		f.saveFileList(name, f.replaceNameSpaceBytes(name, output))
 	}
-	var (
-		err       error
-		inElement string
-		row       int
-		rows      Rows
-		needClose bool
-		decoder   *xml.Decoder
-		tempFile  *os.File
-	)
-	if needClose, decoder, tempFile, err = f.xmlDecoder(name); needClose && err == nil {
-		defer tempFile.Close()
-	}
-	for {
-		token, _ := decoder.Token()
-		if token == nil {
-			break
-		}
-		switch xmlElement := token.(type) {
-		case xml.StartElement:
-			inElement = xmlElement.Name.Local
-			if inElement == "row" {
-				row++
-				for _, attr := range xmlElement.Attr {
-					if attr.Name.Local == "r" {
-						row, err = strconv.Atoi(attr.Value)
-						if err != nil {
-							return &rows, err
-						}
-					}
-				}
-				rows.totalRows = row
-			}
-		case xml.EndElement:
-			if xmlElement.Name.Local == "sheetData" {
-				rows.f = f
-				rows.sheet = name
-				_, rows.decoder, rows.tempFile, err = f.xmlDecoder(name)
-				return &rows, err
-			}
-		}
-	}
-	return &rows, nil
+	var err error
+	rows := Rows{f: f, sheet: name}
+	rows.needClose, rows.decoder, rows.tempFile, err = f.xmlDecoder(name)
+	return &rows, err
 }
 
 // getFromStringItem build shared string item offset list from system temporary
diff --git a/rows_test.go b/rows_test.go
index f6a3da4..0ac9271 100644
--- a/rows_test.go
+++ b/rows_test.go
@@ -44,13 +44,6 @@ func TestRows(t *testing.T) {
 	}
 	assert.NoError(t, f.Close())
 
-	f = NewFile()
-	f.Pkg.Store("xl/worksheets/sheet1.xml", []byte(`<worksheet><sheetData><row r="1"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`))
-	f.Sheet.Delete("xl/worksheets/sheet1.xml")
-	delete(f.checked, "xl/worksheets/sheet1.xml")
-	_, err = f.Rows("Sheet1")
-	assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`)
-
 	f.Pkg.Store("xl/worksheets/sheet1.xml", nil)
 	_, err = f.Rows("Sheet1")
 	assert.NoError(t, err)
@@ -82,7 +75,6 @@ func TestRowsIterator(t *testing.T) {
 	for rows.Next() {
 		rowCount++
 		assert.Equal(t, rowCount, rows.CurrentRow())
-		assert.Equal(t, expectedNumRow, rows.TotalRows())
 		require.True(t, rowCount <= expectedNumRow, "rowCount is greater than expected")
 	}
 	assert.Equal(t, expectedNumRow, rowCount)
@@ -186,7 +178,7 @@ func TestColumns(t *testing.T) {
 	assert.NoError(t, err)
 
 	rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="A"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`)))
-	rows.stashRow, rows.curRow = 0, 1
+	assert.True(t, rows.Next())
 	_, err = rows.Columns()
 	assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`)
 
@@ -194,8 +186,8 @@ func TestColumns(t *testing.T) {
 	_, err = rows.Columns()
 	assert.NoError(t, err)
 
-	rows.curRow = 3
 	rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="1"><c r="A" t="s"><v>1</v></c></row></sheetData></worksheet>`)))
+	assert.True(t, rows.Next())
 	_, err = rows.Columns()
 	assert.EqualError(t, err, newCellNameToCoordinatesError("A", newInvalidCellNameError("A")).Error())
 
-- 
cgit v1.2.1