Implement columns iterator

author: Jérôme Pogeant <p-jerome@hotmail.fr> 2020-06-16 11:53:22 +0200
committer: Jérôme Pogeant <p-jerome@hotmail.fr> 2020-06-17 16:45:04 +0200
commit: e9a4007c17f0db01b52bb40ab744c25e0f9e9673 (patch)
tree: 77a52d49e75072770a1d839387647564a9932d19
parent: 8ce48b859a3803647a247a53b3c9fd49192fd4a7 (diff)
2 files changed, 291 insertions, 0 deletions
diff --git a/col.go b/col.go
index 6f76800..561cec9 100644
--- a/col.go
+++ b/col.go
@@ -10,7 +10,10 @@
 package excelize
 
 import (
+	"bytes"
+	"encoding/xml"
 	"errors"
+	"fmt"
 	"math"
 	"strings"
 
@@ -24,6 +27,170 @@ const (
 	EMU                    int     = 9525
 )
 
+// Cols defines an iterator to a sheet
+type Cols struct {
+	err                                  error
+	curCol, totalCol, stashCol, totalRow int
+	sheet                                string
+	cols                                 []xlsxCols
+	f                                    *File
+	decoder                              *xml.Decoder
+}
+
+// GetCols return all the columns in a sheet by given worksheet name (case sensitive). For example:
+//
+//    cols, err := f.Cols("Sheet1")
+//    if err != nil {
+//        fmt.Println(err)
+//        return
+//    }
+//    for cols.Next() {
+//        col, err := cols.Rows()
+//        if err != nil {
+//            fmt.Println(err)
+//        }
+//        for _, rowCell := range col {
+//            fmt.Print(rowCell, "\t")
+//        }
+//        fmt.Println()
+//    }
+//
+func (f *File) GetCols(sheet string) ([][]string, error) {
+	cols, err := f.Cols(sheet)
+	if err != nil {
+		return nil, err
+	}
+
+	results := make([][]string, 0, 64)
+
+	for cols.Next() {
+		if cols.Error() != nil {
+			break
+		}
+
+		col, err := cols.Rows()
+		if err != nil {
+			break
+		}
+
+		results = append(results, col)
+	}
+
+	return results, nil
+}
+
+// Next will return true if the next col element is found.
+func (cols *Cols) Next() bool {
+	cols.curCol++
+
+	return cols.curCol <= cols.totalCol
+}
+
+// Error will return an error when the next col element is found.
+func (cols *Cols) Error() error {
+	return cols.err
+}
+
+// Rows return the current column's row values
+func (cols *Cols) Rows() ([]string, error) {
+	var (
+		err  error
+		rows []string
+	)
+
+	if cols.stashCol >= cols.curCol {
+		return rows, err
+	}
+
+	for i := 1; i <= cols.totalRow; i++ {
+		colName, _ := ColumnNumberToName(cols.curCol)
+		val, _ := cols.f.GetCellValue(cols.sheet, fmt.Sprintf("%s%d", colName, i))
+		rows = append(rows, val)
+	}
+
+	return rows, nil
+}
+
+// Cols returns a columns iterator, used for streaming/reading data for a worksheet with a large data. For example:
+//
+//    cols, err := f.Cols("Sheet1")
+//    if err != nil {
+//        fmt.Println(err)
+//        return
+//    }
+//    for cols.Next() {
+//        col, err := cols.Rows()
+//        if err != nil {
+//            fmt.Println(err)
+//        }
+//        for _, rowCell := range col {
+//            fmt.Print(rowCell, "\t")
+//        }
+//        fmt.Println()
+//    }
+//
+func (f *File) Cols(sheet string) (*Cols, error) {
+	name, ok := f.sheetMap[trimSheetName(sheet)]
+	if !ok {
+		return nil, ErrSheetNotExist{sheet}
+	}
+
+	if f.Sheet[name] != nil {
+		output, _ := xml.Marshal(f.Sheet[name])
+		f.saveFileList(name, replaceRelationshipsNameSpaceBytes(output))
+	}
+
+	var (
+		inElement        string
+		cols             Cols
+		colsNum, rowsNum []int
+	)
+	decoder := f.xmlNewDecoder(bytes.NewReader(f.readXML(name)))
+
+	for {
+		token, _ := decoder.Token()
+		if token == nil {
+			break
+		}
+
+		switch startElement := token.(type) {
+		case xml.StartElement:
+			inElement = startElement.Name.Local
+			if inElement == "dimension" {
+				colsNum = make([]int, 0)
+				rowsNum = make([]int, 0)
+
+				for _, attr := range startElement.Attr {
+					if attr.Name.Local == "ref" {
+						sheetCoordinates := attr.Value
+						if i := strings.Index(sheetCoordinates, ":"); i <= -1 {
+							return &cols, errors.New("Sheet coordinates are wrong")
+						}
+
+						coordinates := strings.Split(sheetCoordinates, ":")
+						for _, coordinate := range coordinates {
+							c, r, _ := SplitCellName(coordinate)
+							columnNum, _ := ColumnNameToNumber(c)
+							colsNum = append(colsNum, columnNum)
+							rowsNum = append(rowsNum, r)
+						}
+					}
+				}
+
+				cols.totalCol = colsNum[1] - (colsNum[0] - 1)
+				cols.totalRow = rowsNum[1] - (rowsNum[0] - 1)
+			}
+		default:
+		}
+	}
+
+	cols.f = f
+	cols.sheet = trimSheetName(sheet)
+	cols.decoder = f.xmlNewDecoder(bytes.NewReader(f.readXML(name)))
+
+	return &cols, nil
+}
+
 // GetColVisible provides a function to get visible of a single column by given
 // worksheet name and column name. For example, get visible state of column D
 // in Sheet1:
diff --git a/col_test.go b/col_test.go
index fcb1619..a130f2b 100644
--- a/col_test.go
+++ b/col_test.go
@@ -1,12 +1,136 @@
 package excelize
 
 import (
+	"bytes"
 	"path/filepath"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
+func TestCols(t *testing.T) {
+	const sheet2 = "Sheet2"
+
+	f, err := OpenFile(filepath.Join("test", "Book1.xlsx"))
+	if !assert.NoError(t, err) {
+		t.FailNow()
+	}
+
+	cols, err := f.Cols(sheet2)
+	if !assert.NoError(t, err) {
+		t.FailNow()
+	}
+
+	var collectedRows [][]string
+	for cols.Next() {
+		rows, err := cols.Rows()
+		assert.NoError(t, err)
+		collectedRows = append(collectedRows, trimSliceSpace(rows))
+	}
+	if !assert.NoError(t, cols.Error()) {
+		t.FailNow()
+	}
+
+	returnedColumns, err := f.GetCols(sheet2)
+	assert.NoError(t, err)
+	for i := range returnedColumns {
+		returnedColumns[i] = trimSliceSpace(returnedColumns[i])
+	}
+	if !assert.Equal(t, collectedRows, returnedColumns) {
+		t.FailNow()
+	}
+
+	f = NewFile()
+	cells := []string{"C2", "C3", "C4"}
+	for _, cell := range cells {
+		assert.NoError(t, f.SetCellValue("Sheet1", cell, 1))
+	}
+	_, err = f.Rows("Sheet1")
+	assert.NoError(t, err)
+
+	f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{
+		Dimension: &xlsxDimension{
+			Ref: "C2:C4",
+		},
+	}
+	_, err = f.Rows("Sheet1")
+	assert.NoError(t, err)
+}
+
+func TestColumnsIterator(t *testing.T) {
+	const (
+		sheet2         = "Sheet2"
+		expectedNumCol = 4
+	)
+
+	f, err := OpenFile(filepath.Join("test", "Book1.xlsx"))
+	require.NoError(t, err)
+
+	cols, err := f.Cols(sheet2)
+	require.NoError(t, err)
+
+	var colCount int
+	for cols.Next() {
+		colCount++
+		require.True(t, colCount <= expectedNumCol, "colCount is greater than expected")
+	}
+	assert.Equal(t, expectedNumCol, colCount)
+
+	f = NewFile()
+	cells := []string{"C2", "C3", "C4", "D2", "D3", "D4"}
+	for _, cell := range cells {
+		assert.NoError(t, f.SetCellValue("Sheet1", cell, 1))
+	}
+	f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{
+		Dimension: &xlsxDimension{
+			Ref: "C2:D4",
+		},
+	}
+	cols, err = f.Cols("Sheet1")
+	require.NoError(t, err)
+
+	colCount = 0
+	for cols.Next() {
+		colCount++
+		require.True(t, colCount <= 2, "colCount is greater than expected")
+	}
+	assert.Equal(t, 2, colCount)
+}
+
+func TestColsError(t *testing.T) {
+	xlsx, err := OpenFile(filepath.Join("test", "Book1.xlsx"))
+	if !assert.NoError(t, err) {
+		t.FailNow()
+	}
+	_, err = xlsx.Cols("SheetN")
+	assert.EqualError(t, err, "sheet SheetN is not exist")
+}
+
+func TestColsRows(t *testing.T) {
+	f := NewFile()
+	f.NewSheet("Sheet1")
+
+	cols, err := f.Cols("Sheet1")
+	assert.EqualError(t, err, `Sheet coordinates are wrong`)
+
+	assert.NoError(t, f.SetCellValue("Sheet1", "A1", 1))
+	f.Sheet["xl/worksheets/sheet1.xml"] = &xlsxWorksheet{
+		Dimension: &xlsxDimension{
+			Ref: "A1:A1",
+		},
+	}
+
+	cols.stashCol, cols.curCol = 0, 1
+	cols, err = f.Cols("Sheet1")
+	assert.NoError(t, err)
+
+	// Test if token is nil
+	cols.decoder = f.xmlNewDecoder(bytes.NewReader(nil))
+	_, err = cols.Rows()
+	assert.NoError(t, err)
+}
+
 func TestColumnVisibility(t *testing.T) {
 	t.Run("TestBook1", func(t *testing.T) {
 		f, err := prepareTestBook1()
author	Jérôme Pogeant <p-jerome@hotmail.fr>	2020-06-16 11:53:22 +0200
committer	Jérôme Pogeant <p-jerome@hotmail.fr>	2020-06-17 16:45:04 +0200
commit	e9a4007c17f0db01b52bb40ab744c25e0f9e9673 (patch)
tree	77a52d49e75072770a1d839387647564a9932d19
parent	8ce48b859a3803647a247a53b3c9fd49192fd4a7 (diff)