diff options
author | George Abbott <otg@gabbott.dev> | 2023-01-29 19:59:00 +0000 |
---|---|---|
committer | George Abbott <otg@gabbott.dev> | 2023-01-29 19:59:00 +0000 |
commit | c4d71b0fdb4bef63e1553843efa9cb9ea9270806 (patch) | |
tree | 42edd240b10f2d986beb0c8fadda56fe3b5acce1 | |
parent | e9869d42a8bfb41d2a71bd75d08ad6773ae00a18 (diff) |
added rows_generic
-rw-r--r-- | rows_generic.go | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/rows_generic.go b/rows_generic.go new file mode 100644 index 0000000..fff3019 --- /dev/null +++ b/rows_generic.go @@ -0,0 +1,301 @@ +package excelize + +import ( + // "bytes" + "encoding/xml" + "errors" + // "fmt" + // "io" + // "math" + "strings" + "time" + "os" + "strconv" + // "github.com/mohae/deepcopy" +) + +func (f *File) GetRowsGeneric(sheet string, opts ...Options) ([][]any, error) { + rows, err := f.RowsGeneric(sheet) + if err != nil { + return nil, err + } + results, cur, max := make([][]any, 0, 64), 0, 0 + for rows.Next() { + cur++ + row, err := rows.ColumnsGeneric(opts...) + if err != nil { + break + } + results = append(results, row) + if len(row) > 0 { + max = cur + } + } + return results[:max], rows.Close() +} + +type RowsGeneric struct { + err error + curRow, seekRow int + needClose, rawCellValue bool + sheet string + f *File + tempFile *os.File + sst *xlsxSST + decoder *xml.Decoder + token xml.Token + curRowOpts, seekRowOpts RowOpts +} + +func (f *File) RowsGeneric(sheet string) (*RowsGeneric, error) { + name, ok := f.getSheetXMLPath(sheet) + if !ok { + return nil, ErrSheetNotExist{sheet} + } + if ws, ok := f.Sheet.Load(name); ok && ws != nil { + worksheet := ws.(*xlsxWorksheet) + worksheet.Lock() + defer worksheet.Unlock() + // flush data + output, _ := xml.Marshal(worksheet) + f.saveFileList(name, f.replaceNameSpaceBytes(name, output)) + } + var err error + rows := RowsGeneric{f: f, sheet: name} + rows.needClose, rows.decoder, rows.tempFile, err = f.xmlDecoder(name) + return &rows, err +} + +// Next will return true if find the next row element. +func (rows *RowsGeneric) Next() bool { + rows.seekRow++ + if rows.curRow >= rows.seekRow { + rows.curRowOpts = rows.seekRowOpts + return true + } + for { + token, _ := rows.decoder.Token() + if token == nil { + return false + } + switch xmlElement := token.(type) { + case xml.StartElement: + if xmlElement.Name.Local == "row" { + rows.curRow++ + if rowNum, _ := attrValToInt("r", xmlElement.Attr); rowNum != 0 { + rows.curRow = rowNum + } + rows.token = token + rows.curRowOpts = extractRowOpts(xmlElement.Attr) + return true + } + case xml.EndElement: + if xmlElement.Name.Local == "sheetData" { + return false + } + } + } +} + +func (rows *RowsGeneric) Close() error { + if rows.tempFile != nil { + return rows.tempFile.Close() + } + return nil +} + +// Columns return the current row's column values. This fetches the worksheet +// data as a stream, returns each cell in a row as is, and will not skip empty +// rows in the tail of the worksheet. +func (rows *RowsGeneric) ColumnsGeneric(opts ...Options) ([]any, error) { + if rows.curRow > rows.seekRow { + return nil, nil + } + var rowIterator rowXMLIteratorGeneric + var token xml.Token + rows.rawCellValue = parseOptions(opts...).RawCellValue + if rows.sst, rowIterator.err = rows.f.sharedStringsReader(); rowIterator.err != nil { + return rowIterator.cells, rowIterator.err + } + for { + if rows.token != nil { + token = rows.token + } else if token, _ = rows.decoder.Token(); token == nil { + break + } + switch xmlElement := token.(type) { + case xml.StartElement: + rowIterator.inElement = xmlElement.Name.Local + if rowIterator.inElement == "row" { + rowNum := 0 + if rowNum, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowNum != 0 { + rows.curRow = rowNum + } else if rows.token == nil { + rows.curRow++ + } + rows.token = token + rows.seekRowOpts = extractRowOpts(xmlElement.Attr) + if rows.curRow > rows.seekRow { + rows.token = nil + return rowIterator.cells, rowIterator.err + } + } + if rows.rowXMLHandlerGeneric(&rowIterator, &xmlElement, rows.rawCellValue); rowIterator.err != nil { + rows.token = nil + return rowIterator.cells, rowIterator.err + } + rows.token = nil + case xml.EndElement: + if xmlElement.Name.Local == "sheetData" { + return rowIterator.cells, rowIterator.err + } + } + } + return rowIterator.cells, rowIterator.err +} + +type rowXMLIteratorGeneric struct { + err error + inElement string + cellCol, cellRow int + cells []any +} + + +// rowXMLHandler parse the row XML element of the worksheet. +func (rows *RowsGeneric) rowXMLHandlerGeneric(rowIterator *rowXMLIteratorGeneric, xmlElement *xml.StartElement, raw bool) { + if rowIterator.inElement == "c" { + rowIterator.cellCol++ + colCell := xlsxC{} + _ = rows.decoder.DecodeElement(&colCell, xmlElement) + if colCell.R != "" { + if rowIterator.cellCol, _, rowIterator.err = CellNameToCoordinates(colCell.R); rowIterator.err != nil { + return + } + } + blank := rowIterator.cellCol - len(rowIterator.cells) + if val, _ := colCell.getValueFromGeneric(rows.f, rows.sst, raw); val != "" || colCell.F != nil { + rowIterator.cells = append(appendSpaceGeneric(blank, rowIterator.cells), val) + } + } +} + +// appendSpace append blank characters to slice by given length and source slice. +// This will append strings to it, then. Not sure why this exists, but hey. +func appendSpaceGeneric(l int, s []any) []any{ + for i := 1; i < l; i++ { + s = append(s, "") + } + return s +} +// This should be cell_generic.go but that can be a TODO +func (c *xlsxC) getValueFromGeneric(f *File, d *xlsxSST, raw bool) (any, error) { + f.Lock() + defer f.Unlock() + switch c.T { + case "b": // done, not tested + return c.getCellBoolGeneric(f, raw) + case "d": // done, not tested + return c.getCellDateGeneric(f, raw) + case "s": // Don't touch, strings are a-ok + if c.V != "" { + xlsxSI := 0 + xlsxSI, _ = strconv.Atoi(c.V) + if _, ok := f.tempFiles.Load(defaultXMLPathSharedStrings); ok { + return f.formattedValue(c.S, f.getFromStringItem(xlsxSI), raw) + } + if len(d.SI) > xlsxSI { + return f.formattedValue(c.S, d.SI[xlsxSI].String(), raw) + } + } + return f.formattedValue(c.S, c.V, raw) + case "inlineStr": // as are inline strs. So don't touch. + if c.IS != nil { + return f.formattedValue(c.S, c.IS.String(), raw) + } + return f.formattedValue(c.S, c.V, raw) + default: // For integrals and floatings? These need to go into int or float64. + // We need to change it to return an int or a decimal. + return c.getCellNumeric(f, raw) +// if isNum, precision, decimal := isNumeric(c.V); isNum && !raw { +// if precision > 15 { +// c.V = strconv.FormatFloat(decimal, 'G', 15, 64) +// } else { +// c.V = strconv.FormatFloat(decimal, 'f', -1, 64) +// } +// } +// return f.formattedValue(c.S, c.V, raw) + } +} + +// NOTE: This function returns any. Now, 99% of the time it should be +// bool. But I can't be asked to handle errors, or rewrite formattedValue, and +// so it might return a string. Just be warned, future me. +func (c *xlsxC) getCellBoolGeneric(f *File, raw bool) (any, error) { + if c.V == "1" { + return true, nil + } + if c.V == "0" { + return false, nil + } + + return f.formattedValue(c.S, c.V, raw) +} + +// TODO: I probably actually have to like, do something, to get a valid +// date from this. I think Go has a type for dates which gets validly written +// and accepted in the writing functions. +// Actually, this function is really dirty. It actually sets the c.V value to +// the format of Excel time (in float64 form?) and then sends it on its way to +// f.formattedValue I think. Which means unless I change formattedValue, this +// will return a string, as that fn returns a string. +// So then, TODO: Fix formattedValue to return a date value (or just fix this +// function not to call formattedValue, and just to return a proper date like +// it should. I don't want to return a float64, but a time.Time I believe. +func (c *xlsxC) getCellDateGeneric(f *File, raw bool) (time.Time, error) { + // if !raw { + layout := "20060102T150405.999" + if strings.HasSuffix(c.V, "Z") { + layout = "20060102T150405Z" + if strings.Contains(c.V, "-") { + layout = "2006-01-02T15:04:05Z" + } + } else if strings.Contains(c.V, "-") { + layout = "2006-01-02 15:04:05Z" + } + if timestamp, err := time.Parse(layout, strings.ReplaceAll(c.V, ",", ".")); err == nil { + return timestamp, nil + excelTime, _ := timeToExcelTime(timestamp, false) + c.V = strconv.FormatFloat(excelTime, 'G', 15, 64) + } // else { + //errmess := fmt.Sprintf("getCellDateGeneric: failed to parse %s", c.V) + //return time.Now(), errors.New(errmess) + // } + + /* This is the error case */ + return time.Now(), errors.New("getCellDateGeneric failed") + + + // } + // I don't think this vv is needed? + // return f.formattedValue(c.S, c.V, raw) +} + +// For now, we could just try parse as a float, and if it doesn't work parse as +// an int, and if it doesn't work cry. +func (c *xlsxC) getCellNumeric(f *File, raw bool) (any, error) { + // This can either be a float or an int. If it is a float, return a + // float64. Otherwise, return an int. + /* TODO: implement according to above comment. */ + if isNum, precision, decimal := isNumeric(c.V); isNum && !raw { + if precision > 15 { + // Return here a float64 + c.V = strconv.FormatFloat(decimal, 'G', 15, 64) + } else { + // I think, return here an int? + c.V = strconv.FormatFloat(decimal, 'f', -1, 64) + } + } + return f.formattedValue(c.S, c.V, raw) + +} |