From 790c363cceaaa09e91ad579e2d25cb13c1582bba Mon Sep 17 00:00:00 2001
From: xuri
Date: Sat, 18 Sep 2021 23:20:24 +0800
Subject: This closes #833, closes #845, and closes #1022, breaking changes

- Close spreadsheet and row's iterator required
- New options `WorksheetUnzipMemLimit` have been added
- Improve streaming reading performance, memory usage decrease about 93.7%
---
 lib.go | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 61 insertions(+), 10 deletions(-)

(limited to 'lib.go')

diff --git a/lib.go b/lib.go
index 31b64a5..c8e957c 100644
--- a/lib.go
+++ b/lib.go
@@ -18,14 +18,15 @@ import (
 	"encoding/xml"
 	"fmt"
 	"io"
+	"io/ioutil"
+	"os"
 	"regexp"
 	"strconv"
 	"strings"
 )
 
-// ReadZipReader can be used to read the spreadsheet in memory without touching the
-// filesystem.
-func ReadZipReader(r *zip.Reader, o *Options) (map[string][]byte, int, error) {
+// ReadZipReader extract spreadsheet with given options.
+func (f *File) ReadZipReader(r *zip.Reader) (map[string][]byte, int, error) {
 	var (
 		err     error
 		docPart = map[string]string{
@@ -37,25 +38,49 @@ func ReadZipReader(r *zip.Reader, o *Options) (map[string][]byte, int, error) {
 		unzipSize  int64
 	)
 	for _, v := range r.File {
-		unzipSize += v.FileInfo().Size()
-		if unzipSize > o.UnzipSizeLimit {
-			return fileList, worksheets, newUnzipSizeLimitError(o.UnzipSizeLimit)
+		fileSize := v.FileInfo().Size()
+		unzipSize += fileSize
+		if unzipSize > f.options.UnzipSizeLimit {
+			return fileList, worksheets, newUnzipSizeLimitError(f.options.UnzipSizeLimit)
 		}
 		fileName := strings.Replace(v.Name, "\\", "/", -1)
 		if partName, ok := docPart[strings.ToLower(fileName)]; ok {
 			fileName = partName
 		}
-		if fileList[fileName], err = readFile(v); err != nil {
-			return nil, 0, err
-		}
 		if strings.HasPrefix(fileName, "xl/worksheets/sheet") {
 			worksheets++
+			if fileSize > f.options.WorksheetUnzipMemLimit && !v.FileInfo().IsDir() {
+				if tempFile, err := f.unzipToTemp(v); err == nil {
+					f.tempFiles.Store(fileName, tempFile)
+					continue
+				}
+			}
+		}
+		if fileList[fileName], err = readFile(v); err != nil {
+			return nil, 0, err
 		}
 	}
 	return fileList, worksheets, nil
 }
 
-// readXML provides a function to read XML content as string.
+// unzipToTemp unzip the zip entity to the system temporary directory and
+// returned the unzipped file path.
+func (f *File) unzipToTemp(zipFile *zip.File) (string, error) {
+	tmp, err := ioutil.TempFile(os.TempDir(), "excelize-")
+	if err != nil {
+		return "", err
+	}
+	rc, err := zipFile.Open()
+	if err != nil {
+		return tmp.Name(), err
+	}
+	_, err = io.Copy(tmp, rc)
+	rc.Close()
+	tmp.Close()
+	return tmp.Name(), err
+}
+
+// readXML provides a function to read XML content as bytes.
 func (f *File) readXML(name string) []byte {
 	if content, _ := f.Pkg.Load(name); content != nil {
 		return content.([]byte)
@@ -66,6 +91,32 @@ func (f *File) readXML(name string) []byte {
 	return []byte{}
 }
 
+// readBytes read file as bytes by given path.
+func (f *File) readBytes(name string) []byte {
+	content := f.readXML(name)
+	if len(content) != 0 {
+		return content
+	}
+	file, err := f.readTemp(name)
+	if err != nil {
+		return content
+	}
+	content, _ = ioutil.ReadAll(file)
+	f.Pkg.Store(name, content)
+	file.Close()
+	return content
+}
+
+// readTemp read file from system temporary directory by given path.
+func (f *File) readTemp(name string) (file *os.File, err error) {
+	path, ok := f.tempFiles.Load(name)
+	if !ok {
+		return
+	}
+	file, err = os.Open(path.(string))
+	return
+}
+
 // saveFileList provides a function to update given file content in file list
 // of spreadsheet.
 func (f *File) saveFileList(name string, content []byte) {
-- 
cgit v1.2.1