From 2cfcf9eb5ff2f332dad0c6adead53ef0500001db Mon Sep 17 00:00:00 2001 From: xuri Date: Wed, 16 Jun 2021 15:03:50 +0000 Subject: encode the escaped string literal which not permitted in an XML 1.0 document --- cell.go | 3 ++- lib.go | 46 ++++++++++++++++++++++++++++++++++++++++++---- lib_test.go | 13 +++++++++++++ 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/cell.go b/cell.go index f94b81e..4dec093 100644 --- a/cell.go +++ b/cell.go @@ -288,6 +288,7 @@ func (f *File) setSharedString(val string) int { } sst.Count++ sst.UniqueCount++ + val = bstrMarshal(val) t := xlsxT{Val: val} // Leading and ending space(s) character detection. if len(val) > 0 && (val[0] == 32 || val[len(val)-1] == 32) { @@ -315,7 +316,7 @@ func setCellStr(value string) (t string, v string, ns xml.Attr) { } } t = "str" - v = value + v = bstrMarshal(value) return } diff --git a/lib.go b/lib.go index baa62bf..5c9bbf6 100644 --- a/lib.go +++ b/lib.go @@ -456,6 +456,11 @@ func isNumeric(s string) (bool, int) { return true, p } +var ( + bstrExp = regexp.MustCompile(`_x[a-zA-Z\d]{4}_`) + bstrEscapeExp = regexp.MustCompile(`x[a-zA-Z\d]{4}_`) +) + // bstrUnmarshal parses the binary basic string, this will trim escaped string // literal which not permitted in an XML 1.0 document. The basic string // variant type can store any valid Unicode character. Unicode characters @@ -468,15 +473,13 @@ func isNumeric(s string) (bool, int) { // initial underscore shall itself be escaped (i.e. stored as _x005F_). For // example: The string literal _x0008_ would be stored as _x005F_x0008_. func bstrUnmarshal(s string) (result string) { - bstrExp := regexp.MustCompile(`_x[a-zA-Z0-9]{4}_`) - escapeExp := regexp.MustCompile(`x[a-zA-Z0-9]{4}_`) matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0 for _, match := range matches { result += s[cursor:match[0]] subStr := s[match[0]:match[1]] if subStr == "_x005F_" { cursor = match[1] - if l > match[1]+6 && !escapeExp.MatchString(s[match[1]:match[1]+6]) { + if l > match[1]+6 && !bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) { result += subStr continue } @@ -487,7 +490,7 @@ func bstrUnmarshal(s string) (result string) { cursor = match[1] v, err := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`) if err != nil { - if l > match[1]+6 && escapeExp.MatchString(s[match[1]:match[1]+6]) { + if l > match[1]+6 && bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) { result += subStr[:6] cursor = match[1] + 6 continue @@ -512,6 +515,41 @@ func bstrUnmarshal(s string) (result string) { return result } +// bstrMarshal encode the escaped string literal which not permitted in an XML +// 1.0 document. +func bstrMarshal(s string) (result string) { + matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0 + for _, match := range matches { + result += s[cursor:match[0]] + subStr := s[match[0]:match[1]] + if subStr == "_x005F_" { + cursor = match[1] + if match[1]+6 <= l && bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) { + _, err := strconv.Unquote(`"\u` + s[match[1]+1:match[1]+5] + `"`) + if err == nil { + result += subStr + "x005F" + subStr + continue + } + } + result += subStr + "x005F_" + continue + } + if bstrExp.MatchString(subStr) { + cursor = match[1] + _, err := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`) + if err == nil { + result += "_x005F" + subStr + continue + } + result += subStr + } + } + if cursor < l { + result += s[cursor:] + } + return result +} + // Stack defined an abstract data type that serves as a collection of elements. type Stack struct { list *list.List diff --git a/lib_test.go b/lib_test.go index ad20946..315688f 100644 --- a/lib_test.go +++ b/lib_test.go @@ -258,3 +258,16 @@ func TestBstrUnmarshal(t *testing.T) { assert.Equal(t, expected, bstrUnmarshal(bstr)) } } + +func TestBstrMarshal(t *testing.T) { + bstrs := map[string]string{ + "*_xG05F_*": "*_xG05F_*", + "*_x0008_*": "*_x005F_x0008_*", + "*_x005F_*": "*_x005F_x005F_*", + "*_x005F_xG006_*": "*_x005F_x005F_xG006_*", + "*_x005F_x0006_*": "*_x005F_x005F_x005F_x0006_*", + } + for bstr, expected := range bstrs { + assert.Equal(t, expected, bstrMarshal(bstr)) + } +} -- cgit v1.2.1