From 0faa29f2c11a6d906f340397eca5c4bc6d0f1dc6 Mon Sep 17 00:00:00 2001 From: George Abbott Date: Fri, 3 Nov 2023 21:06:29 +0000 Subject: Commit all --- README.md | 4 +++ concat.go | 20 +++++++++++ copy.go | 37 +++++++++++++++++++ dupltxref.go | 35 ++++++++++++++++++ eqpct.go | 41 +++++++++++++++++++++ go.mod | 23 ++++++++++++ go.sum | 63 +++++++++++++++++++++++++++++++++ header.go | 22 ++++++++++++ locate.go | 33 +++++++++++++++++ missing.go | 37 +++++++++++++++++++ split.go | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 429 insertions(+) create mode 100644 README.md create mode 100644 concat.go create mode 100644 copy.go create mode 100644 dupltxref.go create mode 100644 eqpct.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 header.go create mode 100644 locate.go create mode 100644 missing.go create mode 100644 split.go diff --git a/README.md b/README.md new file mode 100644 index 0000000..1d39106 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +The main badtudexo project. This contains all the functions that manipulate the +files, but that can't be compiled into standalone binaries. + +For the code to make the binaries, see ../bt-*. diff --git a/concat.go b/concat.go new file mode 100644 index 0000000..a516dae --- /dev/null +++ b/concat.go @@ -0,0 +1,20 @@ +package badtudexo + +import ( + st "saggytrousers" + "errors" +) + +// Take two sheets, and concatenate them. +// Note: this done wrong at the moment, it should take [][]string not []string. +func Concat(fst, snd []string) []string { + return append(append([]string{}, fst...), snd...) +} + +func ConcatCheckHeaders(fst, snd, fsthd, sndhd []string) ([]string, error) { + if !st.SliceEq(fsthd, sndhd) { + return []string{}, errors.New("Header mismatch") + } + + return Concat(fst, snd), nil +} diff --git a/copy.go b/copy.go new file mode 100644 index 0000000..a91d30e --- /dev/null +++ b/copy.go @@ -0,0 +1,37 @@ +package badtudexo + +import ( + xl "anyxcelize" + "fmt" + st "saggytrousers" + "errors" +) + +func Copy(file *xl.File, sheet, target string, failOnEmpty bool) (error, bool) { + /* Grab all rows from the sheet. */ + st.Log(true, "Grabbing rows...") + rows, err := file.GetRowsGeneric(sheet) + if err != nil { + return errors.New(fmt.Sprintf("bt.Copy: failed to get rows with error %v", err)), false + } + st.Log(true, " done!\n") + + if len(rows) == 0 { + return errors.New(fmt.Sprintf("bt.Copy: rows empty")), true + } + + /* Copy into new file. */ + saved := xl.NewFile() + saved.SetSheetName("Sheet1", sheet) + count := 1 + st.Log(false, fmt.Sprintf("len(rows) = %v\n", len(rows))) + for _, row := range rows { + loc := fmt.Sprintf("A%v", count) + st.Log(false, fmt.Sprintf("%v:%v: %v\n", sheet, loc, row)) + saved.SetSheetRow(sheet, loc, &row) + count++ + } + + /* Save file. */ + return saved.SaveAs(target), false +} diff --git a/dupltxref.go b/dupltxref.go new file mode 100644 index 0000000..da5b597 --- /dev/null +++ b/dupltxref.go @@ -0,0 +1,35 @@ +package badtudexo + +// This finds a list of duplicate entries, including the index of the +// duplicate, and its value, in a given row. Note that it does not return both +// entries as duplicates, only the second. But that should be good enough for +// now. + +type DuplicateEntry[T any] struct { + Value T + Index int +} + +func DuplicateNew[T any](value T, index int) DuplicateEntry[T] { + return DuplicateEntry[T] { value, index } +} + +// O(N^2) algorithm: iterates through the list for each value, then again to +// find if they are in it up to that point. Hence this does not scale well. +func Duplicate[T comparable](values []T) []DuplicateEntry[T] { + var dupls []DuplicateEntry[T] + for i, v := range values { + for ii, iv := range values { + // Only iterate to before the current index. + if ii >= i { break } + + if v == iv { + // We have a duplicate! + dupls = append(dupls, DuplicateNew(v, i)) + break + } + } + } + + return dupls +} diff --git a/eqpct.go b/eqpct.go new file mode 100644 index 0000000..d830049 --- /dev/null +++ b/eqpct.go @@ -0,0 +1,41 @@ +package badtudexo + +import ( + "errors" + "fmt" +) + +// Given a list of base, percent and the result and a tolerance return a list +// of indices where outside this tolerance pct. For instance, given a base of +// [100, 50, 200], percents of [ 10, 10, 10 ], and a tolerance either way of +// 1%, return all in result which are not between 9% and 11% of the +// corresponding index of base: if res were [10, 2, 19], return [1], as the +// 2nd (1st w/ zero-indexing) elem is the only one outside the allowed pcts. +func EqPct(base, pct, res []float64, tol float64) ([]int, error) { + var ret []int + var allgood bool = true + if len(base) != len(pct) { + return nil, errors.New("len(base) != len(pct)") + } + + if len(pct) != len(res) { + return nil, errors.New("len(pct) != len(res)") + } + + for i := 0; i < len(pct); i++ { + min := base[i] * ((pct[i] - tol) / 100) + max := base[i] * ((pct[i] + tol) / 100) + + if res[i] < min || res[i] > max { + // fmt.Printf("Min [%v] < Res [%v] > Max [%v]\n", min, res[i], max) + allgood = false + ret = append(ret, i) + } + } + + if allgood { + // fmt.Println("EqPct: all good!") + } + + return ret, nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..5e288ef --- /dev/null +++ b/go.mod @@ -0,0 +1,23 @@ +module badtudexo + +go 1.19 + +replace saggytrousers => ../saggytrousers + +require saggytrousers v0.0.0-00010101000000-000000000000 + +require anyxcelize v0.0.0-00010101000000-000000000000 // indirect + +require ( + git.gabbott.dev/george/excelize/v2 v2.99.1 + github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect + github.com/richardlehane/mscfb v1.0.4 // indirect + github.com/richardlehane/msoleps v1.0.3 // indirect + github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 // indirect + github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 // indirect + golang.org/x/crypto v0.5.0 // indirect + golang.org/x/net v0.5.0 // indirect + golang.org/x/text v0.6.0 // indirect +) + +replace anyxcelize => ../../anyxcelize diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..22585c7 --- /dev/null +++ b/go.sum @@ -0,0 +1,63 @@ +git.gabbott.dev/george/excelize/v2 v2.99.1 h1:pencpuy3D+7TgEV+BraIj5SUSkcErop3Pes05CDp71M= +git.gabbott.dev/george/excelize/v2 v2.99.1/go.mod h1:JqW30EL316o52Ya+R3++QL7oW13PBwAqYVbSprqTVz4= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM= +github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk= +github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN0AQoVM= +github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 h1:6932x8ltq1w4utjmfMPVj09jdMlkY0aiA6+Skbtl3/c= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 h1:OAmKAfT06//esDdpi/DZ8Qsdt4+M5+ltca05dA5bG2M= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.2.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= +golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE= +golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69 h1:Lj6HJGCSn5AjxRAH2+r35Mir4icalbqku+CLUtjnvXY= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= +golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/header.go b/header.go new file mode 100644 index 0000000..d553945 --- /dev/null +++ b/header.go @@ -0,0 +1,22 @@ +package badtudexo + +import ( + xl "git.gabbott.dev/george/excelize/v2" + // st "saggytrousers" +) + + +// Return the header of a given file, which must be the first row. +func GetHeader(file *xl.File, sheet string) ([]string, error) { + rows, err := file.Rows(sheet) + if err != nil { + return []string{}, err + } + + rows.Next() + header, err := rows.Columns() + if err != nil { + return []string{}, err + } + return header, nil +} diff --git a/locate.go b/locate.go new file mode 100644 index 0000000..fc9b1b6 --- /dev/null +++ b/locate.go @@ -0,0 +1,33 @@ +package badtudexo + +import ( + st "saggytrousers" +) + +/* locate.go + * The function Locate takes a header, and attempts to locate where a given + * column is in this. For instance, Locate(header, "Unique ID") + * might return 1, as the Unique ID field is in second position. + */ + + +// Locates the index where the needle is found; returns -1 if not found. +func Locate(header []string, needle string) int { + return st.Locate(header, needle) +} + +// Returns the index where the needle is found, returns -1 if not found; +// matches exactly. +func LocateExact[T comparable](header []T, needle T) int { + return st.LocateExact(header, needle) +} + +// Locate the value, or ask where it is from the user. +func LocateOrAsk(header []string, needle, prompt string) int { + v := st.Locate(header, needle) + if v >= 0 { + return v + } + + return st.ChooseFromHeaderPrompt(header, prompt) +} diff --git a/missing.go b/missing.go new file mode 100644 index 0000000..92f92aa --- /dev/null +++ b/missing.go @@ -0,0 +1,37 @@ +package badtudexo + +import ( + st "saggytrousers" +) + +type MissingValue[T any] struct { + Value T + Index int +} + +func MissingValueNew[T any](value T, index int) MissingValue[T] { + return MissingValue[T]{ value, index } +} + +// Iterates through `check`, and returns all that are _not_ present in `src`, returning both the index in the array, and the value itself. +func Missing[T comparable](check, src []T) []MissingValue[T] { + var missing []MissingValue[T] + for i, v := range check { + if !st.InSlice(src, v) { + mv := MissingValueNew(v, i) + missing = append(missing, mv) + } + } + + return missing +} + +// Iterates through `src`, and returns whether all are present in `check`. +func Present[T comparable](check, src []T) bool { + for _, v := range src { + if !st.InSlice(check, v) { + return false + } + } + return true +} diff --git a/split.go b/split.go new file mode 100644 index 0000000..bfcf474 --- /dev/null +++ b/split.go @@ -0,0 +1,114 @@ +package badtudexo + +import ( + "fmt" + "strings" + "os" + xl "git.gabbott.dev/george/excelize/v2" + st "saggytrousers" +) + +type SplitAlgorithm int +const ( + IndivAlgorithm SplitAlgorithm = iota + 1 + GroupAlgorithm +) + +const SaveDirectory string = "bt-split" + +// rows [][]any must NOT include the header. +func indivSplit(rows [][]any, header []string, index int, dir, fn string) { + st.Log(false, "bt.indivSplit: index (%v), dir (%v), fn (%v).", index, dir, fn) + var uniq []any + saveDir := fmt.Sprintf("%s/%s", dir, SaveDirectory) + st.Log(false, "bt.indivSplit: saveDir = %v\n", saveDir) + os.Mkdir(saveDir, 0777) + + // First pass: get all the unique values. + for _, row := range rows { + v := row[index] + if !st.InSliceAny(uniq, v) { + uniq = append(uniq, v) + } + } + + // Rest of the passes: for each unique value, grab them and chuck all + // entries matching it to a file, then save it. + count, total := 0, 0 + for _, v := range uniq { + //if (n == 0) { /* change for _, ... to: for n, ... if using + // // Skip the first one, as it is the header. + // continue + //} + pos := 1 + suffix := deriveSplitSuffix(v) + filename := st.CreateFilepath(dir, SaveDirectory, fn, suffix) + sheetname := st.MakeValidSheetName(st.CreateFilename(fn, suffix)) + + + // Create the new file, and append the header to it. + file := xl.NewFile() + file.SetSheetName("Sheet1", sheetname) + st.AppendToFile(file, sheetname, header, pos) + pos++ + + // Now add each matching row to the file as well. + for _, row := range rows { + sv := row[index] + if sv == v { + st.AppendToFile(file, sheetname, row, pos) + pos++ + } + } + + // Finally, save the file, close it, and we can do it all again + // for the next unique value. + filename = fmt.Sprintf("%s.xlsx", filename) + if err := file.SaveAs(filename); err != nil { + st.ErrLog(true, "Unable to save [%s]\n", filename) + } else { + st.Log(true, "Saved [%s]\n", filename) + count++ + } + total++ + } + st.Log(true, "Saved %v of %v files successfully.\n", count, total) +} + +func deriveSplitSuffix(str any) string { + s:= fmt.Sprintf("%v", str) + if strings.HasSuffix(s, "UTC") { + return s[0:10] + } + + return s +} + +// func groupSplit(rows [][]string, header []string, index int, dir, fn string) map[string]*xl.File { +// } +// +// func groupSave(m map[string]*xl.File) { +// } + + + +// Takes rows, and splits on the given index. Then saves each of these splits +// as a separate file at the specified location and with the specified name. +// rows must NOT include the header. +func Split(rows [][]any, header []string, index int, + dir, filename string, + algo SplitAlgorithm) { + + switch algo { + case IndivAlgorithm: + st.Log(false, "badtudexo.Split: algorithm = IndivAlgorithm\n") + indivSplit(rows, header, index, dir, filename) + // case GroupAlgorithm: + // // TODO: implement these again. + // m := groupSplit(rows, header, index, dir, filename) + // groupSave(m) + default: + st.ErrLog(true, "badtudexo.Split received an invalid algorithm.\n") + os.Exit(-1) + } +} -- cgit v1.2.1