diff options
author | George Abbott <george@gabbott.dev> | 2023-11-03 21:06:29 +0000 |
---|---|---|
committer | George Abbott <george@gabbott.dev> | 2023-11-03 21:06:29 +0000 |
commit | 0faa29f2c11a6d906f340397eca5c4bc6d0f1dc6 (patch) | |
tree | 1f67a2956dba869c3c35a1af4c533530e45f9b57 |
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | concat.go | 20 | ||||
-rw-r--r-- | copy.go | 37 | ||||
-rw-r--r-- | dupltxref.go | 35 | ||||
-rw-r--r-- | eqpct.go | 41 | ||||
-rw-r--r-- | go.mod | 23 | ||||
-rw-r--r-- | go.sum | 63 | ||||
-rw-r--r-- | header.go | 22 | ||||
-rw-r--r-- | locate.go | 33 | ||||
-rw-r--r-- | missing.go | 37 | ||||
-rw-r--r-- | split.go | 114 |
11 files changed, 429 insertions, 0 deletions
diff --git a/README.md b/README.md new file mode 100644 index 0000000..1d39106 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +The main badtudexo project. This contains all the functions that manipulate the +files, but that can't be compiled into standalone binaries. + +For the code to make the binaries, see ../bt-*. diff --git a/concat.go b/concat.go new file mode 100644 index 0000000..a516dae --- /dev/null +++ b/concat.go @@ -0,0 +1,20 @@ +package badtudexo + +import ( + st "saggytrousers" + "errors" +) + +// Take two sheets, and concatenate them. +// Note: this done wrong at the moment, it should take [][]string not []string. +func Concat(fst, snd []string) []string { + return append(append([]string{}, fst...), snd...) +} + +func ConcatCheckHeaders(fst, snd, fsthd, sndhd []string) ([]string, error) { + if !st.SliceEq(fsthd, sndhd) { + return []string{}, errors.New("Header mismatch") + } + + return Concat(fst, snd), nil +} @@ -0,0 +1,37 @@ +package badtudexo + +import ( + xl "anyxcelize" + "fmt" + st "saggytrousers" + "errors" +) + +func Copy(file *xl.File, sheet, target string, failOnEmpty bool) (error, bool) { + /* Grab all rows from the sheet. */ + st.Log(true, "Grabbing rows...") + rows, err := file.GetRowsGeneric(sheet) + if err != nil { + return errors.New(fmt.Sprintf("bt.Copy: failed to get rows with error %v", err)), false + } + st.Log(true, " done!\n") + + if len(rows) == 0 { + return errors.New(fmt.Sprintf("bt.Copy: rows empty")), true + } + + /* Copy into new file. */ + saved := xl.NewFile() + saved.SetSheetName("Sheet1", sheet) + count := 1 + st.Log(false, fmt.Sprintf("len(rows) = %v\n", len(rows))) + for _, row := range rows { + loc := fmt.Sprintf("A%v", count) + st.Log(false, fmt.Sprintf("%v:%v: %v\n", sheet, loc, row)) + saved.SetSheetRow(sheet, loc, &row) + count++ + } + + /* Save file. 
*/ + return saved.SaveAs(target), false +} diff --git a/dupltxref.go b/dupltxref.go new file mode 100644 index 0000000..da5b597 --- /dev/null +++ b/dupltxref.go @@ -0,0 +1,35 @@ +package badtudexo + +// This finds a list of duplicate entries, including the index of the +// duplicate, and its value, in a given row. Note that it does not return both +// entries as duplicates, only the second. But that should be good enough for +// now. + +type DuplicateEntry[T any] struct { + Value T + Index int +} + +func DuplicateNew[T any](value T, index int) DuplicateEntry[T] { + return DuplicateEntry[T] { value, index } +} + +// O(N^2) algorithm: iterates through the list for each value, then again to +// find if they are in it up to that point. Hence this does not scale well. +func Duplicate[T comparable](values []T) []DuplicateEntry[T] { + var dupls []DuplicateEntry[T] + for i, v := range values { + for ii, iv := range values { + // Only iterate to before the current index. + if ii >= i { break } + + if v == iv { + // We have a duplicate! + dupls = append(dupls, DuplicateNew(v, i)) + break + } + } + } + + return dupls +} diff --git a/eqpct.go b/eqpct.go new file mode 100644 index 0000000..d830049 --- /dev/null +++ b/eqpct.go @@ -0,0 +1,41 @@ +package badtudexo + +import ( + "errors" + "fmt" +) + +// Given a list of base, percent and the result and a tolerance return a list +// of indices where outside this tolerance pct. For instance, given a base of +// [100, 50, 200], percents of [ 10, 10, 10 ], and a tolerance either way of +// 1%, return all in result which are not between 9% and 11% of the +// corresponding index of base: if res were [10, 2, 19], return [1], as the +// 2nd (1st w/ zero-indexing) elem is the only one outside the allowed pcts. 
// ---- eqpct.go ----

// EqPct checks, index by index, that res[i] lies within tol percentage points
// of pct[i] percent of base[i], and returns the indices where it does not.
//
// For instance, given a base of [100, 50, 200], percents of [10, 10, 10] and a
// tolerance of 1, every result must be between 9% and 11% of the corresponding
// base. If res were [10, 2, 19] the return value is [1], as the element at
// index 1 is the only one outside the allowed range.
//
// All three slices must have the same length, otherwise an error is returned.
// The returned slice is nil when every result is within tolerance.
func EqPct(base, pct, res []float64, tol float64) ([]int, error) {
	if len(base) != len(pct) {
		return nil, errors.New("len(base) != len(pct)")
	}

	if len(pct) != len(res) {
		return nil, errors.New("len(pct) != len(res)")
	}

	var ret []int
	for i := range pct {
		lo := base[i] * ((pct[i] - tol) / 100)
		hi := base[i] * ((pct[i] + tol) / 100)
		// A negative base (or tolerance) flips the two bounds; without the
		// swap the range would be empty and every such entry reported.
		if lo > hi {
			lo, hi = hi, lo
		}

		if res[i] < lo || res[i] > hi {
			ret = append(ret, i)
		}
	}

	return ret, nil
}
+github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM= +github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk= +github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN0AQoVM= +github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 h1:6932x8ltq1w4utjmfMPVj09jdMlkY0aiA6+Skbtl3/c= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 h1:OAmKAfT06//esDdpi/DZ8Qsdt4+M5+ltca05dA5bG2M= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.2.0/go.mod 
h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= +golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE= +golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69 h1:Lj6HJGCSn5AjxRAH2+r35Mir4icalbqku+CLUtjnvXY= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= +golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/header.go b/header.go new file mode 100644 index 0000000..d553945 --- /dev/null +++ b/header.go @@ -0,0 +1,22 @@ +package badtudexo + +import ( + xl "git.gabbott.dev/george/excelize/v2" + // st "saggytrousers" +) + + +// Return the header of a given file, which must be the first row. 
+func GetHeader(file *xl.File, sheet string) ([]string, error) { + rows, err := file.Rows(sheet) + if err != nil { + return []string{}, err + } + + rows.Next() + header, err := rows.Columns() + if err != nil { + return []string{}, err + } + return header, nil +} diff --git a/locate.go b/locate.go new file mode 100644 index 0000000..fc9b1b6 --- /dev/null +++ b/locate.go @@ -0,0 +1,33 @@ +package badtudexo + +import ( + st "saggytrousers" +) + +/* locate.go + * The function Locate takes a header, and attempts to locate where a given + * column is in this. For instance, Locate(header, "Unique ID") + * might return 1, as the Unique ID field is in second position. + */ + + +// Locates the index where the needle is found; returns -1 if not found. +func Locate(header []string, needle string) int { + return st.Locate(header, needle) +} + +// Returns the index where the needle is found, returns -1 if not found; +// matches exactly. +func LocateExact[T comparable](header []T, needle T) int { + return st.LocateExact(header, needle) +} + +// Locate the value, or ask where it is from the user. +func LocateOrAsk(header []string, needle, prompt string) int { + v := st.Locate(header, needle) + if v >= 0 { + return v + } + + return st.ChooseFromHeaderPrompt(header, prompt) +} diff --git a/missing.go b/missing.go new file mode 100644 index 0000000..92f92aa --- /dev/null +++ b/missing.go @@ -0,0 +1,37 @@ +package badtudexo + +import ( + st "saggytrousers" +) + +type MissingValue[T any] struct { + Value T + Index int +} + +func MissingValueNew[T any](value T, index int) MissingValue[T] { + return MissingValue[T]{ value, index } +} + +// Iterates through `check`, and returns all that are _not_ present in `src`, returning both the index in the array, and the value itself. 
+func Missing[T comparable](check, src []T) []MissingValue[T] { + var missing []MissingValue[T] + for i, v := range check { + if !st.InSlice(src, v) { + mv := MissingValueNew(v, i) + missing = append(missing, mv) + } + } + + return missing +} + +// Iterates through `src`, and returns whether all are present in `check`. +func Present[T comparable](check, src []T) bool { + for _, v := range src { + if !st.InSlice(check, v) { + return false + } + } + return true +} diff --git a/split.go b/split.go new file mode 100644 index 0000000..bfcf474 --- /dev/null +++ b/split.go @@ -0,0 +1,114 @@ +package badtudexo + +import ( + "fmt" + "strings" + "os" + xl "git.gabbott.dev/george/excelize/v2" + st "saggytrousers" +) + +type SplitAlgorithm int +const ( + IndivAlgorithm SplitAlgorithm = iota + 1 + GroupAlgorithm +) + +const SaveDirectory string = "bt-split" + +// rows [][]any must NOT include the header. +func indivSplit(rows [][]any, header []string, index int, dir, fn string) { + st.Log(false, "bt.indivSplit: index (%v), dir (%v), fn (%v).", index, dir, fn) + var uniq []any + saveDir := fmt.Sprintf("%s/%s", dir, SaveDirectory) + st.Log(false, "bt.indivSplit: saveDir = %v\n", saveDir) + os.Mkdir(saveDir, 0777) + + // First pass: get all the unique values. + for _, row := range rows { + v := row[index] + if !st.InSliceAny(uniq, v) { + uniq = append(uniq, v) + } + } + + // Rest of the passes: for each unique value, grab them and chuck all + // entries matching it to a file, then save it. + count, total := 0, 0 + for _, v := range uniq { + //if (n == 0) { /* change for _, ... to: for n, ... if using + // // Skip the first one, as it is the header. + // continue + //} + pos := 1 + suffix := deriveSplitSuffix(v) + filename := st.CreateFilepath(dir, SaveDirectory, fn, suffix) + sheetname := st.MakeValidSheetName(st.CreateFilename(fn, suffix)) + + + // Create the new file, and append the header to it. 
+ file := xl.NewFile() + file.SetSheetName("Sheet1", sheetname) + st.AppendToFile(file, sheetname, header, pos) + pos++ + + // Now add each matching row to the file as well. + for _, row := range rows { + sv := row[index] + if sv == v { + st.AppendToFile(file, sheetname, row, pos) + pos++ + } + } + + // Finally, save the file, close it, and we can do it all again + // for the next unique value. + filename = fmt.Sprintf("%s.xlsx", filename) + if err := file.SaveAs(filename); err != nil { + st.ErrLog(true, "Unable to save [%s]\n", filename) + } else { + st.Log(true, "Saved [%s]\n", filename) + count++ + } + total++ + } + st.Log(true, "Saved %v of %v files successfully.\n", count, total) +} + +func deriveSplitSuffix(str any) string { + s:= fmt.Sprintf("%v", str) + if strings.HasSuffix(s, "UTC") { + return s[0:10] + } + + return s +} + +// func groupSplit(rows [][]string, header []string, index int, dir, fn string) map[string]*xl.File { +// } +// +// func groupSave(m map[string]*xl.File) { +// } + + + +// Takes rows, and splits on the given index. Then saves each of these splits +// as a separate file at the specified location and with the specified name. +// rows must NOT include the header. +func Split(rows [][]any, header []string, index int, + dir, filename string, + algo SplitAlgorithm) { + + switch algo { + case IndivAlgorithm: + st.Log(false, "badtudexo.Split: algorithm = IndivAlgorithm\n") + indivSplit(rows, header, index, dir, filename) + // case GroupAlgorithm: + // // TODO: implement these again. + // m := groupSplit(rows, header, index, dir, filename) + // groupSave(m) + default: + st.ErrLog(true, "badtudexo.Split received an invalid algorithm.\n") + os.Exit(-1) + } +} |