diff options
author | George Abbott <george@gabbott.dev> | 2023-11-03 19:07:26 +0000 |
---|---|---|
committer | George Abbott <george@gabbott.dev> | 2023-11-03 19:07:26 +0000 |
commit | 4d5183b967b149a5aa4f5238abe022ef1d5ee12a (patch) | |
tree | 2ae072fabc068ae5be72bfc9502c9a266f3a34ef |
-rw-r--r-- | go.mod | 27 | ||||
-rw-r--r-- | go.sum | 63 | ||||
-rw-r--r-- | helpusage.go | 32 | ||||
-rw-r--r-- | main.go | 147 |
4 files changed, 269 insertions, 0 deletions
@@ -0,0 +1,27 @@ +module bt-split + +go 1.19 + +require ( + badtudexo v0.0.0-00010101000000-000000000000 + git.gabbott.dev/george/excelize/v2 v2.99.1 + saggytrousers v0.0.0-00010101000000-000000000000 +) + +require ( + anyxcelize v0.0.0-00010101000000-000000000000 // indirect + github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect + github.com/richardlehane/mscfb v1.0.4 // indirect + github.com/richardlehane/msoleps v1.0.3 // indirect + github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 // indirect + github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 // indirect + golang.org/x/crypto v0.5.0 // indirect + golang.org/x/net v0.5.0 // indirect + golang.org/x/text v0.6.0 // indirect +) + +replace saggytrousers => ../saggytrousers + +replace anyxcelize => ../../anyxcelize + +replace badtudexo => ../badtudexo @@ -0,0 +1,63 @@ +git.gabbott.dev/george/excelize/v2 v2.99.1 h1:pencpuy3D+7TgEV+BraIj5SUSkcErop3Pes05CDp71M= +git.gabbott.dev/george/excelize/v2 v2.99.1/go.mod h1:JqW30EL316o52Ya+R3++QL7oW13PBwAqYVbSprqTVz4= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM= +github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk= +github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= 
+github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN0AQoVM= +github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 h1:6932x8ltq1w4utjmfMPVj09jdMlkY0aiA6+Skbtl3/c= +github.com/xuri/efp v0.0.0-20220603152613-6918739fd470/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 h1:OAmKAfT06//esDdpi/DZ8Qsdt4+M5+ltca05dA5bG2M= +github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.2.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= +golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE= +golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69 h1:Lj6HJGCSn5AjxRAH2+r35Mir4icalbqku+CLUtjnvXY= +golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= +golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.6.0 
// usageLines holds the help text printed by usage, one element per output
// line. The wording and spacing are exactly what the user sees.
//
// NOTE(review): --output-sheet, --output-dir and --algo-info are listed here,
// but parseArgs only handles --help, --version, --sheet-name and --algorithm;
// any other flag is taken as the input filename. Confirm which side is
// authoritative before relying on this text.
var usageLines = []string{
	"bt-split: an Excel file splitter",
	"Pass in an excel file, and then follow the instructions.",
	"The output is given in the bt-split/ subdirectory.",
	"", // blank separator line before the flag list
	"Flags:",
	"--sheet-name, -s The name of the sheet to output.",
	" Can contain %s which is replaced",
	" with the data that is split on.",
	"--output-sheet, -o The name of the sheet to output on.",
	" Defaults to same name as the file.",
	"--output-dir, -d The name of the directory to output in.",
	" Defaults to BadTudexo.",
	"--algorithm, -a Select the algorithm to use.",
	" Valid options: indiv,group.",
	" The default is: indiv.",
	"--algo-info More info about the algorithms.",
}

// usage prints the help text to standard output and exits with status 0.
//
// The lines are printed from a slice rather than as string literals: the text
// contains a literal "%s", and handing such a constant (or one ending in a
// newline) straight to fmt.Println is reported by the vet printf check that
// `go test` runs.
func usage() {
	for _, line := range usageLines {
		fmt.Println(line)
	}

	os.Exit(0)
}

// version prints the program version to standard output and exits with
// status 0.
func version() {
	fmt.Println("bt-split: v0.0.1")
	os.Exit(0)
}
*/ +var algorithm bt.SplitAlgorithm = bt.IndivAlgorithm + + +func parseArgs(args []string) { + if len(args) < 2 { + fmt.Println("Pass either a filename, or --help or --version.") + os.Exit(-1) + } + + for i := 0; i < len(args); i++ { + arg := args[i] + switch arg { + case "--help", "help", "-h": + usage() + case "--version", "-v": + version() +// case "--output-filename", "-f": +// case "--output-sheetname", "-o": + case "--sheet-name", "-s": + i++ + sheet = args[i] + case "--algorithm", "-a": + i++ + alg := args[i] + switch alg { + case "indiv": + algorithm = bt.IndivAlgorithm + case "group": + algorithm = bt.GroupAlgorithm +// case "indivmc": +// algorithm = bt.IndivMcAlgorithm + default: + st.Log(true, "Invalid algorithm passed to --algorithm.\n") + os.Exit(-1) + } + default: + filename = arg + } + } +} + + +func main() { + st.LogInit(fmt.Sprintf("%v/.bt-split-log", os.Getenv("HOME")), "bt-split") + wd, wderr := os.Getwd() + st.Log(false, "\nMetadata: \n\tPID: %v\n\tPPID: %v\n\tCWD: %v (err: %v)\n\tEnviron: %v\n", + os.Getpid(), os.Getppid(), wd, wderr, os.Environ(), + ) + beginning := time.Now() + args := os.Args + parseArgs(args) + + file, err := xl.OpenFile(filename) + if err != nil { + st.ErrLog(true, "Cannot open file [%s], err [%v].\n", filename, err) + return + } + + // Get the directory of the input file (basename) which is where the + // output shall be put. + // NOTE: potentially does not work on Windows? + outDir = filepath.Dir(filename) + + // TODO: add in a check here, or in a version of the getString + // function, that ensures the output has a %s contained. Otherwise all + // the filenames will be the same. 
+ prompt := "Enter the name of the output file, with %s = the element to split by: " + good := false + for !good { + outfileName = st.GetString(prompt) + if strings.Contains(outfileName, "%s") { good = true } + } + + + + /* Ask user to select the sheet */ + sheet = st.SelectSheet(file, sheet) + + /* Get the header, and ask the user to select the column to split on */ + header := st.GetHeader(file, sheet) + idxSplitOn := st.ChooseFromHeaderPrompt(header, "Please choose the column to split the Excel file on: ") + + /* Now load all rows in to actually start processing. */ + st.Log(true, "Loading all rows. This might take a little while if the file is big.\n") + loadStart := time.Now() + rows := st.GetRows(file, sheet) // panics if fails, which is ok + loadTime := time.Since(loadStart) + + // Do the splits. + start := time.Now() + + bt.Split(rows, header, idxSplitOn, outDir, outfileName, algorithm) + + st.Log(true, fmt.Sprintf("Loading the file took: %v\n", loadTime)) + st.Log(true, fmt.Sprintf("Splitting&Saving took: %v\n", time.Since(start))) + st.Log(true, fmt.Sprintf("Total time elapsed: %v\n", time.Since(loadStart))) + st.Log(true, fmt.Sprintf("From beginning to end: %v\n", time.Since(beginning))) + st.Log(false, "PID %v is terminating.\n", os.Getpid()) + st.LogFree() +} |