summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorge Abbott <george@gabbott.dev>2023-11-03 19:07:26 +0000
committerGeorge Abbott <george@gabbott.dev>2023-11-03 19:07:26 +0000
commit4d5183b967b149a5aa4f5238abe022ef1d5ee12a (patch)
tree2ae072fabc068ae5be72bfc9502c9a266f3a34ef
Commit allHEADmaster
-rw-r--r--go.mod27
-rw-r--r--go.sum63
-rw-r--r--helpusage.go32
-rw-r--r--main.go147
4 files changed, 269 insertions, 0 deletions
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..23d2421
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,27 @@
+module bt-split
+
+go 1.19
+
+require (
+ badtudexo v0.0.0-00010101000000-000000000000
+ git.gabbott.dev/george/excelize/v2 v2.99.1
+ saggytrousers v0.0.0-00010101000000-000000000000
+)
+
+require (
+ anyxcelize v0.0.0-00010101000000-000000000000 // indirect
+ github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
+ github.com/richardlehane/mscfb v1.0.4 // indirect
+ github.com/richardlehane/msoleps v1.0.3 // indirect
+ github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 // indirect
+ github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 // indirect
+ golang.org/x/crypto v0.5.0 // indirect
+ golang.org/x/net v0.5.0 // indirect
+ golang.org/x/text v0.6.0 // indirect
+)
+
+replace saggytrousers => ../saggytrousers
+
+replace anyxcelize => ../../anyxcelize
+
+replace badtudexo => ../badtudexo
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..22585c7
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,63 @@
+git.gabbott.dev/george/excelize/v2 v2.99.1 h1:pencpuy3D+7TgEV+BraIj5SUSkcErop3Pes05CDp71M=
+git.gabbott.dev/george/excelize/v2 v2.99.1/go.mod h1:JqW30EL316o52Ya+R3++QL7oW13PBwAqYVbSprqTVz4=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw=
+github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM=
+github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk=
+github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg=
+github.com/richardlehane/msoleps v1.0.3 h1:aznSZzrwYRl3rLKRT3gUk9am7T/mLNSnJINvN0AQoVM=
+github.com/richardlehane/msoleps v1.0.3/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/xuri/efp v0.0.0-20220603152613-6918739fd470 h1:6932x8ltq1w4utjmfMPVj09jdMlkY0aiA6+Skbtl3/c=
+github.com/xuri/efp v0.0.0-20220603152613-6918739fd470/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI=
+github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22 h1:OAmKAfT06//esDdpi/DZ8Qsdt4+M5+ltca05dA5bG2M=
+github.com/xuri/nfp v0.0.0-20220409054826-5e722a1d9e22/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.2.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
+golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE=
+golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU=
+golang.org/x/image v0.0.0-20220902085622-e7cb96979f69 h1:Lj6HJGCSn5AjxRAH2+r35Mir4icalbqku+CLUtjnvXY=
+golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
+golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw=
+golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k=
+golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/helpusage.go b/helpusage.go
new file mode 100644
index 0000000..ff25a2d
--- /dev/null
+++ b/helpusage.go
@@ -0,0 +1,32 @@
+package main
+
+import (
+ "fmt"
+ "os"
+)
+
+// usage prints the command-line help text to standard output, one line per
+// entry of the table below, and then terminates the process with exit
+// status 0. It never returns to the caller.
+//
+// NOTE(review): parseArgs in main.go only handles --help, --version,
+// --sheet-name and --algorithm; --output-sheet, --output-dir and --algo-info
+// are advertised here but do not appear to be parsed yet — confirm whether
+// they should be implemented or removed from this text.
+func usage() {
+	// The text is kept in a slice and printed through a variable so that
+	// the literal "%s" (which is meant to be shown verbatim) and the blank
+	// separator line are not passed to fmt.Println as constants, which the
+	// go vet printf check reports.
+	helpLines := []string{
+		"bt-split: an Excel file splitter",
+		"Pass in an excel file, and then follow the instructions.",
+		"The output is given in the bt-split/ subdirectory.",
+		"", // blank line between the description and the flag list
+		"Flags:",
+		"--sheet-name, -s The name of the sheet to output.",
+		" Can contain %s which is replaced",
+		" with the data that is split on.",
+		"--output-sheet, -o The name of the sheet to output on.",
+		" Defaults to same name as the file.",
+		"--output-dir, -d The name of the directory to output in.",
+		" Defaults to BadTudexo.",
+		"--algorithm, -a Select the algorithm to use.",
+		" Valid options: indiv,group.",
+		" The default is: indiv.",
+		"--algo-info More info about the algorithms.",
+	}
+	for _, line := range helpLines {
+		fmt.Println(line)
+	}
+
+	os.Exit(0)
+}
+
+// version writes the program name and version number to standard output and
+// then terminates the process with exit status 0.
+func version() {
+	const banner = "bt-split: v0.0.1\n"
+	fmt.Print(banner)
+	os.Exit(0)
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..dd310d1
--- /dev/null
+++ b/main.go
@@ -0,0 +1,147 @@
+package main
+
+/* bt-split: split an Excel file into separate files based on the value of a
+ * given column.
+ * This tool is not meant to handle every case, only the general one.
+ * Assumptions:
+ * - The topmost row of the spreadsheet must be the header of the table.
+ */
+
+/* Problems to resolve:
+ *
+ * - Make a better name for the sheet than "BadTudexo". Potentially make it an
+ * argument --sheet-name, -s that can be passed to supplant BadTudexo.
+ *
+ * - Add in an [indivmc] (indiv multi-core) which spins off a Goroutine for
+ * each iteration of the loop. Also, reimplement the [group] algorithm since
+ * I accidentally nuked it.
+ *
+ * - Potentially rework the different functions to use goroutines. Running
+ * them in parallel should make them much faster; indiv in particular
+ * lends itself well to this.
+ */
+
+import (
+ "fmt"
+ "os"
+ "time"
+ "strings"
+ "path/filepath"
+ xl "anyxcelize"
+ // "git.gabbott.dev/george/excelize/v2"
+ st "saggytrousers"
+ bt "badtudexo"
+)
+
+// Package-level state for bt-split.
+var (
+	// Declared without an initial value.
+
+	// sheet holds the value given with --sheet-name; main also passes it
+	// to, and overwrites it with the result of, st.SelectSheet.
+	sheet string
+	// outDir is the directory the resultant files are written to.
+	outDir string
+	// outfileName is the output file name, containing %s to be replaced.
+	outfileName string
+	// idxSplitOn is the index, in header and in each row, of the column to
+	// split on.
+	idxSplitOn int
+	// header and rows are not assigned anywhere in this file; main keeps
+	// the equivalent values in local variables.
+	header []string
+	rows   [][]string
+
+	// Flags and values from argument parsing.
+
+	// var fNoVerbose bool = false
+	// var altSheetName string /* If set, this is used instead of SheetName. */
+
+	// filename is the input file passed on the command line.
+	filename string
+	// algorithm is the split algorithm to use; it defaults to
+	// bt.IndivAlgorithm and can be changed with --algorithm.
+	algorithm bt.SplitAlgorithm = bt.IndivAlgorithm
+)
+
+
+// parseArgs interprets the command-line arguments in args, where args[0] is
+// the program name (main passes os.Args), and stores the results in the
+// package-level variables filename, sheet and algorithm.
+//
+// Recognised arguments:
+//   - --help, help, -h: call usage, which prints the help and exits.
+//   - --version, -v: call version, which prints the version and exits.
+//   - --sheet-name NAME, -s NAME: store NAME in sheet.
+//   - --algorithm ALG, -a ALG: select bt.IndivAlgorithm for "indiv" or
+//     bt.GroupAlgorithm for "group".
+//
+// Any other argument is taken as the input filename; if several are given,
+// the last one wins. The process exits with status -1 when there are no
+// arguments, when an option is missing its value, when the algorithm name is
+// not recognised, or when no filename was supplied.
+func parseArgs(args []string) {
+	if len(args) < 2 {
+		fmt.Println("Pass either a filename, or --help or --version.")
+		os.Exit(-1)
+	}
+
+	// Start at 1: args[0] is the program name and must not be mistaken for
+	// the input filename.
+	for i := 1; i < len(args); i++ {
+		arg := args[i]
+		switch arg {
+		case "--help", "help", "-h":
+			usage()
+		case "--version", "-v":
+			version()
+		// case "--output-filename", "-f":
+		// case "--output-sheetname", "-o":
+		case "--sheet-name", "-s":
+			i++ // consume the option's value
+			sheet = optionValue(args, i, arg)
+		case "--algorithm", "-a":
+			i++ // consume the option's value
+			switch optionValue(args, i, arg) {
+			case "indiv":
+				algorithm = bt.IndivAlgorithm
+			case "group":
+				algorithm = bt.GroupAlgorithm
+			// case "indivmc":
+			// 	algorithm = bt.IndivMcAlgorithm
+			default:
+				st.Log(true, "Invalid algorithm passed to --algorithm.\n")
+				os.Exit(-1)
+			}
+		default:
+			filename = arg
+		}
+	}
+
+	// Only options were given: report it here rather than letting the
+	// later attempt to open an empty filename fail.
+	if filename == "" {
+		fmt.Println("No input filename given. Pass a filename, or --help or --version.")
+		os.Exit(-1)
+	}
+}
+
+// optionValue returns args[i], the value belonging to the option opt. If the
+// option was the last argument, so that no value follows it, a message is
+// printed and the process exits with status -1 instead of indexing past the
+// end of args.
+func optionValue(args []string, i int, opt string) string {
+	if i >= len(args) {
+		fmt.Printf("Missing value for %s.\n", opt)
+		os.Exit(-1)
+	}
+	return args[i]
+}
+
+
+// main runs the interactive split and exits with the status reported by run.
+func main() {
+	os.Exit(run())
+}
+
+// logPath returns the location of the log file: ".bt-split-log" inside the
+// current user's home directory as reported by os.UserHomeDir. If the home
+// directory cannot be determined, it falls back to the previous behaviour of
+// building the path from the HOME environment variable.
+func logPath() string {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return fmt.Sprintf("%v/.bt-split-log", os.Getenv("HOME"))
+	}
+	return filepath.Join(home, ".bt-split-log")
+}
+
+// run performs the whole program: it initialises logging, parses os.Args,
+// opens the input file, asks the user for the output file name, the sheet
+// and the column to split on, loads all rows, calls bt.Split, and logs
+// timings. It returns the process exit status: 0 on success, 1 if the input
+// file cannot be opened. parseArgs may terminate the process directly.
+func run() int {
+	st.LogInit(logPath(), "bt-split")
+	// Deferred so the log is also released on the early-return error path.
+	defer st.LogFree()
+
+	// NOTE(review): the full environment is written to the log file; this
+	// may include secrets — confirm this is intended.
+	wd, wderr := os.Getwd()
+	st.Log(false, "\nMetadata: \n\tPID: %v\n\tPPID: %v\n\tCWD: %v (err: %v)\n\tEnviron: %v\n",
+		os.Getpid(), os.Getppid(), wd, wderr, os.Environ(),
+	)
+	beginning := time.Now()
+	parseArgs(os.Args)
+
+	// NOTE(review): file is never closed here; whether the value returned
+	// by xl.OpenFile needs closing is not visible from this file.
+	file, err := xl.OpenFile(filename)
+	if err != nil {
+		st.ErrLog(true, "Cannot open file [%s], err [%v].\n", filename, err)
+		return 1
+	}
+
+	// The output is written to the directory that contains the input file.
+	outDir = filepath.Dir(filename)
+
+	// Keep asking until the name contains %s; otherwise all the output
+	// filenames would be the same.
+	prompt := "Enter the name of the output file, with %s = the element to split by: "
+	for {
+		outfileName = st.GetString(prompt)
+		if strings.Contains(outfileName, "%s") {
+			break
+		}
+	}
+
+	/* Ask user to select the sheet */
+	sheet = st.SelectSheet(file, sheet)
+
+	/* Get the header, and ask the user to select the column to split on.
+	 * Distinct local names are used so the package-level header,
+	 * idxSplitOn and rows are not shadowed. */
+	hdr := st.GetHeader(file, sheet)
+	splitIdx := st.ChooseFromHeaderPrompt(hdr, "Please choose the column to split the Excel file on: ")
+
+	/* Now load all rows in to actually start processing. */
+	st.Log(true, "Loading all rows. This might take a little while if the file is big.\n")
+	loadStart := time.Now()
+	allRows := st.GetRows(file, sheet) // panics if fails, which is ok
+	loadTime := time.Since(loadStart)
+
+	// Do the splits.
+	start := time.Now()
+
+	bt.Split(allRows, hdr, splitIdx, outDir, outfileName, algorithm)
+
+	// Values are passed as arguments rather than pre-formatted with
+	// fmt.Sprintf, so they are never interpreted as a format string.
+	st.Log(true, "Loading the file took: %v\n", loadTime)
+	st.Log(true, "Splitting&Saving took: %v\n", time.Since(start))
+	st.Log(true, "Total time elapsed: %v\n", time.Since(loadStart))
+	st.Log(true, "From beginning to end: %v\n", time.Since(beginning))
+	st.Log(false, "PID %v is terminating.\n", os.Getpid())
+	return 0
+}