summaryrefslogtreecommitdiff
path: root/main.go
diff options
context:
space:
mode:
Diffstat (limited to 'main.go')
-rw-r--r--main.go147
1 files changed, 147 insertions, 0 deletions
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..dd310d1
--- /dev/null
+++ b/main.go
@@ -0,0 +1,147 @@
+package main
+
+/* bt-split: split an Excel file into separate files based on the value of a
+ * given column.
+ * This tool is not meant to handle every case, only the general one.
+ * Assumptions:
+ * - The topmost row of the spreadsheet must be the header of the table.
+ */
+
+/* Problems to resolve:
+ *
+ * - Make a better name for the sheet than "BadTudexo". Potentially make it an
+ * argument --sheet-name, -s that can be passed to supplant BadTudexo.
+ *
+ * - Add in an [indivmc] (indiv multi-core) algorithm which spins off a
+ * goroutine for each iteration of the loop. Also, reimplement the [group]
+ * algorithm since I accidentally nuked it.
+ *
+ * - Potentially rework the different functions to use goroutines. This
+ * should make them much faster, as the work would be parallelised. The
+ * indiv algorithm in particular is well suited to this.
+ */
+
+import (
+ "fmt"
+ "os"
+ "time"
+ "strings"
+ "path/filepath"
+ xl "anyxcelize"
+ // "git.gabbott.dev/george/excelize/v2"
+ st "saggytrousers"
+ bt "badtudexo"
+)
+
+// Package-level state shared by parseArgs and main.
+var (
+	// Values left at their zero value until filled in at runtime.
+	sheet       string // As passed in with --sheet-name.
+	outDir      string // The directory to write resultant files.
+	outfileName string // The name of the output file, with %s to replace.
+
+	// NOTE(review): main declares locals named idxSplitOn, header and rows
+	// with :=, so these three package-level variables are not assigned there.
+	idxSplitOn int // The index in header, rows of the col to split on.
+	header     []string
+	rows       [][]string
+
+	// Flags and values from argparsing.
+	// fNoVerbose bool = false
+	// altSheetName string // If set, this is used instead of SheetName.
+	filename  string                              // The filename passed as an argument to this.
+	algorithm bt.SplitAlgorithm = bt.IndivAlgorithm // Selected with --algorithm; indiv by default.
+)
+
+
+// parseArgs interprets the command-line arguments and stores the results in
+// the package-level variables filename, sheet and algorithm.
+//
+// args is expected to be os.Args: args[0] is the program name and is skipped.
+// Any argument that is not a recognised option is taken as the input
+// filename; if several are given, the last one wins.
+//
+// The process exits with status -1 when fewer than two arguments are
+// supplied, when an option that takes a value is the last argument, or when
+// --algorithm names an unknown algorithm.
+func parseArgs(args []string) {
+	if len(args) < 2 {
+		fmt.Println("Pass either a filename, or --help or --version.")
+		os.Exit(-1)
+	}
+
+	// Start at 1: args[0] is the executable path, not a user argument, and
+	// must never end up in filename.
+	for i := 1; i < len(args); i++ {
+		arg := args[i]
+		switch arg {
+		case "--help", "help", "-h":
+			usage()
+		case "--version", "-v":
+			version()
+		// case "--output-filename", "-f":
+		// case "--output-sheetname", "-o":
+		case "--sheet-name", "-s":
+			// The option consumes the following argument as its value.
+			i++
+			if i >= len(args) {
+				st.Log(true, "Option %s requires a value.\n", arg)
+				os.Exit(-1)
+			}
+			sheet = args[i]
+		case "--algorithm", "-a":
+			// The option consumes the following argument as its value.
+			i++
+			if i >= len(args) {
+				st.Log(true, "Option %s requires a value.\n", arg)
+				os.Exit(-1)
+			}
+			switch alg := args[i]; alg {
+			case "indiv":
+				algorithm = bt.IndivAlgorithm
+			case "group":
+				algorithm = bt.GroupAlgorithm
+			// case "indivmc":
+			// 	algorithm = bt.IndivMcAlgorithm
+			default:
+				st.Log(true, "Invalid algorithm passed to --algorithm.\n")
+				os.Exit(-1)
+			}
+		default:
+			filename = arg
+		}
+	}
+}
+
+
+// main is the entry point of bt-split. It initialises logging, parses the
+// command line, opens the input workbook, asks the user for an output
+// filename pattern, a sheet and a column to split on, loads every row, and
+// hands the data to bt.Split. Timing information is logged at the end.
+//
+// If the input file cannot be opened the process exits with status 1.
+func main() {
+	st.LogInit(fmt.Sprintf("%v/.bt-split-log", os.Getenv("HOME")), "bt-split")
+	wd, wderr := os.Getwd()
+	// NOTE(review): the whole environment is passed to the logger here; confirm
+	// that writing it to the log is acceptable, as it may contain secrets.
+	st.Log(false, "\nMetadata: \n\tPID: %v\n\tPPID: %v\n\tCWD: %v (err: %v)\n\tEnviron: %v\n",
+		os.Getpid(), os.Getppid(), wd, wderr, os.Environ(),
+	)
+	beginning := time.Now()
+	parseArgs(os.Args)
+
+	file, err := xl.OpenFile(filename)
+	if err != nil {
+		st.ErrLog(true, "Cannot open file [%s], err [%v].\n", filename, err)
+		// Release the log and signal failure through the exit status; a bare
+		// return from main would report success (status 0) to the caller.
+		st.LogFree()
+		os.Exit(1)
+	}
+
+	// The output is written to the directory containing the input file.
+	// filepath.Dir understands the separator of the target operating system.
+	outDir = filepath.Dir(filename)
+
+	// Keep asking until the name contains a %s placeholder; without it every
+	// output file would get the same name.
+	prompt := "Enter the name of the output file, with %s = the element to split by: "
+	for {
+		outfileName = st.GetString(prompt)
+		if strings.Contains(outfileName, "%s") {
+			break
+		}
+	}
+
+	/* Ask user to select the sheet */
+	sheet = st.SelectSheet(file, sheet)
+
+	/* Get the header, and ask the user to select the column to split on */
+	// NOTE(review): header, idxSplitOn and rows below are locals created with
+	// := and shadow the package-level variables of the same names; confirm
+	// whether the globals were meant to be assigned instead.
+	header := st.GetHeader(file, sheet)
+	idxSplitOn := st.ChooseFromHeaderPrompt(header, "Please choose the column to split the Excel file on: ")
+
+	/* Now load all rows in to actually start processing. */
+	st.Log(true, "Loading all rows. This might take a little while if the file is big.\n")
+	loadStart := time.Now()
+	rows := st.GetRows(file, sheet) // panics if fails, which is ok
+	loadTime := time.Since(loadStart)
+
+	// Do the splits.
+	start := time.Now()
+
+	bt.Split(rows, header, idxSplitOn, outDir, outfileName, algorithm)
+
+	// st.Log takes a format string plus arguments, so the values are passed
+	// directly rather than pre-formatted with fmt.Sprintf.
+	st.Log(true, "Loading the file took: %v\n", loadTime)
+	st.Log(true, "Splitting&Saving took: %v\n", time.Since(start))
+	st.Log(true, "Total time elapsed: %v\n", time.Since(loadStart))
+	st.Log(true, "From beginning to end: %v\n", time.Since(beginning))
+	st.Log(false, "PID %v is terminating.\n", os.Getpid())
+	st.LogFree()
+}