diff options
Diffstat (limited to 'main.go')
-rw-r--r-- | main.go | 147 |
1 files changed, 147 insertions, 0 deletions
@@ -0,0 +1,147 @@ +package main + +/* bt-split: split an Excel file into separate files based on the value of a + * given row. + * This script is not to deal with all cases, but the general one only. + * Assumptions: + * - The topmost row of the spreadsheet must be the header of the table. + */ + +/* Problems to resolve: + * + * - Make a better name for the sheet than "BadTudexo". Potentially make it an + * argument --sheet-name, -s that can be passed to supplant BadTudexo. + * + * - Add in an [indivmc] (indiv multi-core) which spins off a Goroutine for + * each iteration of the loop. Also, reimplement the [group] algorithm since + * I accidentally nuked it. + * + * - Potentially work to make the different functions use Go-routines. This + * will make them much faster as they are parallelised. Particularly indiv + * can do this well. + */ + +import ( + "fmt" + "os" + "time" + "strings" + "path/filepath" + xl "anyxcelize" + // "git.gabbott.dev/george/excelize/v2" + st "saggytrousers" + bt "badtudexo" +) + +// Uninitialized Globals +var sheet string /* As passed in with --sheet-name */ +var outDir string /* The directory to write resultant files */ +var outfileName string /* The name of the output file, with %s to replace */ +var idxSplitOn int /* The index in header, rows of the col to split on */ +var header []string +var rows [][]string + +// Flags and values from argparsing +// var fNoVerbose bool = false +// var altSheetName string /* If set, this is used instead of SheetName. */ +var filename string /* The filename passed as an argument to this. */ +var algorithm bt.SplitAlgorithm = bt.IndivAlgorithm + + +func parseArgs(args []string) { + if len(args) < 2 { + fmt.Println("Pass either a filename, or --help or --version.") + os.Exit(-1) + } + + for i := 0; i < len(args); i++ { + arg := args[i] + switch arg { + case "--help", "help", "-h": + usage() + case "--version", "-v": + version() +// case "--output-filename", "-f": +// case "--output-sheetname", "-o": + case "--sheet-name", "-s": + i++ + sheet = args[i] + case "--algorithm", "-a": + i++ + alg := args[i] + switch alg { + case "indiv": + algorithm = bt.IndivAlgorithm + case "group": + algorithm = bt.GroupAlgorithm +// case "indivmc": +// algorithm = bt.IndivMcAlgorithm + default: + st.Log(true, "Invalid algorithm passed to --algorithm.\n") + os.Exit(-1) + } + default: + filename = arg + } + } +} + + +func main() { + st.LogInit(fmt.Sprintf("%v/.bt-split-log", os.Getenv("HOME")), "bt-split") + wd, wderr := os.Getwd() + st.Log(false, "\nMetadata: \n\tPID: %v\n\tPPID: %v\n\tCWD: %v (err: %v)\n\tEnviron: %v\n", + os.Getpid(), os.Getppid(), wd, wderr, os.Environ(), + ) + beginning := time.Now() + args := os.Args + parseArgs(args) + + file, err := xl.OpenFile(filename) + if err != nil { + st.ErrLog(true, "Cannot open file [%s], err [%v].\n", filename, err) + return + } + + // Get the directory of the input file (basename) which is where the + // output shall be put. + // NOTE: potentially does not work on Windows? + outDir = filepath.Dir(filename) + + // TODO: add in a check here, or in a version of the getString + // function, that ensures the output has a %s contained. Otherwise all + // the filenames will be the same. + prompt := "Enter the name of the output file, with %s = the element to split by: " + good := false + for !good { + outfileName = st.GetString(prompt) + if strings.Contains(outfileName, "%s") { good = true } + } + + + + /* Ask user to select the sheet */ + sheet = st.SelectSheet(file, sheet) + + /* Get the header, and ask the user to select the column to split on */ + header := st.GetHeader(file, sheet) + idxSplitOn := st.ChooseFromHeaderPrompt(header, "Please choose the column to split the Excel file on: ") + + /* Now load all rows in to actually start processing. */ + st.Log(true, "Loading all rows. This might take a little while if the file is big.\n") + loadStart := time.Now() + rows := st.GetRows(file, sheet) // panics if fails, which is ok + loadTime := time.Since(loadStart) + + // Do the splits. + start := time.Now() + + bt.Split(rows, header, idxSplitOn, outDir, outfileName, algorithm) + + st.Log(true, fmt.Sprintf("Loading the file took: %v\n", loadTime)) + st.Log(true, fmt.Sprintf("Splitting&Saving took: %v\n", time.Since(start))) + st.Log(true, fmt.Sprintf("Total time elapsed: %v\n", time.Since(loadStart))) + st.Log(true, fmt.Sprintf("From beginning to end: %v\n", time.Since(beginning))) + st.Log(false, "PID %v is terminating.\n", os.Getpid()) + st.LogFree() +} |