package main /* bt-split: split an Excel file into separate files based on the value of a * given row. * This script is not to deal with all cases, but the general one only. * Assumptions: * - The topmost row of the spreadsheet must be the header of the table. */ /* Problems to resolve: * * - Make a better name for the sheet than "BadTudexo". Potentially make it an * argument --sheet-name, -s that can be passed to supplant BadTudexo. * * - Add in an [indivmc] (indiv multi-core) which spins off a Goroutine for * each iteration of the loop. Also, reimplement the [group] algorithm since * I accidentally nuked it. * * - Potentially work to make the different functions use Go-routines. This * will make them much faster as they are parallelised. Particularly indiv * can do this well. */ import ( "fmt" "os" "time" "strings" "path/filepath" xl "anyxcelize" // "git.gabbott.dev/george/excelize/v2" st "saggytrousers" bt "badtudexo" ) // Uninitialized Globals var sheet string /* As passed in with --sheet-name */ var outDir string /* The directory to write resultant files */ var outfileName string /* The name of the output file, with %s to replace */ var idxSplitOn int /* The index in header, rows of the col to split on */ var header []string var rows [][]string // Flags and values from argparsing // var fNoVerbose bool = false // var altSheetName string /* If set, this is used instead of SheetName. */ var filename string /* The filename passed as an argument to this. */ var algorithm bt.SplitAlgorithm = bt.IndivAlgorithm func parseArgs(args []string) { if len(args) < 2 { fmt.Println("Pass either a filename, or --help or --version.") os.Exit(-1) } for i := 0; i < len(args); i++ { arg := args[i] switch arg { case "--help", "help", "-h": usage() case "--version", "-v": version() // case "--output-filename", "-f": // case "--output-sheetname", "-o": case "--sheet-name", "-s": i++ sheet = args[i] case "--algorithm", "-a": i++ alg := args[i] switch alg { case "indiv": algorithm = bt.IndivAlgorithm case "group": algorithm = bt.GroupAlgorithm // case "indivmc": // algorithm = bt.IndivMcAlgorithm default: st.Log(true, "Invalid algorithm passed to --algorithm.\n") os.Exit(-1) } default: filename = arg } } } func main() { st.LogInit(fmt.Sprintf("%v/.bt-split-log", os.Getenv("HOME")), "bt-split") wd, wderr := os.Getwd() st.Log(false, "\nMetadata: \n\tPID: %v\n\tPPID: %v\n\tCWD: %v (err: %v)\n\tEnviron: %v\n", os.Getpid(), os.Getppid(), wd, wderr, os.Environ(), ) beginning := time.Now() args := os.Args parseArgs(args) file, err := xl.OpenFile(filename) if err != nil { st.ErrLog(true, "Cannot open file [%s], err [%v].\n", filename, err) return } // Get the directory of the input file (basename) which is where the // output shall be put. // NOTE: potentially does not work on Windows? outDir = filepath.Dir(filename) // TODO: add in a check here, or in a version of the getString // function, that ensures the output has a %s contained. Otherwise all // the filenames will be the same. prompt := "Enter the name of the output file, with %s = the element to split by: " good := false for !good { outfileName = st.GetString(prompt) if strings.Contains(outfileName, "%s") { good = true } } /* Ask user to select the sheet */ sheet = st.SelectSheet(file, sheet) /* Get the header, and ask the user to select the column to split on */ header := st.GetHeader(file, sheet) idxSplitOn := st.ChooseFromHeaderPrompt(header, "Please choose the column to split the Excel file on: ") /* Now load all rows in to actually start processing. */ st.Log(true, "Loading all rows. This might take a little while if the file is big.\n") loadStart := time.Now() rows := st.GetRows(file, sheet) // panics if fails, which is ok loadTime := time.Since(loadStart) // Do the splits. start := time.Now() bt.Split(rows, header, idxSplitOn, outDir, outfileName, algorithm) st.Log(true, fmt.Sprintf("Loading the file took: %v\n", loadTime)) st.Log(true, fmt.Sprintf("Splitting&Saving took: %v\n", time.Since(start))) st.Log(true, fmt.Sprintf("Total time elapsed: %v\n", time.Since(loadStart))) st.Log(true, fmt.Sprintf("From beginning to end: %v\n", time.Since(beginning))) st.Log(false, "PID %v is terminating.\n", os.Getpid()) st.LogFree() }