1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
package main
/* bt-split: split an Excel file into separate files based on the values found
* in a given column.
* This program is not meant to handle every case, only the general one.
* Assumptions:
* - The topmost row of the spreadsheet must be the header of the table.
*/
/* Problems to resolve:
*
* - Make a better name for the sheet than "BadTudexo". Potentially make it an
* argument --sheet-name, -s that can be passed to supplant BadTudexo.
*
* - Add in an [indivmc] (indiv multi-core) algorithm which spins off a
* goroutine for each iteration of the loop. Also, reimplement the [group]
* algorithm since I accidentally nuked it.
*
* - Potentially rework the different functions to use goroutines. This should
* make them much faster, as the work would be parallelised. The indiv
* algorithm in particular is well suited to this.
*/
import (
"fmt"
"os"
"time"
"strings"
"path/filepath"
xl "anyxcelize"
// "git.gabbott.dev/george/excelize/v2"
st "saggytrousers"
bt "badtudexo"
)
// Package-level state. Everything except algorithm starts at its zero value.
var (
	// sheet is the sheet name, as passed in with --sheet-name.
	sheet string
	// outDir is the directory the resultant files are written to.
	outDir string
	// outfileName is the name of the output file, with %s to replace.
	outfileName string
	// idxSplitOn is the index, in header and rows, of the column to split on.
	idxSplitOn int
	header     []string
	rows       [][]string

	// Flags and values from argument parsing.
	// var fNoVerbose bool = false
	// var altSheetName string /* If set, this is used instead of SheetName. */

	// filename is the input file name passed as an argument.
	filename string
	// algorithm is the split strategy; --algorithm can replace the default.
	algorithm bt.SplitAlgorithm = bt.IndivAlgorithm
)
// parseArgs walks the command-line arguments and records the result in the
// package-level variables sheet, algorithm and filename.
//
// args is expected to have the shape of os.Args: args[0] is the program name
// and is skipped. Recognised flags:
//
//	--help, help, -h      calls usage()
//	--version, -v         calls version()
//	--sheet-name, -s X    stores X in sheet
//	--algorithm, -a X     selects the split algorithm ("indiv" or "group")
//
// Any other argument is taken to be the input filename; if several are given
// the last one wins. The process exits with status -1 when no argument is
// passed, when a flag is missing its value, or when the algorithm is unknown.
func parseArgs(args []string) {
	if len(args) < 2 {
		fmt.Println("Pass either a filename, or --help or --version.")
		os.Exit(-1)
	}

	// Start at 1: args[0] is the program name and must not be mistaken for
	// the input filename by the default case below.
	for i := 1; i < len(args); i++ {
		arg := args[i]
		switch arg {
		case "--help", "help", "-h":
			usage()
		case "--version", "-v":
			version()
		// case "--output-filename", "-f":
		// case "--output-sheetname", "-o":
		case "--sheet-name", "-s":
			// The flag consumes the following argument; make sure it exists
			// instead of indexing past the end of args.
			i++
			if i >= len(args) {
				st.Log(true, "Missing value for %s.\n", arg)
				os.Exit(-1)
			}
			sheet = args[i]
		case "--algorithm", "-a":
			i++
			if i >= len(args) {
				st.Log(true, "Missing value for %s.\n", arg)
				os.Exit(-1)
			}
			switch args[i] {
			case "indiv":
				algorithm = bt.IndivAlgorithm
			case "group":
				algorithm = bt.GroupAlgorithm
			// case "indivmc":
			// algorithm = bt.IndivMcAlgorithm
			default:
				st.Log(true, "Invalid algorithm passed to --algorithm.\n")
				os.Exit(-1)
			}
		default:
			filename = arg
		}
	}
}
// main drives the whole split: it initialises logging, parses the command
// line, opens the input workbook, asks the user for the output file name
// pattern, the sheet and the column to split on, loads every row and hands
// them to bt.Split. Timing information is logged at the end.
func main() {
	st.LogInit(fmt.Sprintf("%v/.bt-split-log", os.Getenv("HOME")), "bt-split")
	// Deferred so the log is also released on the early return below.
	// Note that the os.Exit calls inside parseArgs still bypass this.
	defer st.LogFree()

	wd, wderr := os.Getwd()
	st.Log(false, "\nMetadata: \n\tPID: %v\n\tPPID: %v\n\tCWD: %v (err: %v)\n\tEnviron: %v\n",
		os.Getpid(), os.Getppid(), wd, wderr, os.Environ(),
	)

	beginning := time.Now()
	parseArgs(os.Args)

	file, err := xl.OpenFile(filename)
	if err != nil {
		st.ErrLog(true, "Cannot open file [%s], err [%v].\n", filename, err)
		return
	}

	// The output is written next to the input file. filepath.Dir handles the
	// platform's path separator.
	outDir = filepath.Dir(filename)

	// Keep asking until the name contains %s; without it every output file
	// would get the same name.
	prompt := "Enter the name of the output file, with %s = the element to split by: "
	for {
		outfileName = st.GetString(prompt)
		if strings.Contains(outfileName, "%s") {
			break
		}
	}

	/* Ask user to select the sheet */
	sheet = st.SelectSheet(file, sheet)

	/* Get the header, and ask the user to select the column to split on */
	// NOTE(review): header, idxSplitOn and rows are declared with := and so
	// are locals shadowing the package-level variables of the same names;
	// those globals stay unset. Confirm whether that is intended.
	header := st.GetHeader(file, sheet)
	idxSplitOn := st.ChooseFromHeaderPrompt(header, "Please choose the column to split the Excel file on: ")

	/* Now load all rows in to actually start processing. */
	st.Log(true, "Loading all rows. This might take a little while if the file is big.\n")
	loadStart := time.Now()
	rows := st.GetRows(file, sheet) // panics if fails, which is ok
	loadTime := time.Since(loadStart)

	// Do the splits.
	start := time.Now()
	bt.Split(rows, header, idxSplitOn, outDir, outfileName, algorithm)

	// Pass the format and its value straight to st.Log rather than using a
	// pre-formatted string as the format argument.
	st.Log(true, "Loading the file took: %v\n", loadTime)
	st.Log(true, "Splitting&Saving took: %v\n", time.Since(start))
	st.Log(true, "Total time elapsed: %v\n", time.Since(loadStart))
	st.Log(true, "From beginning to end: %v\n", time.Since(beginning))
	st.Log(false, "PID %v is terminating.\n", os.Getpid())
}
|