-
Notifications
You must be signed in to change notification settings - Fork 7
/
data_gen.sh
executable file
·47 lines (36 loc) · 977 Bytes
/
data_gen.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env bash
# Generate tpcds test data on the large server, writing it into a GCS bucket
# that is mounted locally with gcsfuse.
# You will need to have the tpcds dsdgen program built in the current directory.
# Usage: ./data_gen.sh CPU SCALE
#   CPU   - number of parallel dsdgen children (forwarded to gen as $1)
#   SCALE - dsdgen -SCALE value (forwarded to gen as $2); also used to name
#           the bucket and the local mount directory

# Both arguments feed the bucket name and the rm -rf target below, so refuse
# to run at all when either one is missing or empty.
if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
  printf 'Usage: %s CPU SCALE\n' "$0" >&2
  exit 2
fi

PROJECT=scale_$2
GCS_BUCKET=data_$PROJECT
DATA_DIR=~/$GCS_BUCKET

# Delete existing data dir (:? aborts rather than expanding to nothing).
rm -rf -- "${DATA_DIR:?}"

# Create mount point; without it there is nothing to mount onto.
mkdir -p -- "$DATA_DIR" || exit 1

# Remove existing bucket.
# Status intentionally not checked: presumably there is no bucket to remove
# on a first run -- TODO confirm this best-effort behavior is intended.
gsutil rb -f "gs://$GCS_BUCKET"

# Create bucket.
# Status intentionally not checked: if the removal above did not succeed the
# bucket may still exist, and the mount below can still work in that case.
gsutil mb -c standard -l us-east1 "gs://$GCS_BUCKET"

# Mount GCS bucket to VM. Stop on failure so that dsdgen does not silently
# write the whole data set into a plain local directory instead.
if ! gcsfuse "$GCS_BUCKET" "$DATA_DIR"; then
  printf 'data_gen: failed to mount %s at %s\n' "$GCS_BUCKET" "$DATA_DIR" >&2
  exit 1
fi
#######################################
# Run dsdgen as several parallel child processes, one per data chunk.
# Globals:
#   DATA_DIR (read) - directory handed to dsdgen via -DIR
# Arguments:
#   $1 - number of children; used as the seq upper bound, the xargs -P
#        process limit and the dsdgen -PARALLEL value
#   $2 - value passed to dsdgen -SCALE
# Outputs:
#   xargs -t echoes every dsdgen command line to stderr before running it
# Returns:
#   2 if either argument is empty, otherwise the status of the xargs pipeline
#######################################
gen() {
  if [ -z "${1:-}" ] || [ -z "${2:-}" ]; then
    printf 'gen: usage: gen CPU SCALE\n' >&2
    return 2
  fi

  # Locals, so that calling gen does not leave CPU/SCALE/SEED-style globals
  # behind in the calling script.
  local cpu=$1
  local scale=$2
  local seed=2019 # fixed RNG seed, passed to every child via -RNGSEED

  # seq emits 1..cpu; xargs substitutes each number for "__", so every child
  # receives its own -CHILD index. The delimiter is an escaped literal pipe.
  seq 1 "$cpu" \
    | xargs -t -P"$cpu" -I__ \
      ./dsdgen \
      -SCALE "$scale" \
      -DELIMITER \| \
      -PARALLEL "$cpu" \
      -CHILD __ \
      -TERMINATE N \
      -RNGSEED "$seed" \
      -DIR "$DATA_DIR"
}
# Generate the data: $1 is the number of parallel dsdgen children, $2 the scale.
gen "$1" "$2"

# Rename to help with 'wildcard' loading.
# Stop if the data dir cannot be entered, so the mv commands below never run
# against whatever directory the script happened to be started from.
cd "$DATA_DIR" || exit 1

# Prefix every file matched by either glob with "prefix_fix_".
# NOTE(review): the ${f#file_[0-9]*_} strip never matches here, because names
# matched by these globs start with "customer_" or "store_", not "file_".
# It is kept unchanged so the resulting names stay the same -- confirm which
# prefix pattern was actually intended.
for f in customer_[0-9]*_* store_[0-9]*_*; do
  # An unmatched glob stays as the literal pattern; skip it rather than
  # handing mv a file that does not exist.
  [ -e "$f" ] || continue
  mv -- "$f" "prefix_fix_${f#file_[0-9]*_}"
done