1
1
#! /bin/bash
2
2
3
3
# Print usage/help text and terminate.
# NOTE: this function always exits with status 1 -- callers that invoke it
# for "-h/--help" do not get their own exit status honored.
# Fixed: typo "connection sring" -> "connection string" in the --server help.
function usage {
  echo "Usage: tpcds-setup.sh [--cli --server --port --tempdir] scale_factor"
  echo "This script will generate and optimize data for Hive server benchmark testing."
  echo " "
  echo -e " --cli\t\tCLI to use for Hive. Options are 'beeline' or 'hive'. Default is 'hive'."
  echo " "
  echo -e " --server\tOptional parameter when using beeline CLI. This is the server for the\n\t\tdatabase connection string."
  echo " "
  echo -e " --port\tOptional parameter when using beeline CLI. This is the port that Hive is listening on."
  echo " "
  echo -e " --tempdir\tOptional parameter for data generation path."
  echo " "
  echo -e " scale_factor\tScale factor for data generation in GB."
  exit 1
}
7
19
20
# Get options.
# Parses command-line flags into globals:
#   CLITYPE (--cli), SERVER (--server), PORT (--port), DIR (--tempdir),
#   SCALE (any bare argument; the last one wins).
# Wrapped in a function so the parsing logic is testable in isolation;
# invoked immediately below with the script's own arguments, which keeps
# runtime behavior identical to inline parsing.
parse_args() {
  while test $# -gt 0; do
    case "$1" in
      -h|--help)
        usage
        # NOTE(review): usage always exits 1, so this exit 0 is unreachable.
        exit 0
        ;;
      --cli)
        shift
        CLITYPE="$1"
        shift
        ;;
      --server)
        shift
        SERVER="$1"
        shift
        ;;
      --port)
        shift
        PORT="$1"
        shift
        ;;
      --tempdir)
        shift
        DIR="$1"
        shift
        ;;
      *)
        SCALE="$1"
        shift
        ;;
    esac
  done
}
parse_args "$@"
8
54
function runcommand {
9
55
if [ " X$DEBUG_SCRIPT " != " X" ]; then
10
56
$1
@@ -17,6 +63,20 @@ if [ ! -f tpcds-gen/target/tpcds-gen-1.0-SNAPSHOT.jar ]; then
17
63
echo " Please build the data generator with ./tpcds-build.sh first"
18
64
exit 1
19
65
fi
66
+
67
# if no CLI is supplied, default to hive
# Fixed: was `$CLITYPE="hive"`, which expands the (empty) variable and tries
# to execute a command literally named `="hive"` instead of assigning it.
if [ "X$CLITYPE" == "X" ]; then
  CLITYPE="hive"
fi

# beeline needs an explicit HiveServer2 host and port to build its JDBC URL.
if [ "$CLITYPE" == "beeline" ]; then
  if [ "X$SERVER" == "X" ] || [ "X$PORT" == "X" ]; then
    echo "Server and port must be supplied if attempting to run beeline CLI"
    usage
    exit 1
  fi
fi
79
+
20
80
which hive > /dev/null 2>&1
21
81
if [ $? -ne 0 ]; then
22
82
echo " Script must be run where Hive is installed"
27
87
DIMS=" date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site"
28
88
FACTS=" store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory"
29
89
30
- # Get the parameters.
31
- SCALE=$1
32
- DIR=$2
33
90
if [ " X$BUCKET_DATA " != " X" ]; then
34
91
BUCKETS=13
35
92
RETURN_BUCKETS=13
@@ -70,7 +127,11 @@ hadoop fs -chmod -R 777 /${DIR}/${SCALE}
70
127
71
128
echo "TPC-DS text data generation complete."

# Build the command that creates the flat-text external tables.
# beeline connects via an explicit JDBC URL and passes variables with
# --hivevar; the hive CLI passes the same variables with -d.
if [ "$CLITYPE" == "beeline" ]; then
  HIVE="beeline -u jdbc:hive2://${SERVER}:${PORT} -i settings/load-flat.sql --hivevar DB=tpcds_text_${SCALE} --hivevar LOCATION=${DIR}/${SCALE} -f ddl-tpcds/text/alltables.sql"
else
  HIVE="hive -i settings/load-flat.sql -f ddl-tpcds/text/alltables.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}"
fi

# Create the text/flat tables as external tables. These will later be converted to ORCFile.
echo "Loading text data into external tables."
@@ -98,24 +159,40 @@ REDUCERS=$((test ${SCALE} -gt ${MAX_REDUCERS} && echo ${MAX_REDUCERS}) || echo $
98
159
# Populate the smaller tables.
# Emits one makefile target per dimension table into $LOAD_FILE; each target
# converts the text table into the optimized ${FORMAT} table.
# Wrapped in a function so target generation is testable; invoked below.
# Fixed: the beeline branch referenced undefined ${DATABASE}; it now uses
# tpcds_bin_partitioned_${FORMAT}_${SCALE}, consistent with the hive branch
# and with the fact-table loop.
load_dim_tables() {
  for t in ${DIMS}
  do
    if [ "$CLITYPE" == "beeline" ]; then
      COMMAND="beeline -u jdbc:hive2://${SERVER}:${PORT} -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
        --hivevar DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} --hivevar SOURCE=tpcds_text_${SCALE} \
        --hivevar SCALE=${SCALE} \
        --hivevar REDUCERS=${REDUCERS} \
        --hivevar FILE=${FORMAT}"
    else
      COMMAND="hive -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
        -d DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} -d SOURCE=tpcds_text_${SCALE} \
        -d SCALE=${SCALE} \
        -d REDUCERS=${REDUCERS} \
        -d FILE=${FORMAT}"
    fi
    echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
    i=$((i + 1))
  done
}
load_dim_tables
109
178
110
179
# Emits one makefile target per fact table into $LOAD_FILE; fact tables get
# the additional BUCKETS/RETURN_BUCKETS parameters for the optimized layout.
# Wrapped in a function so target generation is testable; invoked below.
# Also replaced `expr` with POSIX shell arithmetic.
load_fact_tables() {
  for t in ${FACTS}
  do
    if [ "$CLITYPE" == "beeline" ]; then
      COMMAND="beeline -u jdbc:hive2://${SERVER}:${PORT} -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
        --hivevar DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} \
        --hivevar SCALE=${SCALE} \
        --hivevar SOURCE=tpcds_text_${SCALE} --hivevar BUCKETS=${BUCKETS} \
        --hivevar RETURN_BUCKETS=${RETURN_BUCKETS} --hivevar REDUCERS=${REDUCERS} --hivevar FILE=${FORMAT}"
    else
      COMMAND="hive -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
        -d DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} \
        -d SCALE=${SCALE} \
        -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
        -d RETURN_BUCKETS=${RETURN_BUCKETS} -d REDUCERS=${REDUCERS} -d FILE=${FORMAT}"
    fi
    echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
    i=$((i + 1))
  done
}
load_fact_tables
120
197
121
198
# Run the generated makefile: executes each table-load target written above,
# one at a time (-j 1).
make -j 1 -f $LOAD_FILE
0 commit comments