Skip to content

Commit 961d7e4

Browse files
author
achilds
committed
Beeline CLI enabled testbench setup.
1 parent 25b6e6b commit 961d7e4

File tree

2 files changed

+188
-38
lines changed

2 files changed

+188
-38
lines changed

tpcds-setup.sh

+95-18
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,56 @@
11
#!/bin/bash
22

33
function usage {
4-
echo "Usage: tpcds-setup.sh scale_factor [temp_directory]"
5-
exit 1
4+
5+
echo " Usage: tpcds-setup.sh [--cli --server --port --tempdir ] scale_factor"
6+
echo " This script will generate and optimize data for Hive server benchmark testing."
7+
echo " "
8+
echo -e " --cli\t\tCLI to use for Hive. Options are 'beeline' or 'hive'. Default is 'hive'."
9+
echo " "
10+
echo -e " --server\tOptional parameter when using beeline CLI. This is the server for the\n\t\tdatabase connection string."
11+
echo " "
12+
echo -e " --port\tOptional parameter when using beeline CLI. This is the port that Hive is listening on."
13+
echo " "
14+
echo -e " --tempdir\tOptional parameter for data generation path."
15+
echo " "
16+
echo -e " scale_factor\tScale factor for data generation in GB."
17+
exit 1
618
}
719

20+
# Get options
21+
while test $# -gt 0; do
22+
case "$1" in
23+
-h|--help)
24+
usage
25+
exit 0
26+
;;
27+
--cli)
28+
shift
29+
CLITYPE="$1"
30+
shift
31+
;;
32+
--server)
33+
shift
34+
SERVER="$1"
35+
shift
36+
;;
37+
--port)
38+
shift
39+
PORT="$1"
40+
shift
41+
;;
42+
--tempdir)
43+
shift
44+
DIR="$1"
45+
shift
46+
;;
47+
*)
48+
SCALE="$1"
49+
shift
50+
;;
51+
esac
52+
done
53+
854
function runcommand {
955
if [ "X$DEBUG_SCRIPT" != "X" ]; then
1056
$1
@@ -17,6 +63,20 @@ if [ ! -f tpcds-gen/target/tpcds-gen-1.0-SNAPSHOT.jar ]; then
1763
echo "Please build the data generator with ./tpcds-build.sh first"
1864
exit 1
1965
fi
66+
67+
# if no CLI is supplied, default to hive
68+
if [ "X$CLITYPE" == "X" ]; then
69+
CLITYPE="hive"  # plain assignment: the variable name must not carry a leading '$'
70+
fi
71+
72+
if [ "$CLITYPE" == "beeline" ]; then
73+
if [ "X$SERVER" == "X" ] || [ "X$PORT" == "X" ]; then
74+
echo "Server and port must be supplied if attempting to run beeline CLI"
75+
usage
76+
exit 1
77+
fi
78+
fi
79+
2080
which hive > /dev/null 2>&1
2181
if [ $? -ne 0 ]; then
2282
echo "Script must be run where Hive is installed"
@@ -27,9 +87,6 @@ fi
2787
DIMS="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site"
2888
FACTS="store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory"
2989

30-
# Get the parameters.
31-
SCALE=$1
32-
DIR=$2
3390
if [ "X$BUCKET_DATA" != "X" ]; then
3491
BUCKETS=13
3592
RETURN_BUCKETS=13
@@ -70,7 +127,11 @@ hadoop fs -chmod -R 777 /${DIR}/${SCALE}
70127

71128
echo "TPC-DS text data generation complete."
72129

73-
HIVE="beeline -n hive -u 'jdbc:hive2://localhost:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2?tez.queue.name=default' "
130+
if [ "$CLITYPE" == "beeline" ]; then
131+
HIVE="beeline -u jdbc:hive2://${SERVER}:${PORT} -i settings/load-flat.sql --hivevar DB=tpcds_text_${SCALE} --hivevar LOCATION=${DIR}/${SCALE} -f ddl-tpcds/text/alltables.sql"
132+
else
133+
HIVE="hive -i settings/load-flat.sql -f ddl-tpcds/text/alltables.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}"
134+
fi
74135

75136
# Create the text/flat tables as external tables. These will later be converted to ORCFile.
76137
echo "Loading text data into external tables."
@@ -98,24 +159,40 @@ REDUCERS=$((test ${SCALE} -gt ${MAX_REDUCERS} && echo ${MAX_REDUCERS}) || echo $
98159
# Populate the smaller tables.
99160
for t in ${DIMS}
100161
do
101-
COMMAND="$HIVE -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
102-
--hivevar DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} --hivevar SOURCE=tpcds_text_${SCALE} \
162+
if [ "$CLITYPE" == "beeline" ]; then
163+
COMMAND="beeline -u jdbc:hive2://${SERVER}:${PORT} -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
164+
--hivevar DB=${DATABASE} --hivevar SOURCE=tpcds_text_${SCALE} \
103165
--hivevar SCALE=${SCALE} \
104-
--hivevar REDUCERS=${REDUCERS} \
105-
--hivevar FILE=${FORMAT}"
106-
echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
107-
i=`expr $i + 1`
166+
--hivevar REDUCERS=${REDUCERS} \
167+
--hivevar FILE=${FORMAT}"
168+
else
169+
COMMAND="hive -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
170+
-d DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} -d SOURCE=tpcds_text_${SCALE} \
171+
-d SCALE=${SCALE} \
172+
-d REDUCERS=${REDUCERS} \
173+
-d FILE=${FORMAT}"
174+
fi
175+
echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
176+
i=`expr $i + 1`
108177
done
109178
110179
for t in ${FACTS}
111180
do
112-
COMMAND="$HIVE -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
113-
--hivevar DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} \
181+
if [ "$CLITYPE" == "beeline" ]; then
182+
COMMAND="beeline -u jdbc:hive2://${SERVER}:${PORT} -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
183+
--hivevar DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} \
114184
--hivevar SCALE=${SCALE} \
115-
--hivevar SOURCE=tpcds_text_${SCALE} --hivevar BUCKETS=${BUCKETS} \
116-
--hivevar RETURN_BUCKETS=${RETURN_BUCKETS} --hivevar REDUCERS=${REDUCERS} --hivevar FILE=${FORMAT}"
117-
echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
118-
i=`expr $i + 1`
185+
--hivevar SOURCE=tpcds_text_${SCALE} --hivevar BUCKETS=${BUCKETS} \
186+
--hivevar RETURN_BUCKETS=${RETURN_BUCKETS} --hivevar REDUCERS=${REDUCERS} --hivevar FILE=${FORMAT}"
187+
else
188+
COMMAND="hive -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
189+
-d DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} \
190+
-d SCALE=${SCALE} \
191+
-d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
192+
-d RETURN_BUCKETS=${RETURN_BUCKETS} -d REDUCERS=${REDUCERS} -d FILE=${FORMAT}"
193+
fi
194+
echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
195+
i=`expr $i + 1`
119196
done
120197
121198
make -j 1 -f $LOAD_FILE

tpch-setup.sh

+93-20
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,57 @@
11
#!/bin/bash
22

33
function usage {
4-
echo "Usage: tpch-setup.sh scale_factor [temp_directory]"
5-
exit 1
4+
5+
echo " Usage: tpch-setup.sh [--cli --server --port --tempdir ] scale_factor"
6+
echo " This script will generate and optimize data for Hive server benchmark testing."
7+
echo " "
8+
echo -e " --cli\t\tCLI to use for Hive. Options are 'beeline' or 'hive'. Default is 'hive'."
9+
echo " "
10+
echo -e " --server\tOptional parameter when using beeline CLI. This is the server for the\n\t\tdatabase connection string."
11+
echo " "
12+
echo -e " --port\tOptional parameter when using beeline CLI. This is the port that Hive is listening on."
13+
echo " "
14+
echo -e " --tempdir\tOptional parameter for data generation path."
15+
echo " "
16+
echo -e " scale_factor\tScale factor for data generation in GB."
17+
exit 1
18+
619
}
720

21+
# Get options
22+
while test $# -gt 0; do
23+
case "$1" in
24+
-h|--help)
25+
usage
26+
exit 0
27+
;;
28+
--cli)
29+
shift
30+
CLITYPE="$1"
31+
shift
32+
;;
33+
--server)
34+
shift
35+
SERVER="$1"
36+
shift
37+
;;
38+
--port)
39+
shift
40+
PORT="$1"
41+
shift
42+
;;
43+
--tempdir)
44+
shift
45+
DIR="$1"
46+
shift
47+
;;
48+
*)
49+
SCALE="$1"
50+
shift
51+
;;
52+
esac
53+
done
54+
855
function runcommand {
956
if [ "X$DEBUG_SCRIPT" != "X" ]; then
1057
$1
@@ -17,6 +64,20 @@ if [ ! -f tpch-gen/target/tpch-gen-1.0-SNAPSHOT.jar ]; then
1764
echo "Please build the data generator with ./tpch-build.sh first"
1865
exit 1
1966
fi
67+
68+
# if no CLI is supplied, default to hive
69+
if [ "X$CLITYPE" == "X" ]; then
70+
CLITYPE="hive"  # plain assignment: the variable name must not carry a leading '$'
71+
fi
72+
73+
if [ "$CLITYPE" == "beeline" ]; then
74+
if [ "X$SERVER" == "X" ] || [ "X$PORT" == "X" ]; then
75+
echo "Server and port must be supplied if attempting to run beeline CLI"
76+
usage
77+
exit 1
78+
fi
79+
fi
80+
2081
which hive > /dev/null 2>&1
2182
if [ $? -ne 0 ]; then
2283
echo "Script must be run where Hive is installed"
@@ -25,10 +86,6 @@ fi
2586

2687
# Tables in the TPC-H schema.
2788
TABLES="part partsupp supplier customer orders lineitem nation region"
28-
29-
# Get the parameters.
30-
SCALE=$1
31-
DIR=$2
3289
BUCKETS=13
3390
if [ "X$DEBUG_SCRIPT" != "X" ]; then
3491
set -x
@@ -62,8 +119,12 @@ echo "TPC-H text data generation complete."
62119

63120
# Create the text/flat tables as external tables. These will later be converted to ORCFile.
64121
echo "Loading text data into external tables."
65-
runcommand "hive -i settings/load-flat.sql -f ddl-tpch/bin_flat/alltables.sql -d DB=tpch_text_${SCALE} -d LOCATION=${DIR}/${SCALE}"
66122

123+
if [ "$CLITYPE" == "beeline" ]; then
124+
runcommand "beeline -u jdbc:hive2://${SERVER}:${PORT} -i settings/load-flat.sql --silent=true --hivevar DB=tpch_text_${SCALE} --hivevar LOCATION=${DIR}/${SCALE} -f ddl-tpch/bin_flat/alltables.sql"
125+
else
126+
runcommand "hive -i settings/load-flat.sql -f ddl-tpch/bin_flat/alltables.sql -d DB=tpch_text_${SCALE} -d LOCATION=${DIR}/${SCALE}"
127+
fi
67128
# Create the optimized tables.
68129
i=1
69130
total=8
@@ -80,20 +141,32 @@ REDUCERS=$((test ${SCALE} -gt ${MAX_REDUCERS} && echo ${MAX_REDUCERS}) || echo $
80141
81142
for t in ${TABLES}
82143
do
83-
echo "Optimizing table $t ($i/$total)."
84-
COMMAND="hive -i settings/load-${SCHEMA_TYPE}.sql -f ddl-tpch/bin_${SCHEMA_TYPE}/${t}.sql \
85-
-d DB=${DATABASE} \
86-
-d SOURCE=tpch_text_${SCALE} -d BUCKETS=${BUCKETS} \
87-
-d SCALE=${SCALE} -d REDUCERS=${REDUCERS} \
88-
-d FILE=orc"
89-
runcommand "$COMMAND"
90-
if [ $? -ne 0 ]; then
91-
echo "Command failed, try 'export DEBUG_SCRIPT=ON' and re-running"
92-
exit 1
93-
fi
94-
i=`expr $i + 1`
144+
echo "Optimizing table $t ($i/$total)."
145+
if [ "$CLITYPE" == "beeline" ]; then
146+
COMMAND="beeline -u jdbc:hive2://${SERVER}:${PORT} -i settings/load-${SCHEMA_TYPE}.sql -f ddl-tpch/bin_${SCHEMA_TYPE}/${t}.sql \
147+
--silent=true --hivevar DB=${DATABASE} \
148+
--hivevar SOURCE=tpch_text_${SCALE} --hivevar BUCKETS=${BUCKETS} \
149+
--hivevar SCALE=${SCALE} --hivevar REDUCERS=${REDUCERS} \
150+
--hivevar FILE=orc"
151+
else
152+
COMMAND="hive -i settings/load-${SCHEMA_TYPE}.sql -f ddl-tpch/bin_${SCHEMA_TYPE}/${t}.sql \
153+
-d DB=${DATABASE} \
154+
-d SOURCE=tpch_text_${SCALE} -d BUCKETS=${BUCKETS} \
155+
-d SCALE=${SCALE} -d REDUCERS=${REDUCERS} \
156+
-d FILE=orc"
157+
fi
158+
runcommand "$COMMAND"
159+
if [ $? -ne 0 ]; then
160+
echo "Command failed, try 'export DEBUG_SCRIPT=ON' and re-running"
161+
exit 1
162+
fi
163+
i=`expr $i + 1`
95164
done
96165
97-
hive -i settings/load-${SCHEMA_TYPE}.sql -f ddl-tpch/bin_${SCHEMA_TYPE}/analyze.sql --database ${DATABASE};
166+
if [ "$CLITYPE" == "beeline" ]; then
167+
beeline -u jdbc:hive2://${SERVER}:${PORT}/${DATABASE} -i settings/load-${SCHEMA_TYPE}.sql -f ddl-tpch/bin_${SCHEMA_TYPE}/analyze.sql;
168+
else
169+
hive -i settings/load-${SCHEMA_TYPE}.sql -f ddl-tpch/bin_${SCHEMA_TYPE}/analyze.sql --database ${DATABASE};
170+
fi
98171
99172
echo "Data loaded into database ${DATABASE}."

0 commit comments

Comments
 (0)