SSB 基准测试
概述
SSB(Star Schema Benchmark)是由麻省大学的研究人员基于 TPC-H 基准测试做了大量修改实现的,主要用于对决策分析应用的性能进行基准测试。
SSB 数据集总共有 5 张表,其中包含:
- 1 张事实表:lineorder
- 4 张维度表:customer、supplier、part、date
表中的字段信息和表之间的关系见下图:
SSB 总共包含 4 组共 13 条查询用例。
SSB dbgen 工具使用
我们可以使用 dbgen 工具来生成 SSB 的数据集和查询集,github 仓库地址为 [electrum/ssb-dbgen: Star Schema Benchmark dbgen](electrum/ssb-dbgen: Star Schema Benchmark dbgen)。
生成数据集
首先需要安装 SSB 数据集和查询集生成工具 dbgen,,安装过程如下:
1# 安装相关依赖
2sudo yum install -y git gcc make
3# clone 仓库到本地
4git clone https://github.com/electrum/ssb-dbgen
5# 编译
6make machine=LINUX
按照如下方式生成数据集:
1# 生成 lineorder.tbl
2dbgen -s 1 -T l
3# 生成 customer.tbl
4dbgen -s 1 -T c
5# 生成 supplier.tbl
6dbgen -s 1 -T s
7# 生成 part.tbl
8dbgen -s 1 -T p
9# 生成 date.tbl
10dbgen -s 1 -T d
也可以一次性生成所有数据集:
1dbgen -s 1 -T a
其中 -s
参数表示 SF(Scale Factor),SF 越大,数据集规模越大。
查询用例
1-- PROMPT Q1.1
2SELECT
3 SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS REVENUE
4FROM
5 LINEORDER,
6 DATES
7WHERE
8 LO_ORDERDATE = D_DATEKEY
9 AND D_YEAR = 1993
10 AND LO_DISCOUNT BETWEEN 1
11 AND 3
12 AND LO_QUANTITY < 25;
13
14-- PROMPT Q1.2
15SELECT
16 SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS REVENUE
17FROM
18 LINEORDER,
19 DATES
20WHERE
21 LO_ORDERDATE = D_DATEKEY
22 AND D_YEARMONTH = 'Jan1994'
23 AND LO_DISCOUNT BETWEEN 4
24 AND 6
25 AND LO_QUANTITY BETWEEN 26
26 AND 35;
27
28-- PROMPT Q1.3
29SELECT
30 SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS REVENUE
31FROM
32 LINEORDER,
33 DATES
34WHERE
35 LO_ORDERDATE = D_DATEKEY
36 AND D_WEEKNUMINYEAR = 6
37 AND D_YEAR = 1994
38 AND LO_DISCOUNT BETWEEN 5
39 AND 7
40 AND LO_QUANTITY BETWEEN 26
41 AND 35;
42
43-- PROMPT Q2.1
44SELECT
45 SUM(LO_REVENUE),
46 D_YEAR,
47 P_BRAND
48FROM
49 LINEORDER,
50 DATES,
51 PART,
52 SUPPLIER
53WHERE
54 LO_ORDERDATE = D_DATEKEY
55 AND LO_PARTKEY = P_PARTKEY
56 AND LO_SUPPKEY = S_SUPPKEY
57 AND P_CATEGORY = 'MFGR#12'
58 AND S_REGION = 'AMERICA'
59GROUP BY
60 D_YEAR,
61 P_BRAND
62ORDER BY
63 D_YEAR,
64 P_BRAND;
65
66-- PROMPT Q2.2
67SELECT
68 SUM(LO_REVENUE),
69 D_YEAR,
70 P_BRAND
71FROM
72 LINEORDER,
73 DATES,
74 PART,
75 SUPPLIER
76WHERE
77 LO_ORDERDATE = D_DATEKEY
78 AND LO_PARTKEY = P_PARTKEY
79 AND LO_SUPPKEY = S_SUPPKEY
80 AND P_BRAND BETWEEN 'MFGR#2221'
81 AND 'MFGR#2228'
82 AND S_REGION = 'ASIA'
83GROUP BY
84 D_YEAR,
85 P_BRAND
86ORDER BY
87 D_YEAR,
88 P_BRAND;
89
90-- PROMPT Q2.3
91SELECT
92 SUM(LO_REVENUE),
93 D_YEAR,
94 P_BRAND
95FROM
96 LINEORDER,
97 DATES,
98 PART,
99 SUPPLIER
100WHERE
101 LO_ORDERDATE = D_DATEKEY
102 AND LO_PARTKEY = P_PARTKEY
103 AND LO_SUPPKEY = S_SUPPKEY
104 AND P_BRAND = 'MFGR#2239'
105 AND S_REGION = 'EUROPE'
106GROUP BY
107 D_YEAR,
108 P_BRAND
109ORDER BY
110 D_YEAR,
111 P_BRAND;
112
113-- PROMPT Q3.1
114SELECT
115 C_NATION,
116 S_NATION,
117 D_YEAR,
118 SUM(LO_REVENUE) AS REVENUE
119FROM
120 CUSTOMER,
121 LINEORDER,
122 SUPPLIER,
123 DATES
124WHERE
125 LO_CUSTKEY = C_CUSTKEY
126 AND LO_SUPPKEY = S_SUPPKEY
127 AND LO_ORDERDATE = D_DATEKEY
128 AND C_REGION = 'ASIA'
129 AND S_REGION = 'ASIA'
130 AND D_YEAR >= 1992
131 AND D_YEAR <= 1997
132GROUP BY
133 C_NATION,
134 S_NATION,
135 D_YEAR
136ORDER BY
137 D_YEAR ASC,
138 REVENUE DESC;
139
140-- PROMPT Q3.2
141SELECT
142 C_CITY,
143 S_CITY,
144 D_YEAR,
145 SUM(LO_REVENUE) AS REVENUE
146FROM
147 CUSTOMER,
148 LINEORDER,
149 SUPPLIER,
150 DATES
151WHERE
152 LO_CUSTKEY = C_CUSTKEY
153 AND LO_SUPPKEY = S_SUPPKEY
154 AND LO_ORDERDATE = D_DATEKEY
155 AND C_NATION = 'UNITED STATES'
156 AND S_NATION = 'UNITED STATES'
157 AND D_YEAR >= 1992
158 AND D_YEAR <= 1997
159GROUP BY
160 C_CITY,
161 S_CITY,
162 D_YEAR
163ORDER BY
164 D_YEAR ASC,
165 REVENUE DESC;
166
167-- PROMPT Q3.3
168SELECT
169 C_CITY,
170 S_CITY,
171 D_YEAR,
172 SUM(LO_REVENUE) AS REVENUE
173FROM
174 CUSTOMER,
175 LINEORDER,
176 SUPPLIER,
177 DATES
178WHERE
179 LO_CUSTKEY = C_CUSTKEY
180 AND LO_SUPPKEY = S_SUPPKEY
181 AND LO_ORDERDATE = D_DATEKEY
182 AND (
183 C_CITY = 'UNITED KI1'
184 OR C_CITY = 'UNITED KI5'
185 )
186 AND (
187 S_CITY = 'UNITED KI1'
188 OR S_CITY = 'UNITED KI5'
189 )
190 AND D_YEAR >= 1992
191 AND D_YEAR <= 1997
192GROUP BY
193 C_CITY,
194 S_CITY,
195 D_YEAR
196ORDER BY
197 D_YEAR ASC,
198 REVENUE DESC;
199
200-- PROMPT Q3.4
201SELECT
202 C_CITY,
203 S_CITY,
204 D_YEAR,
205 SUM(LO_REVENUE) AS REVENUE
206FROM
207 CUSTOMER,
208 LINEORDER,
209 SUPPLIER,
210 DATES
211WHERE
212 LO_CUSTKEY = C_CUSTKEY
213 AND LO_SUPPKEY = S_SUPPKEY
214 AND LO_ORDERDATE = D_DATEKEY
215 AND (
216 C_CITY = 'UNITED KI1'
217 OR C_CITY = 'UNITED KI5'
218 )
219 AND (
220 S_CITY = 'UNITED KI1'
221 OR S_CITY = 'UNITED KI5'
222 )
223 AND D_YEARMONTH = 'Dec1997'
224GROUP BY
225 C_CITY,
226 S_CITY,
227 D_YEAR
228ORDER BY
229 D_YEAR ASC,
230 REVENUE DESC;
231
232-- PROMPT Q4.1
233SELECT
234 D_YEAR,
235 C_NATION,
236 SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
237FROM
238 DATES,
239 CUSTOMER,
240 SUPPLIER,
241 PART,
242 LINEORDER
243WHERE
244 LO_CUSTKEY = C_CUSTKEY
245 AND LO_SUPPKEY = S_SUPPKEY
246 AND LO_PARTKEY = P_PARTKEY
247 AND LO_ORDERDATE = D_DATEKEY
248 AND C_REGION = 'AMERICA'
249 AND S_REGION = 'AMERICA'
250 AND (
251 P_MFGR = 'MFGR#1'
252 OR P_MFGR = 'MFGR#2'
253 )
254GROUP BY
255 D_YEAR,
256 C_NATION
257ORDER BY
258 D_YEAR,
259 C_NATION;
260
261-- PROMPT Q4.2
262SELECT
263 D_YEAR,
264 S_NATION,
265 P_CATEGORY,
266 SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
267FROM
268 DATES,
269 CUSTOMER,
270 SUPPLIER,
271 PART,
272 LINEORDER
273WHERE
274 LO_CUSTKEY = C_CUSTKEY
275 AND LO_SUPPKEY = S_SUPPKEY
276 AND LO_PARTKEY = P_PARTKEY
277 AND LO_ORDERDATE = D_DATEKEY
278 AND C_REGION = 'AMERICA'
279 AND S_REGION = 'AMERICA'
280 AND (
281 D_YEAR = 1997
282 OR D_YEAR = 1998
283 )
284 AND (
285 P_MFGR = 'MFGR#1'
286 OR P_MFGR = 'MFGR#2'
287 )
288GROUP BY
289 D_YEAR,
290 S_NATION,
291 P_CATEGORY
292ORDER BY
293 D_YEAR,
294 S_NATION,
295 P_CATEGORY;
296
297-- PROMPT Q4.3
298SELECT
299 D_YEAR,
300 S_CITY,
301 P_BRAND,
302 SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
303FROM
304 DATES,
305 CUSTOMER,
306 SUPPLIER,
307 PART,
308 LINEORDER
309WHERE
310 LO_CUSTKEY = C_CUSTKEY
311 AND LO_SUPPKEY = S_SUPPKEY
312 AND LO_PARTKEY = P_PARTKEY
313 AND LO_ORDERDATE = D_DATEKEY
314 AND S_NATION = 'UNITED STATES'
315 AND (
316 D_YEAR = 1997
317 OR D_YEAR = 1998
318 )
319 AND P_CATEGORY = 'MFGR#14'
320GROUP BY
321 D_YEAR,
322 S_CITY,
323 P_BRAND
324ORDER BY
325 D_YEAR,
326 S_CITY,
327 P_BRAND;
基准测试
MySQL
使用如下 SQL 语句在 MySQL 中创建相关的数据库和表:
1CREATE DATABASE IF NOT EXISTS ssb;
2
3USE ssb;
4
5CREATE TABLE customer
6(
7 c_custkey INTEGER,
8 c_name VARCHAR(25) NOT NULL,
9 c_address VARCHAR(40) NOT NULL,
10 c_city VARCHAR(10) NOT NULL,
11 c_nation VARCHAR(15) NOT NULL,
12 c_region VARCHAR(12) NOT NULL,
13 c_phone VARCHAR(15) NOT NULL,
14 c_mktsegment VARCHAR(10) NOT NULL
15);
16
17CREATE TABLE dates
18(
19 d_datekey INTEGER,
20 d_date VARCHAR(18) NOT NULL,
21 d_dayofweek VARCHAR(18) NOT NULL,
22 d_month VARCHAR(9) NOT NULL,
23 d_year INTEGER NOT NULL,
24 d_yearmonthnum INTEGER,
25 d_yearmonth VARCHAR(7) NOT NULL,
26 d_daynuminweek INTEGER,
27 d_daynuminmonth INTEGER,
28 d_daynuminyear INTEGER,
29 d_monthnuminyear INTEGER,
30 d_weeknuminyear INTEGER,
31 d_sellingseason VARCHAR(12) NOT NULL,
32 d_lastdayinweekfl INTEGER,
33 d_lastdayinmonthfl INTEGER,
34 d_holidayfl INTEGER,
35 d_weekdayfl INTEGER
36);
37
38CREATE TABLE part
39(
40 p_partkey INTEGER,
41 p_name VARCHAR(22) NOT NULL,
42 p_mfgr VARCHAR(6) NOT NULL,
43 p_category VARCHAR(7) NOT NULL,
44 p_brand VARCHAR(9) NOT NULL,
45 p_color VARCHAR(11) NOT NULL,
46 p_type VARCHAR(25) NOT NULL,
47 p_size INTEGER NOT NULL,
48 p_container VARCHAR(10) NOT NULL
49);
50
51CREATE TABLE supplier
52(
53 s_suppkey INTEGER,
54 s_name VARCHAR(25) NOT NULL,
55 s_address VARCHAR(25) NOT NULL,
56 s_city VARCHAR(10) NOT NULL,
57 s_nation VARCHAR(15) NOT NULL,
58 s_region VARCHAR(12) NOT NULL,
59 s_phone VARCHAR(15) NOT NULL
60);
61
62CREATE TABLE lineorder
63(
64 lo_orderkey BIGINT,
65 lo_linenumber BIGINT,
66 lo_custkey INTEGER NOT NULL,
67 lo_partkey INTEGER NOT NULL,
68 lo_suppkey INTEGER NOT NULL,
69 lo_orderdate INTEGER NOT NULL,
70 lo_orderpriotity VARCHAR(15) NOT NULL,
71 lo_shippriotity INTEGER,
72 lo_quantity BIGINT,
73 lo_extendedprice BIGINT,
74 lo_ordtotalprice BIGINT,
75 lo_discount BIGINT,
76 lo_revenue BIGINT,
77 lo_supplycost BIGINT,
78 lo_tax BIGINT,
79 lo_commitdate INTEGER NOT NULL,
80 lo_shipmode VARCHAR(10) NOT NULL
81);
Presto
1CREATE TABLE IF NOT EXISTS CUSTOMER
2(
3 C_CUSTKEY INTEGER,
4 C_NAME VARCHAR(25),
5 C_ADDRESS VARCHAR(40),
6 C_CITY VARCHAR(10),
7 C_NATION VARCHAR(15),
8 C_REGION VARCHAR(12),
9 C_PHONE VARCHAR(15),
10 C_MKTSEGMENT VARCHAR(10)
11);
12
13CREATE TABLE DATES
14(
15 D_DATEKEY INTEGER,
16 D_DATE VARCHAR(18),
17 D_DAYOFWEEK VARCHAR(18),
18 D_MONTH VARCHAR(9),
19 D_YEAR INTEGER,
20 D_YEARMONTHNUM INTEGER,
21 D_YEARMONTH VARCHAR(7),
22 D_DAYNUMINWEEK INTEGER,
23 D_DAYNUMINMONTH INTEGER,
24 D_DAYNUMINYEAR INTEGER,
25 D_MONTHNUMINYEAR INTEGER,
26 D_WEEKNUMINYEAR INTEGER,
27 D_SELLINGSEASON VARCHAR(12),
28 D_LASTDAYINWEEKFL INTEGER,
29 D_LASTDAYINMONTHFL INTEGER,
30 D_HOLIDAYFL INTEGER,
31 D_WEEKDAYFL INTEGER
32);
33
34CREATE TABLE PART
35(
36 P_PARTKEY INTEGER,
37 P_NAME VARCHAR(22),
38 P_MFGR VARCHAR(6),
39 P_CATEGORY VARCHAR(7),
40 P_BRAND VARCHAR(9),
41 P_COLOR VARCHAR(11),
42 P_TYPE VARCHAR(25),
43 P_SIZE INTEGER,
44 P_CONTAINER VARCHAR(10)
45);
46
47CREATE TABLE SUPPLIER
48(
49 S_SUPPKEY INTEGER,
50 S_NAME VARCHAR(25),
51 S_ADDRESS VARCHAR(25),
52 S_CITY VARCHAR(10),
53 S_NATION VARCHAR(15),
54 S_REGION VARCHAR(12),
55 S_PHONE VARCHAR(15)
56);
57
58CREATE TABLE LINEORDER
59(
60 LO_ORDERKEY BIGINT,
61 LO_LINENUMBER BIGINT,
62 LO_CUSTKEY INTEGER,
63 LO_PARTKEY INTEGER,
64 LO_SUPPKEY INTEGER,
65 LO_ORDERDATE INTEGER,
66 LO_ORDERPRIOTITY VARCHAR(15),
67 LO_SHIPPRIOTITY INTEGER,
68 LO_QUANTITY BIGINT,
69 LO_EXTENDEDPRICE BIGINT,
70 LO_ORDTOTALPRICE BIGINT,
71 LO_DISCOUNT BIGINT,
72 LO_REVENUE BIGINT,
73 LO_SUPPLYCOST BIGINT,
74 LO_TAX BIGINT,
75 LO_COMMITDATE INTEGER,
76 LO_SHIPMODE VARCHAR(10)
77);
总结
参考资料
- P. O’Neil, E. O’Neil, X. Chen, and S. Revilak, “The star schema benchmark and augmented fact table indexing,” in Technology Conference on Performance Evaluation and Benchmarking, 2009, pp. 237–252.
- [electrum/ssb-dbgen: Star Schema Benchmark dbgen](electrum/ssb-dbgen: Star Schema Benchmark dbgen)