SSB 基准测试

概述

SSB(Star Schema Benchmark)是由麻省大学的研究人员基于 TPC-H 基准测试做了大量修改实现的,主要用于对决策分析应用的性能进行基准测试。

SSB 数据集总共有 5 张表,其中包含:

  • 1 张事实表:lineorder
  • 4 张维度表:customer、supplier、part、date

表中的字段信息和表之间的关系见下图:

StarSchemaBenchmark

SSB 总共包含 4 组共 13 条查询用例。

SSB dbgen 工具使用

我们可以使用 dbgen 工具来生成 SSB 的数据集和查询集,github 仓库地址为 [electrum/ssb-dbgen: Star Schema Benchmark dbgen](electrum/ssb-dbgen: Star Schema Benchmark dbgen)。

生成数据集

首先需要安装 SSB 数据集和查询集生成工具 dbgen,,安装过程如下:

1# 安装相关依赖
2sudo yum install -y git gcc make
3# clone 仓库到本地
4git clone https://github.com/electrum/ssb-dbgen
5# 编译
6make machine=LINUX

按照如下方式生成数据集:

 1# 生成 lineorder.tbl
 2dbgen -s 1 -T l
 3# 生成 customer.tbl
 4dbgen -s 1 -T c
 5# 生成 supplier.tbl
 6dbgen -s 1 -T s
 7# 生成 part.tbl
 8dbgen -s 1 -T p
 9# 生成 date.tbl
10dbgen -s 1 -T d

也可以一次性生成所有数据集:

1dbgen -s 1 -T a

其中 -s 参数表示 SF(Scale Factor),SF 越大,数据集规模越大。

查询用例

  1-- PROMPT Q1.1
  2SELECT
  3    SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS REVENUE
  4FROM
  5    LINEORDER,
  6    DATES
  7WHERE
  8    LO_ORDERDATE = D_DATEKEY
  9    AND D_YEAR = 1993
 10    AND LO_DISCOUNT BETWEEN 1
 11    AND 3
 12    AND LO_QUANTITY < 25;
 13
 14-- PROMPT Q1.2
 15SELECT
 16    SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS REVENUE
 17FROM
 18    LINEORDER,
 19    DATES
 20WHERE
 21    LO_ORDERDATE = D_DATEKEY
 22    AND D_YEARMONTH = 'Jan1994'
 23    AND LO_DISCOUNT BETWEEN 4
 24    AND 6
 25    AND LO_QUANTITY BETWEEN 26
 26    AND 35;
 27
 28-- PROMPT Q1.3
 29SELECT
 30    SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS REVENUE
 31FROM
 32    LINEORDER,
 33    DATES
 34WHERE
 35    LO_ORDERDATE = D_DATEKEY
 36    AND D_WEEKNUMINYEAR = 6
 37    AND D_YEAR = 1994
 38    AND LO_DISCOUNT BETWEEN 5
 39    AND 7
 40    AND LO_QUANTITY BETWEEN 26
 41    AND 35;
 42
 43-- PROMPT Q2.1
 44SELECT
 45    SUM(LO_REVENUE),
 46    D_YEAR,
 47    P_BRAND
 48FROM
 49    LINEORDER,
 50    DATES,
 51    PART,
 52    SUPPLIER
 53WHERE
 54    LO_ORDERDATE = D_DATEKEY
 55    AND LO_PARTKEY = P_PARTKEY
 56    AND LO_SUPPKEY = S_SUPPKEY
 57    AND P_CATEGORY = 'MFGR#12'
 58    AND S_REGION = 'AMERICA'
 59GROUP BY
 60    D_YEAR,
 61    P_BRAND
 62ORDER BY
 63    D_YEAR,
 64    P_BRAND;
 65
 66-- PROMPT Q2.2
 67SELECT
 68    SUM(LO_REVENUE),
 69    D_YEAR,
 70    P_BRAND
 71FROM
 72    LINEORDER,
 73    DATES,
 74    PART,
 75    SUPPLIER
 76WHERE
 77    LO_ORDERDATE = D_DATEKEY
 78    AND LO_PARTKEY = P_PARTKEY
 79    AND LO_SUPPKEY = S_SUPPKEY
 80    AND P_BRAND BETWEEN 'MFGR#2221'
 81    AND 'MFGR#2228'
 82    AND S_REGION = 'ASIA'
 83GROUP BY
 84    D_YEAR,
 85    P_BRAND
 86ORDER BY
 87    D_YEAR,
 88    P_BRAND;
 89
 90-- PROMPT Q2.3
 91SELECT
 92    SUM(LO_REVENUE),
 93    D_YEAR,
 94    P_BRAND
 95FROM
 96    LINEORDER,
 97    DATES,
 98    PART,
 99    SUPPLIER
100WHERE
101    LO_ORDERDATE = D_DATEKEY
102    AND LO_PARTKEY = P_PARTKEY
103    AND LO_SUPPKEY = S_SUPPKEY
104    AND P_BRAND = 'MFGR#2239'
105    AND S_REGION = 'EUROPE'
106GROUP BY
107    D_YEAR,
108    P_BRAND
109ORDER BY
110    D_YEAR,
111    P_BRAND;
112
113-- PROMPT Q3.1
114SELECT
115    C_NATION,
116    S_NATION,
117    D_YEAR,
118    SUM(LO_REVENUE) AS REVENUE
119FROM
120    CUSTOMER,
121    LINEORDER,
122    SUPPLIER,
123    DATES
124WHERE
125    LO_CUSTKEY = C_CUSTKEY
126    AND LO_SUPPKEY = S_SUPPKEY
127    AND LO_ORDERDATE = D_DATEKEY
128    AND C_REGION = 'ASIA'
129    AND S_REGION = 'ASIA'
130    AND D_YEAR >= 1992
131    AND D_YEAR <= 1997
132GROUP BY
133    C_NATION,
134    S_NATION,
135    D_YEAR
136ORDER BY
137    D_YEAR ASC,
138    REVENUE DESC;
139
140-- PROMPT Q3.2
141SELECT
142    C_CITY,
143    S_CITY,
144    D_YEAR,
145    SUM(LO_REVENUE) AS REVENUE
146FROM
147    CUSTOMER,
148    LINEORDER,
149    SUPPLIER,
150    DATES
151WHERE
152    LO_CUSTKEY = C_CUSTKEY
153    AND LO_SUPPKEY = S_SUPPKEY
154    AND LO_ORDERDATE = D_DATEKEY
155    AND C_NATION = 'UNITED STATES'
156    AND S_NATION = 'UNITED STATES'
157    AND D_YEAR >= 1992
158    AND D_YEAR <= 1997
159GROUP BY
160    C_CITY,
161    S_CITY,
162    D_YEAR
163ORDER BY
164    D_YEAR ASC,
165    REVENUE DESC;
166
167-- PROMPT Q3.3
168SELECT
169    C_CITY,
170    S_CITY,
171    D_YEAR,
172    SUM(LO_REVENUE) AS REVENUE
173FROM
174    CUSTOMER,
175    LINEORDER,
176    SUPPLIER,
177    DATES
178WHERE
179    LO_CUSTKEY = C_CUSTKEY
180    AND LO_SUPPKEY = S_SUPPKEY
181    AND LO_ORDERDATE = D_DATEKEY
182    AND (
183        C_CITY = 'UNITED KI1'
184        OR C_CITY = 'UNITED KI5'
185    )
186    AND (
187        S_CITY = 'UNITED KI1'
188        OR S_CITY = 'UNITED KI5'
189    )
190    AND D_YEAR >= 1992
191    AND D_YEAR <= 1997
192GROUP BY
193    C_CITY,
194    S_CITY,
195    D_YEAR
196ORDER BY
197    D_YEAR ASC,
198    REVENUE DESC;
199
200-- PROMPT Q3.4
201SELECT
202    C_CITY,
203    S_CITY,
204    D_YEAR,
205    SUM(LO_REVENUE) AS REVENUE
206FROM
207    CUSTOMER,
208    LINEORDER,
209    SUPPLIER,
210    DATES
211WHERE
212    LO_CUSTKEY = C_CUSTKEY
213    AND LO_SUPPKEY = S_SUPPKEY
214    AND LO_ORDERDATE = D_DATEKEY
215    AND (
216        C_CITY = 'UNITED KI1'
217        OR C_CITY = 'UNITED KI5'
218    )
219    AND (
220        S_CITY = 'UNITED KI1'
221        OR S_CITY = 'UNITED KI5'
222    )
223    AND D_YEARMONTH = 'Dec1997'
224GROUP BY
225    C_CITY,
226    S_CITY,
227    D_YEAR
228ORDER BY
229    D_YEAR ASC,
230    REVENUE DESC;
231
232-- PROMPT Q4.1
233SELECT
234    D_YEAR,
235    C_NATION,
236    SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
237FROM
238    DATES,
239    CUSTOMER,
240    SUPPLIER,
241    PART,
242    LINEORDER
243WHERE
244    LO_CUSTKEY = C_CUSTKEY
245    AND LO_SUPPKEY = S_SUPPKEY
246    AND LO_PARTKEY = P_PARTKEY
247    AND LO_ORDERDATE = D_DATEKEY
248    AND C_REGION = 'AMERICA'
249    AND S_REGION = 'AMERICA'
250    AND (
251        P_MFGR = 'MFGR#1'
252        OR P_MFGR = 'MFGR#2'
253    )
254GROUP BY
255    D_YEAR,
256    C_NATION
257ORDER BY
258    D_YEAR,
259    C_NATION;
260
261-- PROMPT Q4.2
262SELECT
263    D_YEAR,
264    S_NATION,
265    P_CATEGORY,
266    SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
267FROM
268    DATES,
269    CUSTOMER,
270    SUPPLIER,
271    PART,
272    LINEORDER
273WHERE
274    LO_CUSTKEY = C_CUSTKEY
275    AND LO_SUPPKEY = S_SUPPKEY
276    AND LO_PARTKEY = P_PARTKEY
277    AND LO_ORDERDATE = D_DATEKEY
278    AND C_REGION = 'AMERICA'
279    AND S_REGION = 'AMERICA'
280    AND (
281        D_YEAR = 1997
282        OR D_YEAR = 1998
283    )
284    AND (
285        P_MFGR = 'MFGR#1'
286        OR P_MFGR = 'MFGR#2'
287    )
288GROUP BY
289    D_YEAR,
290    S_NATION,
291    P_CATEGORY
292ORDER BY
293    D_YEAR,
294    S_NATION,
295    P_CATEGORY;
296
297-- PROMPT Q4.3
298SELECT
299    D_YEAR,
300    S_CITY,
301    P_BRAND,
302    SUM(LO_REVENUE - LO_SUPPLYCOST) AS PROFIT
303FROM
304    DATES,
305    CUSTOMER,
306    SUPPLIER,
307    PART,
308    LINEORDER
309WHERE
310    LO_CUSTKEY = C_CUSTKEY
311    AND LO_SUPPKEY = S_SUPPKEY
312    AND LO_PARTKEY = P_PARTKEY
313    AND LO_ORDERDATE = D_DATEKEY
314    AND S_NATION = 'UNITED STATES'
315    AND (
316        D_YEAR = 1997
317        OR D_YEAR = 1998
318    )
319    AND P_CATEGORY = 'MFGR#14'
320GROUP BY
321    D_YEAR,
322    S_CITY,
323    P_BRAND
324ORDER BY
325    D_YEAR,
326    S_CITY,
327    P_BRAND;

基准测试

MySQL

使用如下 SQL 语句在 MySQL 中创建相关的数据库和表:

 1CREATE DATABASE IF NOT EXISTS ssb;
 2
 3USE ssb;
 4
 5CREATE TABLE customer
 6(
 7    c_custkey    INTEGER,
 8    c_name       VARCHAR(25) NOT NULL,
 9    c_address    VARCHAR(40) NOT NULL,
10    c_city       VARCHAR(10) NOT NULL,
11    c_nation     VARCHAR(15) NOT NULL,
12    c_region     VARCHAR(12) NOT NULL,
13    c_phone      VARCHAR(15) NOT NULL,
14    c_mktsegment VARCHAR(10) NOT NULL
15);
16
17CREATE TABLE dates
18(
19    d_datekey          INTEGER,
20    d_date             VARCHAR(18) NOT NULL,
21    d_dayofweek        VARCHAR(18) NOT NULL,
22    d_month            VARCHAR(9)  NOT NULL,
23    d_year             INTEGER     NOT NULL,
24    d_yearmonthnum     INTEGER,
25    d_yearmonth        VARCHAR(7)  NOT NULL,
26    d_daynuminweek     INTEGER,
27    d_daynuminmonth    INTEGER,
28    d_daynuminyear     INTEGER,
29    d_monthnuminyear   INTEGER,
30    d_weeknuminyear    INTEGER,
31    d_sellingseason    VARCHAR(12) NOT NULL,
32    d_lastdayinweekfl  INTEGER,
33    d_lastdayinmonthfl INTEGER,
34    d_holidayfl        INTEGER,
35    d_weekdayfl        INTEGER
36);
37
38CREATE TABLE part
39(
40    p_partkey   INTEGER,
41    p_name      VARCHAR(22) NOT NULL,
42    p_mfgr      VARCHAR(6)  NOT NULL,
43    p_category  VARCHAR(7)  NOT NULL,
44    p_brand     VARCHAR(9)  NOT NULL,
45    p_color     VARCHAR(11) NOT NULL,
46    p_type      VARCHAR(25) NOT NULL,
47    p_size      INTEGER     NOT NULL,
48    p_container VARCHAR(10) NOT NULL
49);
50
51CREATE TABLE supplier
52(
53    s_suppkey INTEGER,
54    s_name    VARCHAR(25) NOT NULL,
55    s_address VARCHAR(25) NOT NULL,
56    s_city    VARCHAR(10) NOT NULL,
57    s_nation  VARCHAR(15) NOT NULL,
58    s_region  VARCHAR(12) NOT NULL,
59    s_phone   VARCHAR(15) NOT NULL
60);
61
62CREATE TABLE lineorder
63(
64    lo_orderkey      BIGINT,
65    lo_linenumber    BIGINT,
66    lo_custkey       INTEGER     NOT NULL,
67    lo_partkey       INTEGER     NOT NULL,
68    lo_suppkey       INTEGER     NOT NULL,
69    lo_orderdate     INTEGER     NOT NULL,
70    lo_orderpriotity VARCHAR(15) NOT NULL,
71    lo_shippriotity  INTEGER,
72    lo_quantity      BIGINT,
73    lo_extendedprice BIGINT,
74    lo_ordtotalprice BIGINT,
75    lo_discount      BIGINT,
76    lo_revenue       BIGINT,
77    lo_supplycost    BIGINT,
78    lo_tax           BIGINT,
79    lo_commitdate    INTEGER     NOT NULL,
80    lo_shipmode      VARCHAR(10) NOT NULL
81);

Presto

 1CREATE TABLE IF NOT EXISTS CUSTOMER 
 2( 
 3    C_CUSTKEY     INTEGER,
 4    C_NAME        VARCHAR(25),
 5    C_ADDRESS     VARCHAR(40),
 6    C_CITY        VARCHAR(10),
 7    C_NATION      VARCHAR(15),
 8    C_REGION      VARCHAR(12),
 9    C_PHONE       VARCHAR(15),
10    C_MKTSEGMENT  VARCHAR(10)
11);
12
13CREATE TABLE DATES 
14( 
15    D_DATEKEY          INTEGER,
16    D_DATE             VARCHAR(18),
17    D_DAYOFWEEK        VARCHAR(18),
18    D_MONTH            VARCHAR(9),
19    D_YEAR             INTEGER,
20    D_YEARMONTHNUM     INTEGER,
21    D_YEARMONTH        VARCHAR(7),
22    D_DAYNUMINWEEK     INTEGER,
23    D_DAYNUMINMONTH    INTEGER,
24    D_DAYNUMINYEAR     INTEGER,
25    D_MONTHNUMINYEAR   INTEGER,
26    D_WEEKNUMINYEAR    INTEGER,
27    D_SELLINGSEASON    VARCHAR(12),
28    D_LASTDAYINWEEKFL  INTEGER,
29    D_LASTDAYINMONTHFL INTEGER,
30    D_HOLIDAYFL        INTEGER,
31    D_WEEKDAYFL        INTEGER
32);
33                         
34CREATE TABLE PART  
35( 
36    P_PARTKEY     INTEGER,
37    P_NAME        VARCHAR(22),
38    P_MFGR        VARCHAR(6),
39    P_CATEGORY    VARCHAR(7),
40    P_BRAND       VARCHAR(9),
41    P_COLOR       VARCHAR(11),
42    P_TYPE        VARCHAR(25),
43    P_SIZE        INTEGER,
44    P_CONTAINER   VARCHAR(10)
45);
46
47CREATE TABLE SUPPLIER 
48( 
49    S_SUPPKEY     INTEGER,
50    S_NAME        VARCHAR(25),
51    S_ADDRESS     VARCHAR(25),
52    S_CITY        VARCHAR(10),
53    S_NATION      VARCHAR(15),
54    S_REGION      VARCHAR(12),
55    S_PHONE       VARCHAR(15)
56);
57
58CREATE TABLE LINEORDER 
59( 
60    LO_ORDERKEY       BIGINT,
61    LO_LINENUMBER     BIGINT,
62    LO_CUSTKEY        INTEGER,
63    LO_PARTKEY        INTEGER,
64    LO_SUPPKEY        INTEGER,
65    LO_ORDERDATE      INTEGER,
66    LO_ORDERPRIOTITY  VARCHAR(15),
67    LO_SHIPPRIOTITY   INTEGER,
68    LO_QUANTITY       BIGINT,
69    LO_EXTENDEDPRICE  BIGINT,
70    LO_ORDTOTALPRICE  BIGINT,
71    LO_DISCOUNT       BIGINT,
72    LO_REVENUE        BIGINT,
73    LO_SUPPLYCOST     BIGINT,
74    LO_TAX            BIGINT,
75    LO_COMMITDATE     INTEGER,
76    LO_SHIPMODE       VARCHAR(10)
77);

总结

参考资料

  1. P. O’Neil, E. O’Neil, X. Chen, and S. Revilak, “The star schema benchmark and augmented fact table indexing,” in Technology Conference on Performance Evaluation and Benchmarking, 2009, pp. 237–252.
  2. [electrum/ssb-dbgen: Star Schema Benchmark dbgen](electrum/ssb-dbgen: Star Schema Benchmark dbgen)
下一页
上一页