本次测试针对开源自建的Presto与阿里云云原生数据湖分析DLA Presto在OSS数据源上执行TPC-H查询的性能做了对比分析。您可以按照本文介绍自行测试对比,快速了解云原生数据湖分析(DLA)Presto引擎的性能。
- 准备TPC-H测试数据。关于如何生成TPC-H测试数据,具体请参考TPC-H官方文档。
TPC-H测试数据生成后,需要分别转换成Parquet和ORC格式,上传到OSS。
- 分别在开源Presto和DLA Presto中建表。示例如下:
对于开源Presto集群,可以在presto中建表(需要修改配置,默认不支持),也可以在hive中建表。以下是hive的建表语句。
CREATE SCHEMA if not exists tpch_1000x_orc location 'oss://path/to/tpch/tpch_1000x_orc/' ; CREATE EXTERNAL TABLE tpch_1000x_orc.lineitem ( l_orderkey bigint, l_partkey bigint, l_suppkey bigint, l_linenumber int, l_quantity double, l_extendedprice double, l_discount double, l_tax double, l_returnflag string, l_linestatus string, l_shipdate string, l_commitdate string, l_receiptdate string, l_shipinstruct string, l_shipmode string, l_comment string ) STORED AS ORC LOCATION 'oss://path/to/tpch/tpch_1000x_orc/lineitem_orc/' ; CREATE EXTERNAL TABLE tpch_1000x_orc.customer ( c_custkey bigint, c_name string, c_address string, c_nationkey int, c_phone string, c_acctbal double, c_mktsegment string, c_comment string ) STORED AS ORC LOCATION 'oss://path/to/tpch/tpch_1000x_orc/customer_orc/' ; CREATE EXTERNAL TABLE tpch_1000x_orc.nation ( n_nationkey int, n_name string, n_regionkey int, n_comment string ) STORED AS ORC LOCATION 'oss://path/to/tpch/tpch_1000x_orc/nation_orc/' ; CREATE EXTERNAL TABLE tpch_1000x_orc.orders ( o_orderkey bigint, o_custkey bigint, o_orderstatus string, o_totalprice double, o_orderdate string, o_orderpriority string, o_clerk string, o_shippriority int, o_comment string ) STORED AS ORC LOCATION 'oss://path/to/tpch/tpch_1000x_orc/orders_orc/' ; CREATE EXTERNAL TABLE tpch_1000x_orc.part ( p_partkey bigint, p_name string, p_mfgr string, p_brand string, p_type string, p_size int, p_container string, p_retailprice double, p_comment string ) STORED AS ORC LOCATION 'oss://path/to/tpch/tpch_1000x_orc/part_orc/' ; CREATE EXTERNAL TABLE tpch_1000x_orc.partsupp ( ps_partkey bigint, ps_suppkey bigint, ps_availqty int, ps_supplycost double, ps_comment string ) STORED AS ORC LOCATION 'oss://path/to/tpch/tpch_1000x_orc/partsupp_orc/' ; CREATE EXTERNAL TABLE tpch_1000x_orc.region ( r_regionkey int, r_name string, r_comment string ) STORED AS ORC LOCATION 'oss://path/to/tpch/tpch_1000x_orc/region_orc/' ; CREATE EXTERNAL TABLE tpch_1000x_orc.supplier ( s_suppkey bigint, s_name string, s_address string, s_nationkey int, s_phone string, s_acctbal double, s_comment string ) STORED AS ORC LOCATION 'oss://path/to/tpch/tpch_1000x_orc/supplier_orc/' ; CREATE SCHEMA if not exists tpch_1000x_parquet location 'oss://path/to/tpch/tpch_1000x_parquet/' ; CREATE EXTERNAL TABLE tpch_1000x_parquet.lineitem ( l_orderkey bigint, l_partkey bigint, l_suppkey bigint, l_linenumber int, l_quantity double, l_extendedprice double, l_discount double, l_tax double, l_returnflag string, l_linestatus string, l_shipdate string, l_commitdate string, l_receiptdate string, l_shipinstruct string, l_shipmode string, l_comment string ) STORED AS PARQUET LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/lineitem_parquet/' ; CREATE EXTERNAL TABLE tpch_1000x_parquet.customer ( c_custkey bigint, c_name string, c_address string, c_nationkey int, c_phone string, c_acctbal double, c_mktsegment string, c_comment string ) STORED AS PARQUET LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/customer_parquet/' ; CREATE EXTERNAL TABLE tpch_1000x_parquet.nation ( n_nationkey int, n_name string, n_regionkey int, n_comment string ) STORED AS PARQUET LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/nation_parquet/' ; CREATE EXTERNAL TABLE tpch_1000x_parquet.orders ( o_orderkey bigint, o_custkey bigint, o_orderstatus string, o_totalprice double, o_orderdate string, o_orderpriority string, o_clerk string, o_shippriority int, o_comment string ) STORED AS PARQUET LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/orders_parquet/' ; CREATE EXTERNAL TABLE tpch_1000x_parquet.part ( p_partkey bigint, p_name string, p_mfgr string, p_brand string, p_type string, p_size int, p_container string, p_retailprice double, p_comment string ) STORED AS PARQUET LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/part_parquet/' ; CREATE EXTERNAL TABLE tpch_1000x_parquet.partsupp ( ps_partkey bigint, ps_suppkey bigint, ps_availqty int, ps_supplycost double, ps_comment string ) STORED AS PARQUET LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/partsupp_parquet/' ; CREATE EXTERNAL TABLE tpch_1000x_parquet.region ( r_regionkey int, r_name string, r_comment string ) STORED AS PARQUET LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/region_parquet/' ; CREATE EXTERNAL TABLE tpch_1000x_parquet.supplier ( s_suppkey bigint, s_name string, s_address string, s_nationkey int, s_phone string, s_acctbal double, s_comment string ) STORED AS PARQUET LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/supplier_parquet/' ;
DLA Presto中的建表语句如下:create external table if not exists `tpch_1000x_orc`.`customer` ( `c_custkey` bigint, `c_name` string, `c_address` string, `c_nationkey` int, `c_phone` string, `c_acctbal` double, `c_mktsegment` string, `c_comment` string ) STORED AS `ORC` LOCATION 'oss://path/to/tpch/tpch_1000x_orc/customer_orc/'; CREATE EXTERNAL TABLE if not exists `tpch_1000x_orc`.`lineitem` ( `l_orderkey` bigint, `l_partkey` bigint, `l_suppkey` bigint, `l_linenumber` int, `l_quantity` double, `l_extendedprice` double, `l_discount` double, `l_tax` double, `l_returnflag` string, `l_linestatus` string, `l_shipdate` string, `l_commitdate` string, `l_receiptdate` string, `l_shipinstruct` string, `l_shipmode` string, `l_comment` string ) STORED AS `ORC` LOCATION 'oss://path/to/tpch/tpch_1000x_orc/lineitem_orc/'; create external table if not exists `tpch_1000x_orc`.`nation` ( `n_nationkey` int, `n_name` string, `n_regionkey` int, `n_comment` string ) STORED AS `ORC` LOCATION 'oss://path/to/tpch/tpch_1000x_orc/nation_orc/'; create external table if not exists `tpch_1000x_orc`.`orders` ( `o_orderkey` bigint, `o_custkey` bigint, `o_orderstatus` string, `o_totalprice` double, `o_orderdate` string, `o_orderpriority` string, `o_clerk` string, `o_shippriority` int, `o_comment` string ) STORED AS `ORC` LOCATION 'oss://path/to/tpch/tpch_1000x_orc/orders_orc/'; create external table if not exists `tpch_1000x_orc`.`part` ( `p_partkey` bigint, `p_name` string, `p_mfgr` string, `p_brand` string, `p_type` string, `p_size` int, `p_container` string, `p_retailprice` double, `p_comment` string ) STORED AS `ORC` LOCATION 'oss://path/to/tpch/tpch_1000x_orc/part_orc/'; create external table if not exists `tpch_1000x_orc`.`partsupp` ( `ps_partkey` bigint, `ps_suppkey` bigint, `ps_availqty` int, `ps_supplycost` double, `ps_comment` string ) STORED AS `ORC` LOCATION 'oss://path/to/tpch/tpch_1000x_orc/partsupp_orc/'; create external table if not exists `tpch_1000x_orc`.`region` ( `r_regionkey` int, `r_name` string, `r_comment` string ) STORED AS `ORC` LOCATION 'oss://path/to/tpch/tpch_1000x_orc/region_orc/'; create external table if not exists `tpch_1000x_orc`.`supplier` ( `s_suppkey` bigint, `s_name` string, `s_address` string, `s_nationkey` int, `s_phone` string, `s_acctbal` double, `s_comment` string ) STORED AS `ORC` LOCATION 'oss://path/to/tpch/tpch_1000x_orc/supplier_orc/'; create database if not exists`tpch_1000x_parquet` WITH DBPROPERTIES ( catalog = 'oss', location = 'oss://path/to/tpch/tpch_1000x_parquet/' ); create external table if not exists `tpch_1000x_parquet`.`customer` ( `c_custkey` bigint, `c_name` string, `c_address` string, `c_nationkey` int, `c_phone` string, `c_acctbal` double, `c_mktsegment` string, `c_comment` string ) STORED AS `PARQUET` LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/customer_parquet/'; CREATE EXTERNAL TABLE if not exists `tpch_1000x_parquet`.`lineitem` ( `l_orderkey` bigint, `l_partkey` bigint, `l_suppkey` bigint, `l_linenumber` int, `l_quantity` double, `l_extendedprice` double, `l_discount` double, `l_tax` double, `l_returnflag` string, `l_linestatus` string, `l_shipdate` string, `l_commitdate` string, `l_receiptdate` string, `l_shipinstruct` string, `l_shipmode` string, `l_comment` string ) STORED AS `PARQUET` LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/lineitem_parquet/'; create external table if not exists `tpch_1000x_parquet`.`nation` ( `n_nationkey` int, `n_name` string, `n_regionkey` int, `n_comment` string ) STORED AS `PARQUET` LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/nation_parquet/'; create external table if not exists `tpch_1000x_parquet`.`orders` ( `o_orderkey` bigint, `o_custkey` bigint, `o_orderstatus` string, `o_totalprice` double, `o_orderdate` string, `o_orderpriority` string, `o_clerk` string, `o_shippriority` int, `o_comment` string ) STORED AS `PARQUET` LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/orders_parquet/'; create external table if not exists `tpch_1000x_parquet`.`part` ( `p_partkey` bigint, `p_name` string, `p_mfgr` string, `p_brand` string, `p_type` string, `p_size` int, `p_container` string, `p_retailprice` double, `p_comment` string ) STORED AS `PARQUET` LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/part_parquet/'; create external table if not exists `tpch_1000x_parquet`.`partsupp` ( `ps_partkey` bigint, `ps_suppkey` bigint, `ps_availqty` int, `ps_supplycost` double, `ps_comment` string ) STORED AS `PARQUET` LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/partsupp_parquet/'; create external table if not exists `tpch_1000x_parquet`.`region` ( `r_regionkey` int, `r_name` string, `r_comment` string ) STORED AS `PARQUET` LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/region_parquet/'; create external table if not exists `tpch_1000x_parquet`.`supplier` ( `s_suppkey` bigint, `s_name` string, `s_address` string, `s_nationkey` int, `s_phone` string, `s_acctbal` double, `s_comment` string ) STORED AS `PARQUET` LOCATION 'oss://path/to/tpch/tpch_1000x_parquet/supplier_parquet/';
- 分别在开源Presto集群和DLA Presto上面执行TPC-H查询,并记录每条查询的执行时间。
说明
- 不要同时在开源Presto集群和DLA Presto上执行查询,避免因为OSS带宽的争抢造成测试结果不准确。
- DLA Presto需要添加以下hint指定查询在特定的集群中执行。
/*+cluster=your-cu-name*/