Power BI & Synapse Part 4 — Serverless SQL: How much will it cost me?

How much would you pay if you use Power BI in a combo with Synapse Analytics? I’ve put Power BI & Synapse Analytics Serverless SQL pool to an ultimate benchmarking test and here are the results!

Image for post
Image for post
Image by author

Serverless SQL pool — The next big thing

Scenario

Image for post
Image for post
Image by author
DROP VIEW IF EXISTS taxi201902csv;
GO

CREATE VIEW taxi201902csv AS
SELECT
VendorID
,cast(tpep_pickup_datetime as DATE) tpep_pickup_datetime
,cast(tpep_dropoff_datetime as DATE) tpep_dropoff_datetime
,passenger_count
,trip_distance
,RateCodeID
,store_and_fwd_flag
,PULocationID
,DOLocationID
,payment_type
,fare_amount
,extra
,mta_tax
,tip_amount
,tolls_amount
,improvement_surcharge
,total_amount
,congestion_surcharge
FROM
    OPENROWSET(
        BULK N'https://nikola.dfs.core.windows.net/nikola/Data/yellow_tripdata_2019-02.csv',
        FORMAT = 'CSV',
        PARSER_VERSION='2.0',
HEADER_ROW = TRUE
) 
WITH(
VendorID INT,
tpep_pickup_datetime DATETIME2,
tpep_dropoff_datetime DATETIME2,
passenger_count INT,
trip_distance DECIMAL(10,2),
RateCodeID INT,
store_and_fwd_flag VARCHAR(10),
PULocationID INT,
DOLocationID INT,
payment_type INT,
fare_amount DECIMAL(10,2),
extra DECIMAL(10,2),
mta_tax DECIMAL(10,2),
tip_amount DECIMAL(10,2),
tolls_amount DECIMAL(10,2),
improvement_surcharge DECIMAL(10,2),
total_amount DECIMAL(10,2),
congestion_surcharge DECIMAL(10,2)
)
AS [taxi201902csv]
Image for post
Image for post
Image by author
DROP VIEW IF EXISTS taxi201901parquet;
GO

CREATE VIEW taxi201901parquet AS
SELECT
VendorID
,CAST(TpepPickupDatetime AS DATE) TpepPickupDatetime
,CAST(TpepDropoffDatetime AS DATE) TpepDropoffDatetime
,PassengerCount
,TripDistance
,PuLocationId
,DoLocationId
,StartLon
,StartLat
,EndLon
,EndLat
,RateCodeId
,StoreAndFwdFlag
,PaymentType
,FareAmount
,Extra
,MtaTax
,ImprovementSurcharge
,TipAmount
,TollsAmount
,TotalAmount
FROM
OPENROWSET(
BULK 'puYear=*/puMonth=*/*.snappy.parquet',
DATA_SOURCE = 'YellowTaxi',
FORMAT='PARQUET'
) nyc
WHERE
nyc.filepath(1) = 2019
AND nyc.filepath(2) IN (1)
AND tpepPickupDateTime BETWEEN CAST('1/1/2019' AS datetime) AND CAST('1/31/2019' AS datetime)

CSV vs Parquet — What do I need to know?

Use Case #1 — Import CSV data into Power BI

SELECT * FROM sys.dm_external_data_processed
WHERE type = 'daily'
Image for post
Image for post
Image by author
Image for post
Image for post
Image by author
let
StartDate = #date(StartYear,1,1),
EndDate = #date(EndYear,12,31),
NumberOfDays = Duration.Days( EndDate - StartDate ),
Dates = List.Dates(StartDate, NumberOfDays+1, #duration(1,0,0,0)),
#"Converted to Table" = Table.FromList(Dates, Splitter.SplitByNothing(), null, null, ExtraValues.Error),
#"Renamed Columns" = Table.RenameColumns(#"Converted to Table",{{"Column1", "FullDateAlternateKey"}}),
#"Changed Type" = Table.TransformColumnTypes(#"Renamed Columns",{{"FullDateAlternateKey", type date}}),
#"Inserted Year" = Table.AddColumn(#"Changed Type", "Year", each Date.Year([FullDateAlternateKey]), type number),
#"Inserted Month" = Table.AddColumn(#"Inserted Year", "Month", each Date.Month([FullDateAlternateKey]), type number),
#"Inserted Month Name" = Table.AddColumn(#"Inserted Month", "Month Name", each Date.MonthName([FullDateAlternateKey]), type text),
#"Inserted Quarter" = Table.AddColumn(#"Inserted Month Name", "Quarter", each Date.QuarterOfYear([FullDateAlternateKey]), type number),
#"Inserted Week of Year" = Table.AddColumn(#"Inserted Quarter", "Week of Year", each Date.WeekOfYear([FullDateAlternateKey]), type number),
#"Inserted Week of Month" = Table.AddColumn(#"Inserted Week of Year", "Week of Month", each Date.WeekOfMonth([FullDateAlternateKey]), type number),
#"Inserted Day" = Table.AddColumn(#"Inserted Week of Month", "Day", each Date.Day([FullDateAlternateKey]), type number),
#"Inserted Day of Week" = Table.AddColumn(#"Inserted Day", "Day of Week", each Date.DayOfWeek([FullDateAlternateKey]), type number),
#"Inserted Day of Year" = Table.AddColumn(#"Inserted Day of Week", "Day of Year", each Date.DayOfYear([FullDateAlternateKey]), type number),
#"Inserted Day Name" = Table.AddColumn(#"Inserted Day of Year", "Day Name", each Date.DayOfWeekName([FullDateAlternateKey]), type text)
in
#"Inserted Day Name"
Image for post
Image for post
Image by author

Use Case #2 — DirectQuery over CSV files

Image for post
Image for post
Image by author
Image for post
Image for post
Image by author
Image for post
Image for post
Image by author

Use Case #3 — Use Date slicer in DirectQuery mode

Image for post
Image for post
Image by author
/*Query 1*/
SELECT
TOP (1000001) [semijoin1].[c1],SUM([a0])
AS [a0]
FROM
(
(
SELECT [t1].[tpep_pickup_datetime] AS [c14],[t1].[total_amount] AS [a0]
FROM
(
(SELECT * FROM dbo.taxi201901csv
UNION ALL
SELECT * FROM dbo.taxi201902csv
)
)
AS [t1]
)
AS [basetable0]
INNER JOIN
(
(SELECT 3 AS [c1],CAST( '20190101 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 4 AS [c1],CAST( '20190102 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 5 AS [c1],CAST( '20190103 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 6 AS [c1],CAST( '20190104 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 7 AS [c1],CAST( '20190105 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 8 AS [c1],CAST( '20190106 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 9 AS [c1],CAST( '20190107 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 10 AS [c1],CAST( '20190108 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 11 AS [c1],CAST( '20190109 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 12 AS [c1],CAST( '20190110 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 13 AS [c1],CAST( '20190111 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 14 AS [c1],CAST( '20190112 00:00:00' AS datetime) AS [c14] ) UNION ALL
(SELECT 15 AS [c1],CAST( '20190113 00:00:00' AS datetime) AS [c14] )
)
AS [semijoin1] on
(
([semijoin1].[c14] = [basetable0].[c14])
)
)
GROUP BY [semijoin1].[c1]

/*Query 2*/
SELECT SUM([t1].[total_amount])
AS [a0]
FROM
(
(SELECT * FROM dbo.taxi201901csv
UNION ALL
SELECT * FROM dbo.taxi201902csv)
)
AS [t1]
WHERE
(
([t1].[tpep_pickup_datetime] IN (CAST( '20190112 00:00:00' AS datetime),CAST( '20190113 00:00:00' AS datetime),CAST( '20190101 00:00:00' AS datetime),CAST( '20190102 00:00:00' AS datetime),CAST( '20190103 00:00:00' AS datetime),CAST( '20190104 00:00:00' AS datetime),CAST( '20190105 00:00:00' AS datetime),CAST( '20190106 00:00:00' AS datetime),CAST( '20190107 00:00:00' AS datetime),CAST( '20190108 00:00:00' AS datetime),CAST( '20190109 00:00:00' AS datetime),CAST( '20190110 00:00:00' AS datetime),CAST( '20190111 00:00:00' AS datetime)))
)

Use Case #4 — Aggregated table in DirectQuery mode

Image for post
Image for post
Image by author
Image for post
Image for post
Image by author

Use Case #5 — Aggregated table in Import mode

Image for post
Image for post
Image by author

Use Case #6 — Aggregated data in Serverless SQL pool

DROP VIEW IF EXISTS taxi201901_02_agg;
GO

CREATE VIEW taxi201901_02_agg AS
  SELECT  CAST(C2 AS DATE) AS tpep_pickup_datetime,
SUM(CAST(C17 AS DECIMAL(10,2))) AS total_amount
FROM
    OPENROWSET(
        BULK N'https://nikola.dfs.core.windows.net/nikola/Data/yellow_tripdata_2019-01.csv',
        FORMAT = 'CSV',
        PARSER_VERSION='2.0',
HEADER_ROW = TRUE
) 
AS [taxi201901_02_agg]
GROUP BY CAST(C2 AS DATE)
Image for post
Image for post
Image by author

Use Case #7 — Import Parquet files

Use Case #8 — DirectQuery over Parquet files

SELECT 
TOP (1000001) [semijoin1].[c1],SUM([a0])
AS [a0]
FROM
(
(
SELECT [t1].[TpepPickupDatetime] AS [c13],[t1].[TotalAmount] AS [a0]
FROM
(
(SELECT *
FROM taxi201901parquet
UNION ALL
SELECT *
FROM taxi201902parquet)
)
AS [t1]
)
AS [basetable0]
INNER JOIN
(
(SELECT 3 AS [c1],CAST( '20190101 00:00:00' AS datetime) AS [c13] ) UNION ALL
(SELECT 4 AS [c1],CAST( '20190102 00:00:00' AS datetime) AS [c13] ) UNION ALL
(SELECT 5 AS [c1],CAST( '20190103 00:00:00' AS datetime) AS [c13] ) UNION ALL
(SELECT 6 AS [c1],CAST( '20190104 00:00:00' AS datetime) AS [c13] ) UNION ALL
(SELECT 7 AS [c1],CAST( '20190105 00:00:00' AS datetime) AS [c13] ) UNION ALL
(SELECT 8 AS [c1],CAST( '20190106 00:00:00' AS datetime) AS [c13] ) UNION ALL
(SELECT 9 AS [c1],CAST( '20190107 00:00:00' AS datetime) AS [c13] ) UNION ALL
(SELECT 10 AS [c1],CAST( '20190108 00:00:00' AS datetime) AS [c13] ) UNION ALL
(SELECT 11 AS [c1],CAST( '20190109 00:00:00' AS datetime) AS [c13] ) UNION ALL
…..

Use Case #9 — Aggregated data in the Serverless SQL pool

Final Verdict

Image for post
Image for post
Image by author

Conclusion

Data Mozart — Don’t let data make noise, create music from it | data-mozart.com | https://www.linkedin.com/in/nikola-ilic-data-mozart/

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store