Skip to content
This repository was archived by the owner on Jun 29, 2019. It is now read-only.

Commit 3dfead3

Browse files
author
xibingaomsft
committed
Update SQLDW draft doc
1 parent 359645c commit 3dfead3

15 files changed

Lines changed: 220 additions & 154 deletions

Misc/SQLDW/Sample Scripts/SQLDW.ipynb

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
-- Report the (approximate) number of rows in table nyctaxi_trip from partition
-- metadata, without scanning the table.
-- Only the heap (index_id = 0) or the clustered index (index_id = 1) is summed:
-- sys.partitions also holds one row per partition of every nonclustered index,
-- and including those would count each table row more than once.
SELECT SUM(rows)
FROM sys.partitions
WHERE object_id = OBJECT_ID('nyctaxi_trip')
  AND index_id IN (0, 1)
3+
4+
-- Count the columns of table nyctaxi_trip via the information_schema.columns view
SELECT COUNT(*)
FROM information_schema.columns AS c
WHERE c.table_name = 'nyctaxi_trip'
6+
7+
-- Medallions with more than 100 rows in nyctaxi_fare for January through March 2013.
-- The date filter is a half-open range: with a closed BETWEEN ... AND '20130331',
-- a pickup_datetime that carries a time of day would be excluded for every trip
-- after midnight on March 31.
SELECT medallion, COUNT(*)
FROM nyctaxi_fare
WHERE pickup_datetime >= '20130101'
  AND pickup_datetime < '20130401'
GROUP BY medallion
HAVING COUNT(*) > 100
12+
13+
-- (medallion, hack_license) pairs with more than 100 rows in nyctaxi_fare for January 2013.
-- The date filter is a half-open range: with a closed BETWEEN ... AND '20130131',
-- a pickup_datetime that carries a time of day would be excluded for every trip
-- after midnight on January 31.
SELECT medallion, hack_license, COUNT(*)
FROM nyctaxi_fare
WHERE pickup_datetime >= '20130101'
  AND pickup_datetime < '20130201'
GROUP BY medallion, hack_license
HAVING COUNT(*) > 100
18+
19+
-- Count nyctaxi_trip rows for January through March 2013 whose coordinates look invalid:
-- any coordinate outside [-90, 90], or a pickup/dropoff point recorded as ('0', '0').
-- The coordinate columns are compared to the string '0' and CAST to float for the
-- range checks, so they are stored as character data.
-- The date filter is a half-open range so that trips after midnight on March 31 are
-- not dropped when pickup_datetime carries a time of day.
-- NOTE(review): longitudes are checked against +/-90 rather than +/-180; kept as in
-- the original query -- confirm this is the intended sanity bound for this data set.
SELECT COUNT(*)
FROM nyctaxi_trip
WHERE pickup_datetime >= '20130101'
  AND pickup_datetime < '20130401'
  AND (CAST(pickup_longitude AS float) NOT BETWEEN -90 AND 90
    OR CAST(pickup_latitude AS float) NOT BETWEEN -90 AND 90
    OR CAST(dropoff_longitude AS float) NOT BETWEEN -90 AND 90
    OR CAST(dropoff_latitude AS float) NOT BETWEEN -90 AND 90
    OR (pickup_longitude = '0' AND pickup_latitude = '0')
    OR (dropoff_longitude = '0' AND dropoff_latitude = '0'))
27+
28+
-- Frequency of tipped (tip_amount > 0) versus not-tipped fares for calendar year 2013.
-- The derived table returns only the flag the outer query groups on.
-- The date filter is a half-open range so that trips after midnight on December 31
-- are not dropped when pickup_datetime carries a time of day.
SELECT tipped, COUNT(*) AS tip_freq
FROM (
    SELECT CASE WHEN (tip_amount > 0) THEN 1 ELSE 0 END AS tipped
    FROM nyctaxi_fare
    WHERE pickup_datetime >= '20130101'
      AND pickup_datetime < '20140101'
) tc
GROUP BY tipped
33+
34+
-- Frequency of fares per tip class for calendar year 2013.
-- Classes: 0 = no tip, 1 = (0, 5], 2 = (5, 10], 3 = (10, 20], 4 = everything else
-- (the ELSE branch also receives any value no earlier branch matches, e.g. NULL).
-- The date filter is a half-open range so that trips after midnight on December 31
-- are not dropped when pickup_datetime carries a time of day.
SELECT tip_class, COUNT(*) AS tip_freq
FROM (
    SELECT CASE
               WHEN (tip_amount = 0) THEN 0
               WHEN (tip_amount > 0 AND tip_amount <= 5) THEN 1
               WHEN (tip_amount > 5 AND tip_amount <= 10) THEN 2
               WHEN (tip_amount > 10 AND tip_amount <= 20) THEN 3
               ELSE 4
           END AS tip_class
    FROM nyctaxi_fare
    WHERE pickup_datetime >= '20130101'
      AND pickup_datetime < '20140101'
) tc
GROUP BY tip_class
45+
46+
47+
GO

/****** Object: UserDefinedFunction [dbo].[fnCalculateDistance] ******/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- Drop any existing version so the CREATE FUNCTION that follows can run again.
-- Both the existence check and the DROP are qualified with [dbo], the schema the
-- function is created in, so they act on the same object regardless of the
-- caller's default schema.
IF EXISTS (SELECT * FROM sys.objects
           WHERE object_id = OBJECT_ID(N'[dbo].[fnCalculateDistance]')
             AND type IN ('FN', 'IF'))
    DROP FUNCTION [dbo].[fnCalculateDistance]
58+
GO
59+
60+
CREATE FUNCTION [dbo].[fnCalculateDistance] (@Lat1 float, @Long1 float, @Lat2 float, @Long2 float)
-- User-defined function that calculates the direct (great-circle) distance, in miles,
-- between two geographical coordinates using the spherical law of cosines.
--   @Lat1, @Long1 : latitude / longitude of the first point, in decimal degrees
--   @Lat2, @Long2 : latitude / longitude of the second point, in decimal degrees
--   Returns       : distance in miles (>= 0); NULL if any argument is NULL
RETURNS float
AS
BEGIN
    DECLARE @cos_angle float

    -- Convert to radians (57.2958 ~= 180 / PI)
    SET @Lat1 = @Lat1 / 57.2958
    SET @Long1 = @Long1 / 57.2958
    SET @Lat2 = @Lat2 / 57.2958
    SET @Long2 = @Long2 / 57.2958

    -- Cosine of the central angle between the two points
    SET @cos_angle = (SIN(@Lat1) * SIN(@Lat2)) + (COS(@Lat1) * COS(@Lat2) * COS(@Long2 - @Long1))

    -- Floating-point rounding can push the cosine marginally outside [-1, 1]
    -- (e.g. for identical points), which ACOS rejects; clamp it back into range.
    SET @cos_angle = CASE
                         WHEN @cos_angle > 1 THEN 1
                         WHEN @cos_angle < -1 THEN -1
                         ELSE @cos_angle
                     END

    -- Convert to miles (3958.75 = sphere radius in miles).
    -- ACOS covers the full 0..PI range: the earlier ATAN(SQRT(1 - c^2) / c) form
    -- returned 0 for points exactly 90 degrees apart and a negative distance for
    -- points more than 90 degrees apart.
    RETURN 3958.75 * ACOS(@cos_angle)
END
80+
GO
81+
82+
-- Join each trip to its fare record on (medallion, hack_license, pickup_datetime) and
-- derive two tip labels:
--   tipped    : 1 if tip_amount > 0, otherwise 0
--   tip_class : 0 = no tip, 1 = (0, 5], 2 = (5, 10], 3 = (10, 20], 4 = everything else
-- Only trips whose pickup minute-of-hour equals 1 are kept, which takes a subset of
-- the joined rows; rows with a pickup or dropoff longitude of '0' are excluded.
SELECT t.*, f.payment_type, f.fare_amount, f.surcharge, f.mta_tax, f.tolls_amount, f.total_amount, f.tip_amount,
    CASE WHEN (f.tip_amount > 0) THEN 1 ELSE 0 END AS tipped,
    CASE WHEN (f.tip_amount = 0) THEN 0
         WHEN (f.tip_amount > 0 AND f.tip_amount <= 5) THEN 1
         WHEN (f.tip_amount > 5 AND f.tip_amount <= 10) THEN 2
         WHEN (f.tip_amount > 10 AND f.tip_amount <= 20) THEN 3
         ELSE 4
    END AS tip_class
FROM nyctaxi_trip AS t
INNER JOIN nyctaxi_fare AS f
    ON t.medallion = f.medallion
   AND t.hack_license = f.hack_license
   AND t.pickup_datetime = f.pickup_datetime
WHERE DATEPART(minute, t.pickup_datetime) = 1
  AND t.pickup_longitude <> '0'
  AND t.dropoff_longitude <> '0'
96+
97+
98+
-- Materialize the trip/fare sample as table nyctaxi_sample (CREATE TABLE AS SELECT),
-- stored as a clustered columnstore index and hash-distributed on medallion.
-- Each trip is joined to its fare record on (medallion, hack_license, pickup_datetime)
-- and two tip labels are derived:
--   tipped    : 1 if tip_amount > 0, otherwise 0
--   tip_class : 0 = no tip, 1 = (0, 5], 2 = (5, 10], 3 = (10, 20], 4 = everything else
-- Only trips whose pickup minute-of-hour equals 1 are kept; rows with a pickup or
-- dropoff longitude of '0' are excluded.
CREATE TABLE nyctaxi_sample
WITH
(
    CLUSTERED COLUMNSTORE INDEX,
    DISTRIBUTION = HASH(medallion)
)
AS
(
    SELECT t.*, f.payment_type, f.fare_amount, f.surcharge, f.mta_tax, f.tolls_amount, f.total_amount, f.tip_amount,
        CASE WHEN (f.tip_amount > 0) THEN 1 ELSE 0 END AS tipped,
        CASE WHEN (f.tip_amount = 0) THEN 0
             WHEN (f.tip_amount > 0 AND f.tip_amount <= 5) THEN 1
             WHEN (f.tip_amount > 5 AND f.tip_amount <= 10) THEN 2
             WHEN (f.tip_amount > 10 AND f.tip_amount <= 20) THEN 3
             ELSE 4
        END AS tip_class
    FROM nyctaxi_trip AS t
    INNER JOIN nyctaxi_fare AS f
        ON t.medallion = f.medallion
       AND t.hack_license = f.hack_license
       AND t.pickup_datetime = f.pickup_datetime
    WHERE DATEPART(minute, t.pickup_datetime) = 1
      AND t.pickup_longitude <> '0'
      AND t.dropoff_longitude <> '0'
)

0 commit comments

Comments
 (0)