3.8 组内复杂判断
按月统计出当月发生过连续三天都有订单的用户数
SQL
WITH a AS(
SELECT count (1) AS num, day (OrderDate) days,
CustomerID,year(OrderDate) Years,month(OrderDate) Months
FROM Orders
WHERE OrderDate>='2022-01-01'
GROUP BY CustomerID, day (OrderDate), year(OrderDate),month(OrderDate)),
b AS (
SELECT
lag(days,1) OVER(PARTITION BY CustomerID, Years,Months ORDER BY days) pd,
days, CustomerID,Years,Months
FROM a),
c AS (
SELECT *,
CASE WHEN days-pd=1 THEN 0 ELSE 1 END AS tmp
FROM b),
d AS (
SELECT CustomerID,Years,Months,sum (tmp) OVER (
PARTITION BY CustomerID,Years,Months ORDER BY days) NoOrderDays
FROM c),
e AS (
SELECT CustomerID, Years,Months,NoOrderDays,count (*) ContinuousDays
FROM d
GROUP BY CustomerID,Years,Months,NoOrderDays),
f AS (
SELECT CustomerID,Years,Months,max (ContinuousDays) mCon
FROM e
GROUP BY CustomerID,Years,Months
HAVING max (ContinuousDays)>=3)
SELECT Years,Months,count (*)
FROM f
GROUP BY Years,Months
SPL
A | |
---|---|
1 | >st=date(“2022-01-01”),start=days@o(st) |
2 | =file(“Orders_Account.ctx”).open().cursor@x(CustomerID,OrderDate;OrderDate>=start) |
3 | =A2.group(CustomerID,month@y(OrderDate)) |
4 | =A3.(~.select@1((a=case(OrderDate-OrderDate[-1],0:a,1:a+1;1))==3)) |
5 | =A4.groups(year(OrderDate):Years,month(OrderDate):Months; count(1)) |
A3 有序读出每用户每月的订单数据形成小集合
A4 针对小集合计算出是否发生过连续三天下单的情况
A5 A4 返回数据已对 CustomerID 有序了,只要简单 count 就可以得到去重计数了