5.1 有序归并
本章中要尝试更大数据量,将订单表按年分别存储成不同的表,表名为 Orders+ 年份,如 Orders2021,Orders2022,依此类推,表结构均和 Orders 相同
SQL
SELECT ProductID,City,sum(Amount) Amount
FROM Customers
INNER JOIN Orders2021
ON Customers.CustomerID= Orders2021.CustomerID
WHERE Orders2021.Quantity>5
GROUP BY ProductID,City
SPL
本例为主子表关联,且主子两张表的数据量均超出内存容量,需要用游标读出,此时应先把两张表均按关联字段排序,然后再用有序归并关联,效率最高。
实际使用中,可以参考前面的【账户有序存储】的做法,将数据事先按关联字段有序存储。本章后面的所有例子,都默认已经按主表的主键字段有序存储。
A | |
---|---|
1 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,ProductID,Amount;Quantity>5) |
2 | =file(“Customers.ctx”).open().cursor@x(CustomerID,City) |
3 | =joinx(A2:primary, CustomerID; A1:sub, CustomerID) |
4 | =A3.groups(sub.ProductID, primary.City; sum(sub.Amount):Amount) |
SQL
SELECT Customers.CustomerID,CustomerName,sum(Amount) Amount
FROM Customers
LEFT JOIN Orders2021
ON Customers.CustomerID= Orders2021.CustomerID
GROUP BY Customers.CustomerID,CustomerName
SPL
A | |
---|---|
1 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount) |
2 | =file(“Customers.ctx”).open().cursor@x(CustomerID,CustomerName) |
3 | =joinx@1(A2:primary, CustomerID; A1:sub, CustomerID) |
4 | =A3.group(primary.CustomerID;primary.CustomerName,~.sum(sub.Amount):Amount) |
5 | =file(“result.btx”).export@b(A4) |
A4 此处为大分组结果集,内存放不下,且数据按分组字段有序,因此使用 group 聚合返回成游标。
SQL
WITH a AS(
SELECT CustomerID,sum(Amount) Amount2020
FROM Orders2020
GROUP BY CustomerID),
b AS(
SELECT CustomerID,sum(Amount) Amount2021
FROM Orders2021
GROUP BY CustomerID)
SELECT
coalesce ( a.CustomerID, b.CustomerID ) CustomerID,
Amount2020, Amount2021,Amount2021/Amount2020-1 GrowthRate
FROM a FULL JOIN b ON a.CustomerID= b.CustomerID
SPL
A | |
---|---|
1 | =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020) |
2 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021) |
3 | =joinx@f(A1:a, CustomerID; A2:b, CustomerID) |
4 | =A3.new([a.CustomerID,b.CustomerID].nvl():CustomerID,a.Amount2020, b.Amount2021, b.Amount2021/a.Amount2020-1:GrowthRate) |
5 | =file(“result.btx”).export@b(A4) |
SQL
WITH a AS(
SELECT CustomerID,sum(Amount) Amount2020
FROM Orders2020
GROUP BY CustomerID),
b AS(
SELECT CustomerID,sum(Amount) Amount2021
FROM Orders2021
GROUP BY CustomerID)
SELECT City,sum(Amount2020) Amount2020,sum(Amount2021) Amount2021,
sum(Amount2021)/sum(Amount2020)-1 GrowthRate
FROM Customers
INNER JOIN a ON Customers.CustomerID= a.CustomerID
INNER JOIN b ON Customers.CustomerID= b.CustomerID
GROUP BY City
SPL
A | |
---|---|
1 | =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020) |
2 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021) |
3 | =file(“Customers.ctx”).open().cursor@x(CustomerID, City) |
4 | =joinx(A3:primary, CustomerID; A1:sub1, CustomerID; A2:sub2, CustomerID) |
5 | =A4.groups(primary.City;sum(sub1.Amount2020):Amount2020, sum(sub2.Amount2021):Amount2021, null:GrowthRate).run(GrowthRate=Amount2021/Amount2020-1) |
SQL
WITH a AS(
SELECT CustomerID,sum(Amount) Amount2020
FROM Orders2020
GROUP BY CustomerID),
b AS(
SELECT CustomerID,sum(Amount) Amount2021
FROM Orders2021
GROUP BY CustomerID)
SELECT City,sum(Amount2020) Amount2020,sum(Amount2021) Amount2021,
sum(Amount2021)/sum(Amount2020)-1 GrowthRate
FROM Customers
LEFT JOIN a ON Customers.CustomerID= a.CustomerID
LEFT JOIN b ON Customers.CustomerID= b.CustomerID
GROUP BY City
SPL
A | |
---|---|
1 | =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020) |
2 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021) |
3 | =file(“Customers.ctx”).open().cursor@x(CustomerID,City) |
4 | =joinx@1(A3:primary, CustomerID; A1:sub1, CustomerID; A2:sub2, CustomerID) |
5 | =A4.groups(primary.City;sum(sub1.Amount2020):Amount2020, sum(sub2.Amount2021):Amount2021, null:GrowthRate).run(GrowthRate=Amount2021/Amount2020-1) |
SQL
WITH a AS(
SELECT CustomerID,sum(Amount) Amount2019
FROM Orders2019
GROUP BY CustomerID),
b AS(
SELECT CustomerID,sum(Amount) Amount2020
FROM Orders2020
GROUP BY CustomerID),
c AS(
SELECT CustomerID,sum(Amount) Amount2021
FROM Orders2021
GROUP BY CustomerID)
SELECT coalesce (a.CustomerID, b.CustomerID, c.CustomerID ) CustomerID,
Amount2019, Amount2020, Amount2021
FROM a
FULL JOIN b ON b.CustomerID= a.CustomerID
FULL JOIN c ON c.CustomerID= b.CustomerID or c.CustomerID= a.CustomerID
GROUP BY CustomerID
SPL
A | |
---|---|
1 | =file(“Orders2019_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2019) |
2 | =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020) |
3 | =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021) |
4 | =joinx@f(A1:a, CustomerID; A2:b, CustomerID; A3:c, CustomerID) |
5 | =A4.new([a.CustomerID,b.CustomerID,c.CustomerID].nvl():CustomerID, a.Amount2019, b.Amount2020, c.Amount2021) |
6 | =file(“result.btx”).export@b(A5) |