5.1 有序归并

本章中要尝试更大数据量,将订单表按年分别存储成不同的表,表名为 Orders+ 年份,如 Orders2021,Orders2022,依此类推,表结构均和 Orders 相同

SQL

SELECT ProductID,City,sum(Amount) Amount
FROM Customers
INNER JOIN Orders2021
ON Customers.CustomerID= Orders2021.CustomerID
WHERE Orders2021.Quantity>5
GROUP BY ProductID,City 

SPL

本例为主子表关联,且主子两张表的数据量均超出内存容量,需要用游标读出,此时应先把两张表均按关联字段排序,然后再用有序归并关联,效率最高。

实际使用中,可以参考前面的【账户有序存储】的做法,将数据事先按关联字段有序存储。本章后面的所有例子,都默认已经按主表的主键字段有序存储。

A
1 =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,ProductID,Amount;Quantity>5)
2 =file(“Customers.ctx”).open().cursor@x(CustomerID,City)
3 =joinx(A2:primary, CustomerID; A1:sub, CustomerID)
4 =A3.groups(sub.ProductID, primary.City; sum(sub.Amount):Amount)

SQL

SELECT Customers.CustomerID,CustomerName,sum(Amount) Amount
FROM Customers
LEFT JOIN Orders2021
ON Customers.CustomerID= Orders2021.CustomerID
GROUP BY Customers.CustomerID,CustomerName

SPL

A
1 =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount)
2 =file(“Customers.ctx”).open().cursor@x(CustomerID,CustomerName)
3 =joinx@1(A2:primary, CustomerID; A1:sub, CustomerID)
4 =A3.group(primary.CustomerID;primary.CustomerName,~.sum(sub.Amount):Amount)
5 =file(“result.btx”).export@b(A4)

A4 此处为大分组结果集,内存放不下,且数据按分组字段有序,因此使用 group 聚合返回成游标。


SQL

WITH a AS(
    SELECT CustomerID,sum(Amount) Amount2020 
    FROM Orders2020 
    GROUP BY CustomerID),
b AS(
    SELECT CustomerID,sum(Amount) Amount2021 
    FROM Orders2021 
    GROUP BY CustomerID)
SELECT 
    coalesce ( a.CustomerID, b.CustomerID ) CustomerID, 
    Amount2020, Amount2021,Amount2021/Amount2020-1 GrowthRate
FROM a FULL JOIN b ON a.CustomerID= b.CustomerID

SPL

A
1 =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020)
2 =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021)
3 =joinx@f(A1:a, CustomerID; A2:b, CustomerID)
4 =A3.new([a.CustomerID,b.CustomerID].nvl():CustomerID,a.Amount2020, b.Amount2021, b.Amount2021/a.Amount2020-1:GrowthRate)
5 =file(“result.btx”).export@b(A4)

SQL

WITH a AS(
    SELECT CustomerID,sum(Amount) Amount2020 
    FROM Orders2020 
    GROUP BY CustomerID),
b AS(
    SELECT CustomerID,sum(Amount) Amount2021 
    FROM Orders2021 
    GROUP BY CustomerID)
SELECT City,sum(Amount2020) Amount2020,sum(Amount2021) Amount2021, 
    sum(Amount2021)/sum(Amount2020)-1 GrowthRate
FROM Customers
INNER JOIN a ON Customers.CustomerID= a.CustomerID
INNER JOIN b ON Customers.CustomerID= b.CustomerID
GROUP BY City

SPL

A
1 =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020)
2 =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021)
3 =file(“Customers.ctx”).open().cursor@x(CustomerID, City)
4 =joinx(A3:primary, CustomerID; A1:sub1, CustomerID; A2:sub2, CustomerID)
5 =A4.groups(primary.City;sum(sub1.Amount2020):Amount2020, sum(sub2.Amount2021):Amount2021, null:GrowthRate).run(GrowthRate=Amount2021/Amount2020-1)

SQL

WITH a AS(
    SELECT CustomerID,sum(Amount) Amount2020 
    FROM Orders2020 
    GROUP BY CustomerID),
b AS(
    SELECT CustomerID,sum(Amount) Amount2021 
    FROM Orders2021 
    GROUP BY CustomerID)
SELECT City,sum(Amount2020) Amount2020,sum(Amount2021) Amount2021, 
    sum(Amount2021)/sum(Amount2020)-1 GrowthRate
FROM Customers
LEFT JOIN a ON Customers.CustomerID= a.CustomerID
LEFT JOIN b ON Customers.CustomerID= b.CustomerID
GROUP BY City

SPL

A
1 =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020)
2 =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021)
3 =file(“Customers.ctx”).open().cursor@x(CustomerID,City)
4 =joinx@1(A3:primary, CustomerID; A1:sub1, CustomerID; A2:sub2, CustomerID)
5 =A4.groups(primary.City;sum(sub1.Amount2020):Amount2020, sum(sub2.Amount2021):Amount2021, null:GrowthRate).run(GrowthRate=Amount2021/Amount2020-1)

SQL

WITH a AS(
    SELECT CustomerID,sum(Amount) Amount2019
    FROM Orders2019 
    GROUP BY CustomerID),
b AS(
    SELECT CustomerID,sum(Amount) Amount2020 
    FROM Orders2020 
    GROUP BY CustomerID),
c AS(
    SELECT CustomerID,sum(Amount) Amount2021
    FROM Orders2021 
    GROUP BY CustomerID)
SELECT coalesce (a.CustomerID, b.CustomerID, c.CustomerID ) CustomerID,
    Amount2019, Amount2020, Amount2021
FROM a
FULL JOIN b ON b.CustomerID= a.CustomerID
FULL JOIN c ON c.CustomerID= b.CustomerID or c.CustomerID= a.CustomerID
GROUP BY CustomerID

SPL

A
1 =file(“Orders2019_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2019)
2 =file(“Orders2020_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2020)
3 =file(“Orders2021_Account.ctx”).open().cursor@x(CustomerID,Amount).group@s(CustomerID;sum(Amount):Amount2021)
4 =joinx@f(A1:a, CustomerID; A2:b, CustomerID; A3:c, CustomerID)
5 =A4.new([a.CustomerID,b.CustomerID,c.CustomerID].nvl():CustomerID, a.Amount2019, b.Amount2020, c.Amount2021)
6 =file(“result.btx”).export@b(A5)