SPL 协助 Mongodb:Find multiple latest by filter criteria
Mongodb 数据库中某 collection(名为 category_time) 数据如下:
[
{
"_id": 1,
"category": "FIRE",
"time": "2024-05-11T07:11:00Z"
},
{
"_id": 2,
"category": "FIRE",
"time": "2024-05-11T08:11:00Z"
},
{
"_id": 3,
"category": "FIRE",
"time": "2024-05-11T09:11:00Z"
},
{
"_id": 4,
"category": "POLICE",
"time": "2024-05-11T07:22:00Z"
},
{
"_id": 5,
"category": "POLICE",
"time": "2024-05-11T08:22:00Z"
},
{
"_id": 6,
"category": "POLICE",
"time": "2024-05-11T09:22:00Z"
},
{
"_id": 7,
"category": "AMBULANCE",
"time": "2024-05-11T07:33:00Z"
},
{
"_id": 8,
"category": "AMBULANCE",
"time": "2024-05-11T08:33:00Z"
},
{
"_id": 9,
"category": "AMBULANCE",
"time": "2024-05-11T09:33:00Z"
}
]
现在输入两个集合参数,对应 category 集合和 time 集合,比如 [“FIRE”, “AMBULANCE”] 和 [“2024-05-11T08:15:00Z”, “2024-05-11T09:00:00Z”]。
请将这两个集合的成员组合成多组参数,也就是叉乘,本例是四组,每组是一个 category 和一个 time。
然后用 4 组参数遍历搜索文档,每次找出 category 等于参数,且 time 早于参数但最近的那条记录。
[
{
"category": "FIRE",
"time": "2024-05-11T08:15:00Z",
"last_entry_on_or_before": {
"_id": 2,
"category": "FIRE",
"time": "2024-05-11T08:11:00Z"
}
},
{
"category": "FIRE",
"time": "2024-05-11T09:00:00Z",
"last_entry_on_or_before": {
"_id": 2,
"category": "FIRE",
"time": "2024-05-11T08:11:00Z"
}
},
{
"category": "AMBULANCE",
"time": "2024-05-11T08:15:00Z",
"last_entry_on_or_before": {
"_id": 7,
"category": "AMBULANCE",
"time": "2024-05-11T07:33:00Z"
}
},
{
"category": "AMBULANCE",
"time": "2024-05-11T09:00:00Z",
"last_entry_on_or_before": {
"_id": 8,
"category": "AMBULANCE",
"time": "2024-05-11T08:33:00Z"
}
}
]
该题的难点在于每组参数查询一次,且每组查询结果里选出 time 最近的那一条记录。使用 Mongodb query 尝试了很多办法,结果都不对,其中一种接近的写法如下:
db.category_time.aggregate([
{
"$match": {
"_id": {
"$exists": false
}
}
},
{
"$unionWith": {
"coll": "collection",
"pipeline": [
{
"$documents": [
{
"category": //your input category array here
["FIRE",
"AMBULANCE"
],
//your input time array here
"time": [
"2024-05-11T08:15:00Z",
"2024-05-11T09:00:00Z"
]
}
]
},
{
"$unwind": "$category"
},
{
"$unwind": "$time"
}
]
}
},
{
"$lookup": {
"from": "collection",
"localField": "category",
"foreignField": "category",
"let": {
ts: "$time"
},
"pipeline": [
{
"$match": {
$expr: {
$lte: [
"$time",
"$$ts"
]
}
}
},
{
"$sort": {
"time": -1
}
},
{
"$limit": 1
}
],
"as": "last_entry_on_or_before"
}
},
{
"$unwind": "$last_entry_on_or_before"
}
])
SPL 提供了 maxp 函数,可以轻松找到 time 小于参数的记录中最近的那条记录:
A |
|
1 |
=mongo_open@d("mongodb://127.0.0.1:27017/local") |
2 |
=mongo_shell@d(A1, "{'find':'category_time'}") |
3 |
=mongo_close(A1) |
4 |
["FIRE", "AMBULANCE"] |
5 |
["2024-05-11T08:15:00Z", "2024-05-11T09:00:00Z"] |
6 |
>A5.run(~=datetime(~,"yyyy-MM-dd'T'HH🇲🇲ss'Z'")) |
7 |
=A4.conj(A5.new(~:time,A4.~:category)).sort(category,time) |
8 |
>A2.run('time'=datetime('time',"yyyy-MM-dd'T'HH🇲🇲ss'Z'")) |
9 |
=A7.new(category,time,A2.select(category==A7.category && time<=A7.time).maxp(time):last_entry_on_or_before) |
10 |
=json(A9) |
英文版:https://c.esproc.com/article/1741314956132