Advanced Aggregation
This module covers advanced techniques for the MongoDB aggregation pipeline, enabling complex transformations, analytics, and data manipulation.
Advanced Pipeline Stages
$facet — Multiple Aggregations in One Query
Run multiple aggregation paths on the same data in parallel:
// Get product stats AND top categories in single aggregation
db.products.aggregate([
{ $match: { status: "active" } },
{
$facet: {
"price_stats": [
{
$group: {
_id: null,
avgPrice: { $avg: "$price" },
minPrice: { $min: "$price" },
maxPrice: { $max: "$price" },
totalProducts: { $sum: 1 }
}
}
],
"top_categories": [
{ $group: { _id: "$category", count: { $sum: 1 } } },
{ $sort: { count: -1 } },
{ $limit: 5 }
],
"new_arrivals": [
{ $sort: { createdAt: -1 } },
{ $limit: 10 }
]
}
}
]);
// Result:
// {
// price_stats: [{ avgPrice: 45.50, minPrice: 10, maxPrice: 299, totalProducts: 1000 }],
// top_categories: [{ _id: "electronics", count: 400 }, ...],
// new_arrivals: [{ _id: ObjectId, name: "...", ... }, ...]
// }
$bucket — Group by Ranges
Organize documents into bucketed ranges:
// Group products by price ranges
db.products.aggregate([
{
$bucket: {
groupBy: "$price",
boundaries: [0, 50, 100, 200, 500, 1000],
default: "other", // Documents outside boundaries
output: {
count: { $sum: 1 },
products: { $push: "$name" },
avg_price: { $avg: "$price" }
}
}
}
]);
// Result:
// { _id: 0, count: 150, products: ["USB Hub", ...], avg_price: 25.50 }
// { _id: 50, count: 200, products: ["Monitor", ...], avg_price: 75.00 }
// { _id: 100, count: 100, products: ["Laptop", ...], avg_price: 150.00 }
// { _id: "other", count: 50, products: [...], avg_price: 750.00 }
$bucketAuto — Automatic Bucketing
MongoDB automatically determines bucket boundaries:
// Automatically create 5 buckets for ages
db.users.aggregate([
{
$bucketAuto: {
groupBy: "$age",
buckets: 5, // Number of buckets to create
output: {
count: { $sum: 1 },
avg_age: { $avg: "$age" }
}
}
}
]);
// MongoDB determines boundaries to evenly distribute documents
// Result might be: [0-20], [20-40], [40-60], [60-80], [80-100]
$redact — Dynamic Field Inclusion
Include or exclude fields based on field values (useful for multi-tenant data):
// Hide departments with security level > user's clearance
db.documents.aggregate([
{
$match: { owner: "alice" }
},
{
$redact: {
$cond: [
{ $lte: ["$accessLevel", 3] }, // User has clearance 3
"$$KEEP", // Include this field
"$$PRUNE" // Exclude this field
]
}
}
]);
// Documents:
// { name: "Public Doc", accessLevel: 1 } → KEPT
// { name: "Secret Doc", accessLevel: 5 } → PRUNED
$out — Write Results to Collection
Save aggregation results to a new collection:
db.orders.aggregate([
{ $match: { status: "completed" } },
{
$group: {
_id: "$customerId",
totalSpent: { $sum: "$amount" },
orderCount: { $sum: 1 }
}
},
{ $sort: { totalSpent: -1 } },
{ $out: "customer_analytics" } // Write results here
]);
// New collection 'customer_analytics' now contains results
// Useful for pre-computed summaries, reporting
let analytics = db.customer_analytics.findOne({ _id: "customer123" });
print("Total spent:", analytics.totalSpent);
$merge — Upsert Results
Merge aggregation results into a collection (insert or update):
db.sales.aggregate([
{ $group: { _id: "$month", total: { $sum: "$amount" } } },
{
$merge: {
into: "monthly_totals",
whenMatched: "replace", // Update if _id exists
whenNotMatched: "insert" // Insert if new
}
}
]);
// Result goes to 'monthly_totals'; existing docs are replaced
Advanced Expressions
Conditional Operators
// $cond: if-then-else for computed fields
db.products.aggregate([
{
$project: {
name: 1,
price: 1,
discount: {
$cond: [
{ $gte: ["$price", 100] }, // If price >= 100
{ $multiply: ["$price", 0.1] }, // Then 10% discount
{ $multiply: ["$price", 0.05] } // Else 5% discount
]
}
}
}
]);
// $switch: multi-way conditional (like switch-case)
db.orders.aggregate([
{
$project: {
amount: 1,
shipCost: {
$switch: {
branches: [
{ case: { $lt: ["$amount", 25] }, then: 5.99 },
{ case: { $lt: ["$amount", 100] }, then: 2.99 },
{ case: { $lt: ["$amount", 500] }, then: 0 }
],
default: -10 // Free shipping + discount
}
}
}
}
]);
Array Expressions
// $arrayElemAt: Get element at index
db.users.aggregate([
{
$project: {
name: 1,
firstHobby: { $arrayElemAt: ["$hobbies", 0] },
lastHobby: { $arrayElemAt: ["$hobbies", -1] } // Last element
}
}
]);
// $slice: Get portion of array
db.products.aggregate([
{
$project: {
name: 1,
topReviews: { $slice: ["$reviews", 5] } // First 5 reviews
}
}
]);
// $map: Transform each element in array
db.orders.aggregate([
{
$project: {
customerId: 1,
originalItems: 1,
itemNames: {
$map: {
input: "$items",
as: "item",
in: "$$item.productName" // Extract product name from each item
}
}
}
}
]);
// $filter: Include only matching elements
db.students.aggregate([
{
$project: {
name: 1,
grades: 1,
passingGrades: {
$filter: {
input: "$grades",
as: "grade",
cond: { $gte: ["$$grade", 70] } // Only grades >= 70
}
}
}
}
]);
// $size: Count elements in array
db.posts.aggregate([
{
$project: {
title: 1,
commentCount: { $size: "$comments" }
}
}
]);
// $concatArrays: Combine arrays
db.inventory.aggregate([
{
$project: {
name: 1,
allVariants: { $concatArrays: ["$colors", "$sizes", "$materials"] }
}
}
]);
String Expressions
// $concat: Combine strings
db.users.aggregate([
{
$project: {
_id: 0,
firstName: 1,
lastName: 1,
fullName: { $concat: ["$firstName", " ", "$lastName"] }
}
}
]);
// $substr: Extract substring
db.products.aggregate([
{
$project: {
code: 1,
prefix: { $substr: ["$code", 0, 3] } // First 3 chars
}
}
]);
// $toUpper / $toLower: Case conversion
db.users.aggregate([
{
$project: {
email: { $toLower: "$email" } // Normalize to lowercase
}
}
]);
// $split: Split string by delimiter
db.logs.aggregate([
{
$project: {
log: 1,
parts: { $split: ["$log", " "] } // Split by spaces
}
}
]);
// $regexMatch: Match regex
db.users.aggregate([
{
$project: {
email: 1,
isValidEmail: {
$regexMatch: {
input: "$email",
regex: /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/
}
}
}
}
]);
Math Expressions
// $add, $subtract, $multiply, $divide
db.orders.aggregate([
{
$project: {
items: 1,
subtotal: { $sum: "$items.price" },
tax: { $multiply: [{ $sum: "$items.price" }, 0.08] },
total: {
$add: [
{ $sum: "$items.price" },
{ $multiply: [{ $sum: "$items.price" }, 0.08] }
]
}
}
}
]);
// $round, $ceil, $floor
db.products.aggregate([
{
$project: {
name: 1,
price: 1,
roundedPrice: { $round: ["$price", 2] },
ceilingPrice: { $ceil: "$price" },
floorPrice: { $floor: "$price" }
}
}
]);
// $abs: Absolute value
db.transactions.aggregate([
{
$project: {
amount: 1,
absoluteAmount: { $abs: "$amount" }
}
}
]);
// $mod: Modulo
db.numbers.aggregate([
{
$project: {
num: 1,
isEven: { $eq: [{ $mod: ["$num", 2] }, 0] }
}
}
]);
Window Functions
Window functions compute values over a range of related documents:
// $setWindowFields: Compute values within a window
// Running total of sales by date
db.sales.aggregate([
{ $sort: { date: 1 } },
{
$setWindowFields: {
partitionBy: null, // Across entire collection
sortBy: { date: 1 },
output: {
runningTotal: {
$sum: "$amount",
window: { range: ["unbounded", "current"] } // From start to current doc
},
movingAvg: {
$avg: "$amount",
window: { range: [-7, 0] } // Previous 7 + current
},
rank: { $rank: {} } // Rank documents
}
}
}
]);
// Result: { date: "2024-01-01", amount: 100, runningTotal: 100, rank: 1 }
// { date: "2024-01-02", amount: 200, runningTotal: 300, rank: 2 }
// { date: "2024-01-03", amount: 150, runningTotal: 450, rank: 3 }
// Partition by customer: running total per customer
db.orders.aggregate([
{ $sort: { customerId: 1, date: 1 } },
{
$setWindowFields: {
partitionBy: "$customerId", // Separate window per customer
sortBy: { date: 1 },
output: {
customerRunningTotal: {
$sum: "$amount",
window: { range: ["unbounded", "current"] }
}
}
}
}
]);
Complex Pipeline Examples
Customer Lifetime Value Analysis
db.orders.aggregate([
{ $match: { status: "completed" } },
{
$group: {
_id: "$customerId",
totalSpent: { $sum: "$amount" },
orderCount: { $sum: 1 },
avgOrder: { $avg: "$amount" },
firstOrder: { $min: "$date" },
lastOrder: { $max: "$date" }
}
},
{
$project: {
totalSpent: 1,
orderCount: 1,
avgOrder: { $round: ["$avgOrder", 2] },
customerAge: {
$divide: [
{ $subtract: [new Date(), "$firstOrder"] },
86400000 // Convert to days (ms per day)
]
},
value_tier: {
$switch: {
branches: [
{ case: { $gte: ["$totalSpent", 10000] }, then: "platinum" },
{ case: { $gte: ["$totalSpent", 5000] }, then: "gold" },
{ case: { $gte: ["$totalSpent", 1000] }, then: "silver" }
],
default: "bronze"
}
}
}
},
{ $sort: { totalSpent: -1 } },
{ $limit: 100 }
]);
Product Performance Dashboard
db.orders.aggregate([
{ $unwind: "$items" },
{ $match: { "items.productId": { $exists: true } } },
{
$facet: {
"by_product": [
{
$group: {
_id: "$items.productId",
productName: { $first: "$items.productName" },
unitsSold: { $sum: "$items.quantity" },
revenue: { $sum: { $multiply: ["$items.quantity", "$items.price"] } },
avgPrice: { $avg: "$items.price" },
orderCount: { $sum: 1 }
}
},
{ $sort: { revenue: -1 } }
],
"by_category": [
{
$group: {
_id: "$items.category",
totalSales: { $sum: "$items.quantity" },
totalRevenue: { $sum: { $multiply: ["$items.quantity", "$items.price"] } }
}
}
]
}
}
]);
Summary
Advanced Stages:
- $facet: Multiple aggregations in parallel
- $bucket/$bucketAuto: Group into ranges
- $redact: Conditional field inclusion
- $out/$merge: Write results to collections
Advanced Expressions:
- Conditional: $cond, $switch
- Arrays: $arrayElemAt, $slice, $map, $filter, $size
- Strings: $concat, $substr, $split, $regexMatch
- Math: $round, $abs, $mod, $divide
Window Functions:
- $setWindowFields for running totals, moving averages, ranking
- Partition windows by field or across entire collection
- Range-based windows: ["unbounded", "current"] or [-7, 0]
Best Practices:
- Use $facet to avoid multiple passes over data
- Move $match early to reduce documents processed
- Use $out for pre-computed reports
- Combine window functions with partitioning for per-group analysis