Indexes & Aggregation
This module covers two critical topics for MongoDB performance and analytics: building effective indexes to speed up queries, and using the aggregation pipeline for complex data transformations.
Indexes
An index is a sorted data structure that allows MongoDB to quickly locate documents without scanning the entire collection.
Index Types
Single-Field Index
// Create ascending index on price
db.products.createIndex({ price: 1 });
// Create descending index
db.products.createIndex({ createdAt: -1 });
// 1 = ascending (A→Z, 0→∞)
// -1 = descending (Z→A, ∞→0)
// For single-field equality, direction doesn't matter much for performance
// But direction matters for sorting and range queries
// This index speeds up:
db.products.find({ price: 100 });
db.products.find({ price: { $gt: 50 } });
db.products.find({}).sort({ price: 1 });
Compound Index
// Index on multiple fields — field ORDER matters!
db.products.createIndex({ category: 1, price: -1 });
// ESR Rule: Equality, Sort, Range
// Best for: WHERE category = X AND ORDER BY price DESC AND price > Y
// Supports queries on:
// ✅ (category)
// ✅ (category, price)
// ❌ (price) alone — can't skip the first field
// BAD QUERY for this index (won't use it efficiently):
db.products.find({ price: { $gt: 50 } }); // doesn't use category index
// GOOD QUERIES:
db.products.find({ category: "electronics" });
db.products.find({ category: "electronics", price: { $gt: 50 } });
db.products.find({ category: "electronics", price: { $gt: 50 } }).sort({ price: -1 });
Unique Index
// Prevent duplicate values — e.g., usernames, emails must be unique
db.users.createIndex({ email: 1 }, { unique: true });
db.users.createIndex({ username: 1 }, { unique: true });
// Insert fails if email already exists
db.users.insertOne({ email: "alice@example.com", name: "Alice" });
db.users.insertOne({ email: "alice@example.com", name: "Alice2" }); // Error: duplicate key
// Unique indexes on fields with nulls:
// MongoDB allows multiple null values by default
// To restrict, use { unique: true, sparse: true }
Sparse Index
// Index only documents that have the field (skip docs where field is missing/null)
db.users.createIndex({ phone: 1 }, { sparse: true });
// Useful when:
// - Many documents don't have a field (optional field)
// - Want to optimize index size
// - Querying optional fields
// This index is smaller because it skips ~50% of users without phone
db.users.find({ phone: { $exists: true } });
Multikey Index
// MongoDB automatically creates a multikey index when indexing an array field
// Each array element gets indexed individually
db.users.createIndex({ tags: 1 }); // Automatically multikey
// Documents:
db.users.insertMany([
{ name: "Alice", tags: ["java", "python", "javascript"] },
{ name: "Bob", tags: ["go", "rust"] },
{ name: "Charlie", tags: [] }
]);
// Index enables fast queries on any array element
db.users.find({ tags: "javascript" }); // Fast — uses index
// Caveat: Cannot have TWO array fields in same compound index
db.collection.createIndex({ tags: 1, hobbies: 1 }); // ERROR if both are arrays
Text Index
// Full-text search on string fields
db.products.createIndex({ name: "text", description: "text" });
// Note: Only ONE text index per collection
// Query with $text operator
db.products.find({
$text: { $search: "keyboard mechanical" } // finds docs with these words
});
// Return relevance score
db.products.find(
{ $text: { $search: "keyboard" } },
{ score: { $meta: "textScore" } }
).sort({ score: { $meta: "textScore" } });
// Text index features:
// - Case-insensitive
// - Stemming (keyboard, keyboards → same root)
// - Stop words (the, a, is) ignored by default
// - OR by default — matches any word
// - Phrase search: "mechanical keyboard" (exact phrase)
// - Negation: -wireless (exclude)
TTL Index
// Automatically delete documents after expiry (covered in detail in TTL module)
db.sessions.createIndex(
{ createdAt: 1 },
{ expireAfterSeconds: 3600 } // Delete after 1 hour
);
// Any document with createdAt date > 1 hour ago is deleted automatically
Viewing & Managing Indexes
// List all indexes on a collection
db.products.getIndexes();
// Output: [ { key: { _id: 1 }, name: "_id_" }, { key: { price: 1 }, name: "price_1" }, ... ]
// Get detailed index info
db.products.getIndexSpecs();
// Drop a specific index
db.products.dropIndex("price_1"); // By name
db.products.dropIndex({ price: 1 }); // By key spec
// Drop all indexes except _id (which can't be dropped)
db.products.dropIndexes();
// Rename an index
db.collection.dropIndex("oldIndexName");
db.collection.createIndex({ field: 1 }, { name: "newIndexName" });
Query Explanation & Analysis
// Explain how MongoDB executes a query
let explanation = db.products.find({ price: { $gt: 50 }, category: "electronics" })
.explain("executionStats");
// Key fields to check:
console.log(explanation.executionStats.executionStages.stage);
// COLLSCAN = full collection scan (bad, need index)
// IXSCAN = index scan (good)
// FETCH = retrieve full document (after index found it)
console.log(explanation.executionStats.executionStages.nReturned);
// Documents returned
console.log(explanation.executionStats.executionStages.totalDocsExamined);
// Documents scanned
// RULE: totalDocsExamined should be close to nReturned
// If totalDocsExamined >> nReturned, you need a better index
// Example: If nReturned = 10 but totalDocsExamined = 100,000
// The query is scanning 100,000 docs to return 10 → consider indexing
Index Best Practices
// 1. Create indexes for frequent queries
db.orders.createIndex({ userId: 1 }); // for find({ userId: X })
db.orders.createIndex({ status: 1 }); // for find({ status: X })
// 2. Use compound indexes for multi-field filters
db.orders.createIndex({ userId: 1, status: 1 }); // for find({ userId: X, status: Y })
// 3. Order fields in compound index by ESR rule: Equality, Sort, Range
db.orders.createIndex({ userId: 1, createdAt: -1, amount: 1 });
// WHERE userId = X ORDER BY createdAt DESC AND amount > Y
// 4. Don't over-index — every index slows down inserts/updates
// Keep indexes to high-return queries
// 5. Monitor index usage
db.collection.aggregate([
{ $indexStats: {} }
]);
// Shows which indexes are used and how often
// 6. Prefer ascending for sort, but in compound indexes, put sort field in middle
db.collection.createIndex({
status: 1, // Equality
createdAt: -1, // Sort
userId: 1 // Range
});
Aggregation Pipeline
The aggregation pipeline is a powerful framework for transforming and analyzing data within MongoDB, similar to SQL's GROUP BY, JOIN, and window functions combined.
Pipeline Stages
The aggregation pipeline processes documents through stages, each modifying the result for the next stage.
Input → [$match] → [$project] → [$group] → [$sort] → [$limit] → Output
$match
Filter documents — like SQL's WHERE clause.
db.orders.aggregate([
{ $match: { status: "completed", amount: { $gt: 100 } } }
]);
// Returns only orders with status = "completed" AND amount > 100
// Should usually come first to reduce documents early
$project
Reshape documents — include/exclude fields, compute new fields.
db.orders.aggregate([
{
$project: {
_id: 1, // include
customerName: 1, // include
amount: 1,
status: 0, // exclude
profit: { $subtract: ["$amount", "$cost"] } // computed field
}
}
]);
// Result: { _id: X, customerName: "Alice", amount: 100, profit: 25 }
// Note: Use $ prefix to reference field values
$group
Group documents and compute aggregates — like SQL's GROUP BY.
db.orders.aggregate([
{
$group: {
_id: "$status", // Group by status field
count: { $sum: 1 }, // Count documents
totalAmount: { $sum: "$amount" }, // Sum amounts
avgAmount: { $avg: "$amount" },
maxAmount: { $max: "$amount" },
minAmount: { $min: "$amount" }
}
}
]);
// Result:
// { _id: "completed", count: 150, totalAmount: 15000, avgAmount: 100, ... }
// { _id: "pending", count: 50, totalAmount: 3000, avgAmount: 60, ... }
// { _id: "cancelled", count: 10, totalAmount: 500, avgAmount: 50, ... }
Common $group accumulators:
| Operator | Description |
|---|---|
$sum |
Sum of values or count of documents ($sum: 1) |
$avg |
Average of values |
$min |
Minimum value |
$max |
Maximum value |
$first |
First value (usually with $sort) |
$last |
Last value (usually with $sort) |
$push |
Collect values into array |
$addToSet |
Collect unique values into array |
$count |
Count of documents in group |
$sort
Sort documents by field(s).
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $sort: { amount: -1 } } // -1 = descending (high to low)
]);
// For compound sort:
db.orders.aggregate([
{ $sort: { status: 1, amount: -1 } } // By status ASC, then amount DESC
]);
$limit & $skip
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $sort: { amount: -1 } },
{ $skip: 10 }, // Skip first 10
{ $limit: 5 } // Return next 5
]);
// For pagination: skip = (page - 1) * pageSize, limit = pageSize
$lookup
Join with another collection — like SQL's JOIN.
// Orders with customer details (many-to-one)
db.orders.aggregate([
{
$lookup: {
from: "customers", // Collection to join
localField: "customerId", // Field in orders
foreignField: "_id", // Field in customers
as: "customer" // Output array field
}
}
]);
// Result: { _id: X, customerId: Y, amount: 100, customer: [{ _id: Y, name: "Alice", ... }] }
// Note: customer is always an array (even if 1 document)
$unwind
Deconstruct arrays — create separate document for each array element.
// Order with items array:
// { _id: 1, customerId: 1, items: [{ product: "A", qty: 2 }, { product: "B", qty: 1 }] }
db.orders.aggregate([
{ $unwind: "$items" }
]);
// Result:
// { _id: 1, customerId: 1, items: { product: "A", qty: 2 } }
// { _id: 1, customerId: 1, items: { product: "B", qty: 1 } }
// Now you can group by product:
db.orders.aggregate([
{ $unwind: "$items" },
{ $group: { _id: "$items.product", totalQty: { $sum: "$items.qty" } } }
]);
Aggregation Examples
Example 1: Sales by Category
db.orders.aggregate([
{ $match: { status: "completed" } },
{ $unwind: "$items" },
{
$group: {
_id: "$items.category",
totalSales: { $sum: { $multiply: ["$items.qty", "$items.price"] } },
itemCount: { $sum: "$items.qty" },
orderCount: { $sum: 1 }
}
},
{ $sort: { totalSales: -1 } }
]);
Example 2: Customer Spending Analysis
db.orders.aggregate([
{ $match: { status: "completed" } },
{
$group: {
_id: "$customerId",
totalSpent: { $sum: "$amount" },
orderCount: { $sum: 1 },
avgOrder: { $avg: "$amount" },
firstOrder: { $min: "$createdAt" },
lastOrder: { $max: "$createdAt" }
}
},
{
$lookup: {
from: "customers",
localField: "_id",
foreignField: "_id",
as: "customer"
}
},
{ $unwind: "$customer" },
{
$project: {
_id: 0,
customerName: "$customer.name",
totalSpent: 1,
orderCount: 1,
avgOrder: { $round: ["$avgOrder", 2] }
}
},
{ $sort: { totalSpent: -1 } },
{ $limit: 10 }
]);
Summary
Indexes: - Use single-field indexes for frequent equality/range queries - Use compound indexes with ESR rule: Equality, Sort, Range - Unique indexes prevent duplicates; sparse indexes skip null/missing values - Text indexes enable full-text search - Always explain() queries to verify index usage
Aggregation: - Pipeline stages transform data: $match → $group → $sort → $project - Group with accumulators ($sum, $avg, $min, $max, $push) - $lookup for joins, $unwind to expand arrays - Combine with $sort and $limit for top-N queries - Aggregation is powerful for analytics, but can be slower than indexed finds