Aggregation and Grouping
Aggregation and Grouping¶
1. Aggregate Functions¶
Aggregate functions calculate multiple rows into a single result.
| Function | Description |
|---|---|
COUNT() |
Row count |
SUM() |
Sum |
AVG() |
Average |
MIN() |
Minimum value |
MAX() |
Maximum value |
2. Practice Table Setup¶
CREATE TABLE sales (
id SERIAL PRIMARY KEY,
product VARCHAR(100),
category VARCHAR(50),
amount NUMERIC(10, 2),
quantity INTEGER,
sale_date DATE,
region VARCHAR(50)
);
INSERT INTO sales (product, category, amount, quantity, sale_date, region) VALUES
('Laptop', 'Electronics', 1500000, 2, '2024-01-05', 'Seoul'),
('Mouse', 'Electronics', 50000, 10, '2024-01-05', 'Seoul'),
('Keyboard', 'Electronics', 100000, 5, '2024-01-06', 'Busan'),
('Monitor', 'Electronics', 300000, 3, '2024-01-07', 'Seoul'),
('Desk', 'Furniture', 250000, 2, '2024-01-08', 'Daejeon'),
('Chair', 'Furniture', 150000, 4, '2024-01-08', 'Seoul'),
('Laptop', 'Electronics', 1800000, 1, '2024-01-10', 'Busan'),
('Mouse', 'Electronics', 45000, 20, '2024-01-12', 'Daejeon'),
('Desk', 'Furniture', 280000, 1, '2024-01-15', 'Seoul'),
('Chair', 'Furniture', 180000, 3, '2024-01-15', 'Busan');
3. COUNT - Counting¶
Total Row Count¶
SELECT COUNT(*) FROM sales;
-- 10
Count Specific Column (Excludes NULL)¶
SELECT COUNT(region) FROM sales;
-- Count of non-NULL region
Count Distinct¶
SELECT COUNT(DISTINCT category) FROM sales;
-- 2 (Electronics, Furniture)
SELECT COUNT(DISTINCT region) FROM sales;
-- 3 (Seoul, Busan, Daejeon)
4. SUM - Summation¶
-- Total sales amount
SELECT SUM(amount) FROM sales;
-- 4653000
-- Total quantity sold
SELECT SUM(quantity) FROM sales;
-- 51
-- Conditional sum
SELECT SUM(amount) FROM sales WHERE category = 'Electronics';
5. AVG - Average¶
-- Average sales amount
SELECT AVG(amount) FROM sales;
-- 465300
-- Handle decimals
SELECT ROUND(AVG(amount), 2) AS avg_amount FROM sales;
-- Conditional average
SELECT ROUND(AVG(amount), 2)
FROM sales
WHERE region = 'Seoul';
6. MIN / MAX - Minimum/Maximum¶
-- Minimum sales amount
SELECT MIN(amount) FROM sales;
-- 45000
-- Maximum sales amount
SELECT MAX(amount) FROM sales;
-- 1800000
-- Most recent sale date
SELECT MAX(sale_date) FROM sales;
-- Oldest sale date
SELECT MIN(sale_date) FROM sales;
7. Using Multiple Aggregate Functions Together¶
SELECT
COUNT(*) AS total_count,
SUM(amount) AS total_sales,
ROUND(AVG(amount), 2) AS avg_sales,
MIN(amount) AS min_sales,
MAX(amount) AS max_sales,
SUM(quantity) AS total_quantity
FROM sales;
8. GROUP BY - Grouping¶
Groups data by specific columns for aggregation.
Basic GROUP BY¶
-- Sales by category
SELECT
category,
COUNT(*) AS count,
SUM(amount) AS total_amount
FROM sales
GROUP BY category;
Result:
category │ count │ total_amount
─────────────┼───────┼──────────────
Electronics │ 6 │ 3795000
Furniture │ 4 │ 858000
Sales by Region¶
SELECT
region,
COUNT(*) AS sales_count,
SUM(amount) AS total_amount,
ROUND(AVG(amount), 2) AS avg_amount
FROM sales
GROUP BY region
ORDER BY total_amount DESC;
Sales by Product¶
SELECT
product,
SUM(quantity) AS total_qty,
SUM(amount) AS total_sales
FROM sales
GROUP BY product
ORDER BY total_sales DESC;
9. Multi-Column GROUP BY¶
-- Sales by category + region
SELECT
category,
region,
COUNT(*) AS count,
SUM(amount) AS total
FROM sales
GROUP BY category, region
ORDER BY category, region;
Result:
category │ region │ count │ total
─────────────┼────────┼───────┼─────────
Furniture │ Daejeon│ 1 │ 250000
Furniture │ Busan │ 1 │ 180000
Furniture │ Seoul │ 2 │ 430000
Electronics │ Daejeon│ 1 │ 45000
Electronics │ Busan │ 2 │ 1900000
Electronics │ Seoul │ 3 │ 1850000
10. HAVING - Group Filtering¶
WHERE filters before grouping, HAVING filters after grouping.
-- Only categories with total sales >= 500,000
SELECT
category,
SUM(amount) AS total_amount
FROM sales
GROUP BY category
HAVING SUM(amount) >= 500000;
WHERE + HAVING¶
-- Products with sales >= 1,000,000 in Seoul and Busan regions
SELECT
product,
SUM(amount) AS total_amount
FROM sales
WHERE region IN ('Seoul', 'Busan') -- Filter before grouping
GROUP BY product
HAVING SUM(amount) >= 1000000 -- Filter after grouping
ORDER BY total_amount DESC;
Using Aliases in HAVING (PostgreSQL)¶
-- PostgreSQL allows aliases in HAVING
SELECT
product,
SUM(amount) AS total
FROM sales
GROUP BY product
HAVING SUM(amount) > 500000; -- Standard way
-- Or (works in some PostgreSQL versions)
-- HAVING total > 500000; -- Works only in some versions
11. GROUP BY + JOIN¶
-- Setup: Categories table
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(50),
description TEXT
);
INSERT INTO categories (name, description) VALUES
('Electronics', 'Electronic products'),
('Furniture', 'Furniture products');
-- Aggregate with category information
SELECT
c.name AS category,
c.description,
COUNT(s.id) AS sales_count,
SUM(s.amount) AS total_sales
FROM categories c
LEFT JOIN sales s ON c.name = s.category
GROUP BY c.id, c.name, c.description;
12. Date-Based Aggregation¶
Daily Sales¶
SELECT
sale_date,
COUNT(*) AS count,
SUM(amount) AS daily_total
FROM sales
GROUP BY sale_date
ORDER BY sale_date;
Monthly Sales¶
SELECT
DATE_TRUNC('month', sale_date) AS month,
COUNT(*) AS count,
SUM(amount) AS monthly_total
FROM sales
GROUP BY DATE_TRUNC('month', sale_date)
ORDER BY month;
Yearly Sales¶
SELECT
EXTRACT(YEAR FROM sale_date) AS year,
SUM(amount) AS yearly_total
FROM sales
GROUP BY EXTRACT(YEAR FROM sale_date);
13. Conditional Aggregation¶
CASE + SUM¶
SELECT
SUM(CASE WHEN category = 'Electronics' THEN amount ELSE 0 END) AS electronics,
SUM(CASE WHEN category = 'Furniture' THEN amount ELSE 0 END) AS furniture
FROM sales;
FILTER (PostgreSQL 9.4+)¶
SELECT
COUNT(*) FILTER (WHERE category = 'Electronics') AS electronics_count,
COUNT(*) FILTER (WHERE category = 'Furniture') AS furniture_count,
SUM(amount) FILTER (WHERE region = 'Seoul') AS seoul_sales
FROM sales;
14. ROLLUP and CUBE¶
ROLLUP - Add Subtotals¶
SELECT
category,
region,
SUM(amount) AS total
FROM sales
GROUP BY ROLLUP (category, region)
ORDER BY category NULLS LAST, region NULLS LAST;
Result:
category │ region │ total
─────────────┼────────┼──────────
Furniture │ Daejeon│ 250000
Furniture │ Busan │ 180000
Furniture │ Seoul │ 430000
Furniture │ NULL │ 860000 ← Furniture subtotal
Electronics │ Daejeon│ 45000
Electronics │ Busan │ 1900000
Electronics │ Seoul │ 1850000
Electronics │ NULL │ 3795000 ← Electronics subtotal
NULL │ NULL │ 4655000 ← Grand total
CUBE - All Combination Subtotals¶
SELECT
category,
region,
SUM(amount) AS total
FROM sales
GROUP BY CUBE (category, region)
ORDER BY category NULLS LAST, region NULLS LAST;
GROUPING - Distinguish NULL¶
SELECT
CASE WHEN GROUPING(category) = 1 THEN 'All' ELSE category END AS category,
CASE WHEN GROUPING(region) = 1 THEN 'All' ELSE region END AS region,
SUM(amount) AS total
FROM sales
GROUP BY ROLLUP (category, region);
15. Practice Examples¶
Practice 1: Basic Aggregation¶
-- 1. Overall sales statistics
SELECT
COUNT(*) AS total_sales,
SUM(amount) AS total_revenue,
ROUND(AVG(amount), 0) AS avg_revenue,
MIN(amount) AS min_revenue,
MAX(amount) AS max_revenue
FROM sales;
-- 2. Sales statistics by category
SELECT
category,
COUNT(*) AS sales_count,
SUM(quantity) AS total_quantity,
SUM(amount) AS total_revenue,
ROUND(AVG(amount), 0) AS avg_revenue
FROM sales
GROUP BY category
ORDER BY total_revenue DESC;
Practice 2: Complex Conditions¶
-- 1. Sales by region (500,000+ only)
SELECT
region,
SUM(amount) AS total
FROM sales
GROUP BY region
HAVING SUM(amount) >= 500000
ORDER BY total DESC;
-- 2. Product quantity ranking
SELECT
product,
SUM(quantity) AS total_qty
FROM sales
GROUP BY product
ORDER BY total_qty DESC
LIMIT 5;
Practice 3: Date Aggregation¶
-- 1. Daily sales trend
SELECT
sale_date,
SUM(amount) AS daily_sales,
SUM(SUM(amount)) OVER (ORDER BY sale_date) AS cumulative_sales
FROM sales
GROUP BY sale_date
ORDER BY sale_date;
-- 2. Average daily sales for last 7 days
SELECT
ROUND(AVG(daily_total), 2) AS avg_daily_sales
FROM (
SELECT sale_date, SUM(amount) AS daily_total
FROM sales
WHERE sale_date >= CURRENT_DATE - INTERVAL '7 days'
GROUP BY sale_date
) daily;
Practice 4: Crosstab (Pivot)¶
-- Category × Region sales crosstab
SELECT
category,
SUM(amount) FILTER (WHERE region = 'Seoul') AS seoul,
SUM(amount) FILTER (WHERE region = 'Busan') AS busan,
SUM(amount) FILTER (WHERE region = 'Daejeon') AS daejeon,
SUM(amount) AS total
FROM sales
GROUP BY category;
Result:
category │ seoul │ busan │ daejeon │ total
─────────────┼─────────┼─────────┼─────────┼──────────
Furniture │ 430000 │ 180000 │ 250000 │ 860000
Electronics │ 1850000 │ 1900000 │ 45000 │ 3795000
16. Query Execution Order¶
FROM / JOIN ← Specify tables
↓
WHERE ← Filter rows
↓
GROUP BY ← Group
↓
HAVING ← Filter groups
↓
SELECT ← Select columns
↓
DISTINCT ← Remove duplicates
↓
ORDER BY ← Sort
↓
LIMIT/OFFSET ← Limit results
Next Steps¶
Learn about subqueries and WITH clauses in 08_Subqueries_and_CTE.md!