서브쿼리와 CTE

서브쿼리와 CTE

1. 서브쿼리란?

서브쿼리(Subquery)는 쿼리 안에 포함된 또 다른 쿼리입니다.

SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders);  -- 서브쿼리
          
       괄호 안의 쿼리

2. WHERE 절 서브쿼리

스칼라 서브쿼리 (단일 값)

-- 평균 가격보다 비싼 상품
SELECT * FROM products
WHERE price > (SELECT AVG(price) FROM products);

-- 최신 주문 날짜의 주문들
SELECT * FROM orders
WHERE order_date = (SELECT MAX(order_date) FROM orders);

다중 행 서브쿼리

-- 주문한 적 있는 사용자
SELECT * FROM users
WHERE id IN (SELECT DISTINCT user_id FROM orders);

-- 전자기기를 구매한 사용자
SELECT * FROM users
WHERE id IN (
    SELECT o.user_id FROM orders o
    JOIN order_items oi ON o.id = oi.order_id
    JOIN products p ON oi.product_id = p.id
    WHERE p.category = '전자기기'
);

NOT IN

-- 주문한 적 없는 사용자
SELECT * FROM users
WHERE id NOT IN (
    SELECT user_id FROM orders WHERE user_id IS NOT NULL
);
-- 주의: NOT IN에서 NULL이 있으면 결과가 비어버릴 수 있음

ANY / SOME

-- 어떤 전자기기보다 비싼 가구
SELECT * FROM products
WHERE category = '가구'
  AND price > ANY (SELECT price FROM products WHERE category = '전자기기');
-- = ANY 는 IN과 동일

ALL

-- 모든 전자기기보다 비싼 상품
SELECT * FROM products
WHERE price > ALL (SELECT price FROM products WHERE category = '전자기기');

3. EXISTS / NOT EXISTS

행의 존재 여부만 확인합니다.

-- 주문이 있는 사용자
SELECT * FROM users u
WHERE EXISTS (
    SELECT 1 FROM orders o WHERE o.user_id = u.id
);

-- 주문이 없는 사용자
SELECT * FROM users u
WHERE NOT EXISTS (
    SELECT 1 FROM orders o WHERE o.user_id = u.id
);

IN vs EXISTS

-- IN: 서브쿼리 결과를 메모리에 로드
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders);

-- EXISTS: 매 행마다 존재 여부 확인
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);

-- 일반적으로:
-- - 서브쿼리 결과가 작으면 IN
-- - 서브쿼리 결과가 크면 EXISTS
-- - NOT IN 대신 NOT EXISTS 권장 (NULL 문제 방지)

4. FROM 절 서브쿼리 (인라인 뷰)

-- 카테고리별 평균 가격 계산 후 필터링
SELECT *
FROM (
    SELECT category, AVG(price) AS avg_price
    FROM products
    GROUP BY category
) AS category_avg
WHERE avg_price > 100000;

-- 서브쿼리에 별칭 필수 (AS category_avg)

복잡한 집계 후 JOIN

-- 사용자별 주문 통계와 사용자 정보 결합
SELECT
    u.name,
    u.email,
    stats.order_count,
    stats.total_amount
FROM users u
JOIN (
    SELECT
        user_id,
        COUNT(*) AS order_count,
        SUM(amount) AS total_amount
    FROM orders
    GROUP BY user_id
) AS stats ON u.id = stats.user_id;

5. SELECT 절 서브쿼리 (스칼라 서브쿼리)

-- 각 상품과 함께 카테고리 평균 가격 표시
SELECT
    name,
    price,
    (SELECT AVG(price) FROM products p2 WHERE p2.category = p.category) AS category_avg
FROM products p;

-- 각 사용자의 주문 수
SELECT
    u.name,
    (SELECT COUNT(*) FROM orders o WHERE o.user_id = u.id) AS order_count
FROM users u;

6. 상관 서브쿼리

외부 쿼리의 값을 참조하는 서브쿼리입니다.

-- 자신의 카테고리 평균보다 비싼 상품
SELECT * FROM products p
WHERE price > (
    SELECT AVG(price) FROM products WHERE category = p.category
);
--                                                    ↑ 외부 쿼리 참조

-- 각 카테고리에서 가장 비싼 상품
SELECT * FROM products p
WHERE price = (
    SELECT MAX(price) FROM products WHERE category = p.category
);

7. CTE (Common Table Expression)

WITH 절을 사용하여 임시 결과 집합에 이름을 붙입니다.

기본 CTE

-- 서브쿼리 방식
SELECT * FROM (
    SELECT category, AVG(price) AS avg_price
    FROM products
    GROUP BY category
) AS category_stats
WHERE avg_price > 100000;

-- CTE 방식 (더 읽기 쉬움)
WITH category_stats AS (
    SELECT category, AVG(price) AS avg_price
    FROM products
    GROUP BY category
)
SELECT * FROM category_stats
WHERE avg_price > 100000;

여러 CTE 사용

WITH
-- 카테고리별 통계
category_stats AS (
    SELECT
        category,
        COUNT(*) AS product_count,
        AVG(price) AS avg_price
    FROM products
    GROUP BY category
),
-- 고가 상품 (100만원 이상)
expensive_products AS (
    SELECT * FROM products WHERE price >= 1000000
)
SELECT
    cs.category,
    cs.product_count,
    cs.avg_price,
    COUNT(ep.id) AS expensive_count
FROM category_stats cs
LEFT JOIN expensive_products ep ON cs.category = ep.category
GROUP BY cs.category, cs.product_count, cs.avg_price;

CTE와 메인 쿼리 결합

WITH monthly_sales AS (
    SELECT
        DATE_TRUNC('month', order_date) AS month,
        SUM(amount) AS total
    FROM orders
    GROUP BY DATE_TRUNC('month', order_date)
)
SELECT
    month,
    total,
    LAG(total) OVER (ORDER BY month) AS prev_month,
    total - LAG(total) OVER (ORDER BY month) AS diff
FROM monthly_sales
ORDER BY month;

8. 재귀 CTE (WITH RECURSIVE)

자기 자신을 참조하는 CTE입니다.

조직도 탐색

-- 직원 테이블
CREATE TABLE employees (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    manager_id INTEGER REFERENCES employees(id)
);

INSERT INTO employees (name, manager_id) VALUES
('CEO', NULL),
('CTO', 1),
('개발팀장', 2),
('개발자A', 3),
('개발자B', 3),
('CFO', 1),
('재무팀장', 6);

-- CEO부터 모든 부하 직원 조회
WITH RECURSIVE org_tree AS (
    -- 기본 케이스: CEO
    SELECT id, name, manager_id, 1 AS level, name::TEXT AS path
    FROM employees
    WHERE manager_id IS NULL

    UNION ALL

    -- 재귀 케이스: 부하 직원들
    SELECT
        e.id,
        e.name,
        e.manager_id,
        ot.level + 1,
        ot.path || ' > ' || e.name
    FROM employees e
    JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT
    REPEAT('  ', level - 1) || name AS org_chart,
    level,
    path
FROM org_tree
ORDER BY path;

결과:

    org_chart    │ level │           path
─────────────────┼───────┼──────────────────────────
 CEO             │     1 │ CEO
   CFO           │     2 │ CEO > CFO
     재무팀장    │     3 │ CEO > CFO > 재무팀장
   CTO           │     2 │ CEO > CTO
     개발팀장    │     3 │ CEO > CTO > 개발팀장
       개발자A   │     4 │ CEO > CTO > 개발팀장 > 개발자A
       개발자B   │     4 │ CEO > CTO > 개발팀장 > 개발자B

숫자 시퀀스 생성

-- 1부터 10까지
WITH RECURSIVE numbers AS (
    SELECT 1 AS n
    UNION ALL
    SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;

날짜 범위 생성

-- 최근 7일
WITH RECURSIVE date_range AS (
    SELECT CURRENT_DATE - INTERVAL '6 days' AS date
    UNION ALL
    SELECT date + INTERVAL '1 day'
    FROM date_range
    WHERE date < CURRENT_DATE
)
SELECT date::DATE FROM date_range;

9. 실습 예제

샘플 데이터

-- 테이블 생성
CREATE TABLE departments (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100)
);

CREATE TABLE employees (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    department_id INTEGER REFERENCES departments(id),
    salary NUMERIC(10, 2),
    hire_date DATE
);

-- 데이터 삽입
INSERT INTO departments (name) VALUES
('개발'), ('마케팅'), ('인사'), ('재무');

INSERT INTO employees (name, department_id, salary, hire_date) VALUES
('김개발', 1, 5000000, '2020-03-15'),
('이개발', 1, 4500000, '2021-06-20'),
('박마케팅', 2, 4000000, '2019-11-10'),
('최마케팅', 2, 3800000, '2022-01-05'),
('정인사', 3, 3500000, '2020-08-25'),
('한재무', 4, 4200000, '2021-03-10'),
('오재무', 4, 3900000, '2022-07-15');

실습 1: WHERE 서브쿼리

-- 1. 전체 평균 급여보다 높은 직원
SELECT name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);

-- 2. 가장 최근 입사한 직원
SELECT * FROM employees
WHERE hire_date = (SELECT MAX(hire_date) FROM employees);

-- 3. 개발 또는 마케팅 부서 직원
SELECT * FROM employees
WHERE department_id IN (
    SELECT id FROM departments WHERE name IN ('개발', '마케팅')
);

실습 2: 상관 서브쿼리

-- 1. 자기 부서 평균보다 급여가 높은 직원
SELECT
    e.name,
    e.salary,
    d.name AS department
FROM employees e
JOIN departments d ON e.department_id = d.id
WHERE e.salary > (
    SELECT AVG(salary)
    FROM employees
    WHERE department_id = e.department_id
);

-- 2. 각 부서에서 급여가 가장 높은 직원
SELECT * FROM employees e
WHERE salary = (
    SELECT MAX(salary)
    FROM employees
    WHERE department_id = e.department_id
);

실습 3: CTE 활용

-- 1. 부서별 통계와 함께 직원 정보 조회
WITH dept_stats AS (
    SELECT
        department_id,
        AVG(salary) AS avg_salary,
        COUNT(*) AS emp_count
    FROM employees
    GROUP BY department_id
)
SELECT
    e.name,
    e.salary,
    d.name AS department,
    ds.avg_salary AS dept_avg,
    ds.emp_count AS dept_count
FROM employees e
JOIN departments d ON e.department_id = d.id
JOIN dept_stats ds ON e.department_id = ds.department_id;

-- 2. 급여 순위와 함께 조회
WITH ranked_employees AS (
    SELECT
        *,
        RANK() OVER (ORDER BY salary DESC) AS salary_rank,
        RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) AS dept_rank
    FROM employees
)
SELECT
    name,
    salary,
    salary_rank AS 전체순위,
    dept_rank AS 부서내순위
FROM ranked_employees
ORDER BY salary_rank;

실습 4: 복합 활용

-- 각 부서에서 평균 이상 급여를 받는 직원과 그 차이
WITH
dept_avg AS (
    SELECT department_id, AVG(salary) AS avg_salary
    FROM employees
    GROUP BY department_id
),
above_avg AS (
    SELECT
        e.*,
        da.avg_salary,
        e.salary - da.avg_salary AS diff
    FROM employees e
    JOIN dept_avg da ON e.department_id = da.department_id
    WHERE e.salary >= da.avg_salary
)
SELECT
    aa.name,
    d.name AS department,
    aa.salary,
    ROUND(aa.avg_salary, 0) AS dept_avg,
    ROUND(aa.diff, 0) AS above_avg_by
FROM above_avg aa
JOIN departments d ON aa.department_id = d.id
ORDER BY aa.diff DESC;

10. 서브쿼리 vs CTE vs JOIN

상황 권장
단순 값 비교 서브쿼리
여러 번 참조 CTE
테이블 연결 JOIN
복잡한 로직 분리 CTE
재귀 탐색 WITH RECURSIVE

다음 단계

09_Views_and_Indexes.md에서 VIEW와 INDEX를 배워봅시다!

to navigate between lessons