→ 정답은 없고, 스스로 처음부터 생각해보기
WITH base AS(
SELECT
DISTINCT
user_id,
user_pseudo_id,
event_name,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime
FROM advanced.app_logs
WHERE
event_date BETWEEN '2022-08-01' AND '2022-10-31'
), first_date_diff AS (
SELECT
*,
DATE_DIFF(event_week, fisrt_week, WEEK) AS diff_of_week
FROM (
SELECT
DISTINCT
user_pseudo_id,
DATE_TRUNC(MIN(event_date) OVER(PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS fisrt_week,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
), user_counts AS (
SELECT
diff_of_week,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM first_date_diff
GROUP BY diff_of_week
ORDER BY diff_of_week
)
SELECT
*,
ROUND(SAFE_DIVIDE(user_cnt, first_week_user_cnt), 3) AS retention_rate
FROM (
SELECT
diff_of_week,
user_cnt,
FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week) AS first_week_user_cnt
FROM user_counts
)
-- 1) 전 기간 기준 같은 유저-같은날-같은 이벤트 중복 제거
WITH base AS (
SELECT DISTINCT
user_id,
user_pseudo_id,
event_name,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime
FROM advanced.app_logs
),
-- 2) 유저별 서비스 최초 방문일과 그 날짜가 속한 주 정립
user_first_all AS (
SELECT
user_pseudo_id,
MIN(event_date) AS first_event_date_all,
DATE_TRUNC(MIN(event_date), WEEK(MONDAY)) AS first_week
FROM base
GROUP BY user_pseudo_id
),
-- 3) 유저X주차 단위 활동을 1번만 남기도록 설계 (주차 중복 제거)
user_week AS (
SELECT DISTINCT
b.user_pseudo_id,
DATE_TRUNC(b.event_date, WEEK(MONDAY)) AS event_week
FROM base b
),
-- 4) 유저의 각 방문 주에 대해 코호트 주로부터 경과주를 계산
first_week_and_diff AS (
SELECT
uw.user_pseudo_id,
ufa.first_week,
uw.event_week,
DATE_DIFF(uw.event_week, ufa.first_week, WEEK) AS diff_of_week
FROM user_week uw
JOIN user_first_all ufa USING (user_pseudo_id)
WHERE DATE_DIFF(uw.event_week, ufa.first_week, WEEK) >= 0 -- 코호트 주 이후만
),
-- 5) 코호트 주 x 경과 주별 신규 유저 수 (해당 diff에 활동한 신규 유저 수)
cohort_counts AS (
SELECT
first_week,
diff_of_week,
COUNT(DISTINCT user_pseudo_id) AS new_user_cnt
FROM first_week_and_diff
GROUP BY first_week, diff_of_week
)
-- 6) 최종: 코호트 크기(= diff_of_week=0) 대비 리텐션
SELECT
first_week, -- 코호트 주(월요일 날짜)
diff_of_week, -- 경과 주(0,1,2,…)
new_user_cnt, -- 해당 코호트 중 해당 경과 주에 활동한 신규 유저 수
FIRST_VALUE(new_user_cnt) OVER (
PARTITION BY first_week
ORDER BY diff_of_week
) AS cohort_user_cnt, -- 코호트 크기(= diff_of_week=0)
ROUND(
SAFE_DIVIDE(
new_user_cnt,
FIRST_VALUE(new_user_cnt) OVER (PARTITION BY first_week ORDER BY diff_of_week)
),
3
) AS new_user_retention_rate
FROM cohort_counts
ORDER BY first_week, diff_of_week;