🔸 def summary_stats → 수치형 데이터의 대표값을 추출하는 함수 ⭐⭐⭐

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Counter tallies how many times each element occurs in a sequence (list, string, ...) -> used to compute the mode
from collections import Counter

# Fix the seed of NumPy's global random number generator so that the random
# values drawn through np.random are reproducible from run to run.
# 42 is only a conventional choice: in the science-fiction novel "The
# Hitchhiker's Guide to the Galaxy", 42 is the answer that explains the truth
# of the whole universe.
np.random.seed(42)

# Extract representative values (summary statistics) from numeric data
def summary_stats(series, is_countinuous=True, bins=50):
    """Return representative statistics of a numeric pandas Series.

    Args:
        series: pandas Series of numeric values. Missing values (NaN) are
            skipped by the pandas reductions and are dropped before the mode
            is estimated, so all four statistics describe the same values.
        is_countinuous: When True (continuous, real-valued data) the mode is
            estimated from a histogram; when False (discrete data such as
            scores or grades) the most frequent exact value is used. The
            misspelled name is kept because callers pass it by keyword.
        bins: Forwarded to ``np.histogram`` as its ``bins`` argument; only
            used when ``is_countinuous`` is True.

    Returns:
        dict with float values under the keys ``"mean"``, ``"median"``,
        ``"mode"`` and ``"std"``. ``"std"`` is the sample standard deviation
        (``ddof=1``). ``"mode"`` is NaN when there are no non-missing values.
    """
    mean = float(series.mean())
    median = float(series.median())
    # ddof=1 selects the sample standard deviation (divisor n - 1).
    std = float(series.std(ddof=1))

    # mean/median/std above skip NaN by default; drop NaN here as well so the
    # histogram does not fail on a non-finite range and NaN is never counted.
    values = series.dropna()

    if values.empty:
        mode_est = float("nan")
    elif is_countinuous:
        # Real-valued data rarely repeats exactly, so an exact mode is not
        # meaningful: use the midpoint of the fullest histogram bin instead.
        counts, edges = np.histogram(values, bins=bins)
        idx = int(np.argmax(counts))
        mode_est = float((edges[idx] + edges[idx + 1]) / 2.0)
    else:
        # Discrete data: the most frequent exact value. Counter.most_common
        # orders equal counts by first occurrence, so ties resolve to the
        # value seen first.
        mode_est = float(Counter(values.tolist()).most_common(1)[0][0])
    return {"mean": mean, "median": median, "mode": mode_est, "std": std}

# Use this after drawing a plot when you also want to save the figure to disk
def savefig(name):
    """Save the current matplotlib figure as ``<name>.png`` and display it.

    The layout is tightened first, the figure is written at 180 dpi with a
    tight bounding box, then it is shown and the output path is printed.

    Args:
        name: Base name of the output file; ``.png`` is appended to it.
    """
    out_path = f"{name}.png"
    plt.tight_layout()
    plt.savefig(out_path, bbox_inches="tight", dpi=180)
    plt.show()
    print(f"Saved figure to {out_path}")

# Used in finance (return calculation) -> <https://m.blog.naver.com/suyou111/222301932504>
def annualized_stats(monthly_returns):
    """Annualize a sequence of monthly returns.

    Args:
        monthly_returns: Monthly simple returns (e.g. 0.01 for +1%) as any
            array-like: list, tuple, NumPy array or pandas Series.

    Returns:
        Tuple ``(ann_return, ann_std)`` of floats. ``ann_return`` compounds
        the geometric mean monthly return over 12 months; ``ann_std`` is the
        sample standard deviation (``ddof=1``) of the monthly returns scaled
        by ``sqrt(12)``.

    Raises:
        ZeroDivisionError: If ``monthly_returns`` is empty.
    """
    # Coerce to an array so that ``1 + returns`` is element-wise even when the
    # caller passes a plain list or tuple.
    returns = np.asarray(monthly_returns, dtype=float)
    # Geometric mean monthly return: n-th root of the total growth factor.
    monthly_geom = np.prod(1 + returns) ** (1 / len(returns)) - 1
    ann_return = (1 + monthly_geom) ** 12 - 1
    ann_std = np.std(returns, ddof=1) * np.sqrt(12)
    return float(ann_return), float(ann_std)

🔎 heavy tail(긴꼬리) 분포를 가진 매출을 보여주는 시각화

### Synthetic data generation section
n = 5000
# Typical carts around 30k KRW: the bulk of the orders.
regular = np.random.lognormal(mean=np.log(30000), sigma=0.5, size=int(n * 0.97))
# VIP big orders around 300k KRW: the small remainder forming the heavy tail.
vip = np.random.lognormal(mean=np.log(300000), sigma=0.6, size=int(n * 0.03))
sales = pd.Series(np.concatenate((regular, vip)))
###

# Summary statistics of the cart amounts (histogram-based mode, 60 bins),
# tagged with a scenario label.
stats_sales = {
    **summary_stats(sales, is_countinuous=True, bins=60),
    "scenario": "Sales (KRW)",
}

# Histogram of the cart amounts on a log-scaled x-axis, with vertical marker
# lines at the mean and the median.
plt.figure()
plt.hist(sales, bins=60, color="#9bc4d5")
plt.title("Sales distribution (heavy tail)")
plt.xlabel("Cart amount (KRW)")
plt.ylabel("Count")
plt.xscale("log")
for stat_key, line_style, legend_label, line_color in (
    ("mean", "--", "Mean", "#3e4e5f"),
    ("median", ":", "Median", "#34558b"),
):
    plt.axvline(stats_sales[stat_key], linestyle=line_style,
                label=legend_label, color=line_color)
plt.legend()
plt.show()
# To also write the figure to disk, use this in place of plt.show():
# savefig("sales_hist")

image.png

🔎 두 개의 누적 수익률 비교

### Synthetic data generation section
months = 120
fundA = np.random.normal(loc=0.006, scale=0.04, size=months)    # high vol
fundB = np.random.normal(loc=0.006, scale=0.01, size=months)    # low vol
###

ann_ret_A, ann_std_A = annualized_stats(fundA)
ann_ret_B, ann_std_B = annualized_stats(fundB)

# "mean" and "std" are the annualized figures from annualized_stats; "median"
# is the monthly median scaled by 12.
# "mode" stays on the monthly scale. Monthly returns are continuous, so every
# sample is unique and pd.Series.mode() would return all of them sorted,
# making .iloc[0] the minimum return rather than a mode. Use the
# histogram-based estimate from summary_stats instead; bins="auto" lets
# np.histogram choose the bin count.
stats_fundA = {
    "mean": ann_ret_A,
    "median": float(np.median(fundA)) * 12,
    "mode": summary_stats(pd.Series(fundA), is_countinuous=True, bins="auto")["mode"],
    "std": ann_std_A,
    "scenario": "Fund A (annualized)",
}
stats_fundB = {
    "mean": ann_ret_B,
    "median": float(np.median(fundB)) * 12,
    "mode": summary_stats(pd.Series(fundB), is_countinuous=True, bins="auto")["mode"],
    "std": ann_std_B,
    "scenario": "Fund B (annualized)",
}

# Plot the cumulative return of both funds over time.
plt.figure()
# Compound the monthly returns: running product of (1 + r), minus 1.
cum_A = np.cumprod(1 + fundA) - 1
cum_B = np.cumprod(1 + fundB) - 1
# The legend labels use the Greek letter sigma; it had been mis-encoded.
plt.plot(cum_A, label="Fund A (high σ)", color="#ea435d")
plt.plot(cum_B, label="Fund B (low σ)", color="#6768ab")
plt.xlabel("Months")
plt.ylabel("Cumulative return")
plt.title("Cumulative returns: same mean, different risk")
plt.grid(axis="y", linestyle="--", alpha=0.5)
plt.legend()
plt.show()
# To also write the figure to disk, use this in place of plt.show():
# savefig("funds_cumulative")

image.png

fundA → 표준편차 0.04

fundB → 표준편차 0.01

⇒ 즉 펀드A가 펀드B보다 더 변동성이 큰, 위험한 투자 상품임