import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Counter: a frequency-counting tool that tallies how many times each element appears in a sequence (list, string, etc.) -> used when computing the mode
from collections import Counter
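# Fix the seed used by NumPy's random-number functions (to keep results reproducible).
# 42 is the number conventionally used for this. It comes from the science-fiction
# novel "The Hitchhiker's Guide to the Galaxy", in which the answer that explains
# the truth of the whole universe is 42.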
np.random.seed(42)
# Function that extracts representative (summary) values from numeric data
def summary_stats(series, is_countinuous=True, bins=50, *, is_continuous=None):
    """Return the mean, median, estimated mode and sample std of a Series.

    Args:
        series: pandas Series of numeric values.
        is_countinuous: Historical (misspelled) flag, kept so existing
            callers keep working. True treats the data as continuous and
            estimates the mode from a histogram; False treats it as discrete
            and uses the most frequent value.
        bins: Histogram bin specification passed to ``np.histogram`` when the
            data is treated as continuous.
        is_continuous: Correctly spelled, keyword-only alias. When it is not
            None it overrides ``is_countinuous``.

    Returns:
        A dict with float values under the keys "mean", "median", "mode"
        and "std".
    """
    if is_continuous is not None:
        is_countinuous = is_continuous

    mean = float(series.mean())
    median = float(series.median())
    # ddof=1 selects the sample standard deviation (n - 1 in the denominator).
    std = float(series.std(ddof=1))

    if is_countinuous:
        # Continuous values rarely repeat exactly, so the most frequent raw
        # value is not meaningful. Use the midpoint of the fullest histogram
        # bin as the mode estimate instead.
        counts, edges = np.histogram(series, bins=bins)
        idx = int(np.argmax(counts))
        mode_est = float((edges[idx] + edges[idx + 1]) / 2.0)
    else:
        # Discrete values (e.g. scores, grades): take the most frequent one.
        # most_common() is a Counter method, so the Counter must be built
        # from the list first.
        most_common = Counter(series.tolist()).most_common(1)
        # An empty series has no most-frequent value; report NaN.
        mode_est = float(most_common[0][0]) if most_common else float("nan")

    return {"mean": mean, "median": median, "mode": mode_est, "std": std}
# Use this when you want to draw a graph and save the figure
def savefig(name):
    """Save the current pyplot figure as ``<name>.png``, show it, and report the path.

    Args:
        name: Output file name without the ``.png`` extension.
    """
    out_file = f"{name}.png"
    # Tighten the layout first so the saved image matches what is shown.
    plt.tight_layout()
    plt.savefig(
        out_file,
        bbox_inches="tight",
        dpi=180,
    )
    plt.show()
    print(f"Saved figure to {out_file}")
# Used in finance (return calculation) -> <https://m.blog.naver.com/suyou111/222301932504>
def annualized_stats(monthly_returns):
    """Annualize a series of monthly returns.

    Args:
        monthly_returns: Array-like (list, tuple, NumPy array or pandas
            Series) of simple monthly returns. The code computes
            ``1 + r``, so values are expected as fractions
            (0.01 meaning 1%).

    Returns:
        A ``(ann_return, ann_std)`` tuple of floats: the geometric mean
        monthly return compounded over 12 months, and the sample standard
        deviation of the monthly returns scaled by ``sqrt(12)``.

    Raises:
        ZeroDivisionError: If ``monthly_returns`` is empty.
    """
    # Coerce once so plain Python sequences work too; `1 + list` would
    # otherwise raise TypeError.
    returns = np.asarray(monthly_returns, dtype=float)

    # Geometric mean monthly return: the n-th root of the total growth factor.
    monthly_geom = np.prod(1 + returns) ** (1 / len(returns)) - 1
    ann_return = (1 + monthly_geom) ** 12 - 1
    # Sample standard deviation (ddof=1), scaled from monthly to yearly.
    ann_std = np.std(returns, ddof=1) * np.sqrt(12)
    return float(ann_return), float(ann_std)
.std(ddof=1) : 표준편차 계산. 1이면 표본표준편차를 계산한다는 의미. 분모를 n-1로 나누게 됨. 기본값 n-1
np.argmax() : 최대값의 인덱스를 반환
plt.savefig(fname, dpi, transparent, bbox_inches) : 이미지 파일 저장
np.prod() : 배열 원소의 곱 계산
np.sqrt() : 숫자의 양의 제곱근 계산

📊 heavy tail(긴꼬리) 분포를 가진 매출 보여주는 시각화
### Synthetic data generation section
# Simulate cart amounts as a mix of two log-normal populations (a large
# "regular" group and a small "VIP" group with a 10x larger scale), so the
# combined distribution has a heavy right tail.
# The draws come from NumPy's global RNG, so the values depend on the seed
# and on how many random numbers were drawn before this point.
n = 5000
regular = np.random.lognormal(mean=np.log(30000), sigma=0.5, size=int(n * 0.97)) # typical carts around 30k KRW
vip = np.random.lognormal(mean=np.log(300000), sigma=0.6, size=int(n * 0.03)) # VIP big orders
sales = pd.Series(np.concatenate([regular, vip]))
###
# Summary statistics for the sales data; the mode is estimated from a
# 60-bin histogram. The keyword is spelled `is_countinuous` because that is
# the parameter name in summary_stats's signature.
stats_sales = summary_stats(sales, is_countinuous=True, bins=60)
stats_sales["scenario"] = "Sales (KRW)"
# Histogram of cart amounts with the mean and median marked as vertical lines.
plt.figure()
plt.hist(sales, bins=60, color="#9bc4d5")
plt.xlabel("Cart amount (KRW)")
plt.ylabel("Count")
plt.title("Sales distribution (heavy tail)")
# Logarithmic x-axis so the long right tail stays visible.
plt.xscale("log")
plt.axvline(stats_sales["mean"], linestyle="--", label="Mean", color="#3e4e5f")
plt.axvline(stats_sales["median"], linestyle=":", label="Median", color="#34558b")
plt.legend()
plt.show()
# Optional: uncomment to also write the figure to sales_hist.png.
# savefig("sales_hist")

📊 두 개의 누적 수익률 비교
### Synthetic data generation section
# Two funds whose monthly returns are drawn from normal distributions with
# the same mean (0.006) but different standard deviations (0.04 vs 0.01).
# The draws come from NumPy's global RNG, so the order of these two calls
# matters for reproducibility.
months = 120
fundA = np.random.normal(loc=0.006, scale=0.04, size=months)  # high vol
fundB = np.random.normal(loc=0.006, scale=0.01, size=months)  # low vol
###

# Annualized geometric return and annualized volatility of each fund.
ann_ret_A, ann_std_A = annualized_stats(fundA)
ann_ret_B, ann_std_B = annualized_stats(fundB)

# "mean" and "std" are the annualized figures from annualized_stats;
# "median" is the monthly median multiplied by 12; "mode" is taken from the
# raw monthly values.
# NOTE(review): the returns are continuous, so repeated values are unlikely;
# Series.mode() then returns every value sorted and .iloc[0] is simply the
# smallest monthly return. Consider the histogram-based estimate in
# summary_stats instead - confirm the intent before changing.
stats_fundA = {
    "mean": ann_ret_A,
    "median": float(np.median(fundA)) * 12,
    "mode": float(pd.Series(fundA).mode().iloc[0]),
    "std": ann_std_A,
    "scenario": "Fund A (annualized)",
}
stats_fundB = {
    "mean": ann_ret_B,
    "median": float(np.median(fundB)) * 12,
    "mode": float(pd.Series(fundB).mode().iloc[0]),
    "std": ann_std_B,
    "scenario": "Fund B (annualized)",
}

# Cumulative return after each month: running product of (1 + r), minus 1.
plt.figure()
cum_A = np.cumprod(1 + fundA) - 1
cum_B = np.cumprod(1 + fundB) - 1
# "\u03c3" is the Greek small letter sigma; the escape keeps the legend
# correct even if the file is re-saved with a wrong encoding.
plt.plot(cum_A, label="Fund A (high \u03c3)", color="#ea435d")
plt.plot(cum_B, label="Fund B (low \u03c3)", color="#6768ab")
plt.xlabel("Months")
plt.ylabel("Cumulative return")
plt.title("Cumulative returns: same mean, different risk")
plt.grid(axis="y", linestyle="--", alpha=0.5)
plt.legend()
plt.show()
# Optional: uncomment to also write the figure to funds_cumulative.png.
# savefig("funds_cumulative")

fundA → 표준편차 0.04
fundB → 표준편차 0.01
→ 즉 펀드A가 펀드B보다 더 변동성이 큰, 위험한 투자 상품임