1. 예제 자료

library(readxl)
library(dplyr)

# Read the survey workbook and hold it as a plain data.frame.
mydf <- read_excel(path = "/cloud/project/mydata.xlsx") %>%
  as.data.frame()

# Overwrite the workbook headers with short names: five background columns
# followed by the 18 `bpns` items and the 10 `ctq` items.
names(mydf) <- c(
  "agree", "age", "sex", "edu", "marital",
  paste0("bpns", 1:18),
  paste0("ctq", 1:10)
)

# Build the analysis data set: screen rows, reverse-code items, sum subscales.
mydf3 <- mydf %>%
  # Keep rows where bpns14 is at most 5 and ctq7, ctq9, ctq10 are at most 4
  # (presumably the scale maxima, i.e. out-of-range entries are dropped --
  # TODO confirm against the codebook).
  filter(bpns14 <= 5, ctq7 <= 4, ctq9 <= 4, ctq10 <= 4) %>%
  # Keep only rows with no missing value in any column.
  filter(rowSums(is.na(.)) == 0) %>%
  # Reverse-coded bpns items (6 - x).
  mutate(
    bpns1r = 6 - bpns1,
    bpns2r = 6 - bpns2,
    bpns3r = 6 - bpns3,
    bpns6r = 6 - bpns6,
    bpns14r = 6 - bpns14
  ) %>%
  # Reverse-coded ctq items (5 - x). They are appended in this order so the
  # `ctq6r:ctq10r` column range used below covers exactly these five columns.
  mutate(
    ctq6r = 5 - ctq6,
    ctq7r = 5 - ctq7,
    ctq8r = 5 - ctq8,
    ctq9r = 5 - ctq9,
    ctq10r = 5 - ctq10
  ) %>%
  # Subscale totals from the bpns items (reverse-coded versions where present).
  mutate(
    autonomy = rowSums(
      select(., bpns1r, bpns2r, bpns3r, bpns4, bpns5, bpns6r)
    ),
    competence = rowSums(select(., bpns7:bpns12)),
    related = rowSums(select(., bpns13, bpns14r, bpns15:bpns18))
  ) %>%
  # Subscale totals from the ctq items.
  mutate(
    abuse = rowSums(select(., ctq1:ctq5)),
    neglect = rowSums(select(., ctq6r:ctq10r))
  )

2. 회귀 분석

2-1. lm() 함수 사용하기

# Simple linear regression: autonomy predicted by neglect alone.
m1 <- lm(
  formula = autonomy ~ neglect,
  data = mydf3
)
summary(m1)

# Call:
#   lm(formula = autonomy ~ neglect, data = mydf3)
# 
# Residuals:
#   Min       1Q   Median       3Q      Max 
# -10.6511  -2.4828  -0.0373   2.4875  11.1212 
# 
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 24.42346    0.49533  49.307  < 2e-16 ***
# neglect     -0.27724    0.04766  -5.817 1.51e-08 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.827 on 305 degrees of freedom
# Multiple R-squared:  0.09988,	Adjusted R-squared:  0.09693 
# F-statistic: 33.84 on 1 and 305 DF,  p-value: 1.51e-08

<aside> 📎 lm() 함수에서 formula 인자 입력 방법

# Multiple regression: predictors are joined with `+` on the right-hand side.
m2 <- lm(
  formula = autonomy ~ neglect + abuse + age,
  data = mydf3
)
summary(m2)
# Call:
#   lm(formula = autonomy ~ neglect + abuse + age, data = mydf3)
# 
# Residuals:
#   Min       1Q   Median       3Q      Max 
# -10.9453  -2.4717  -0.0349   2.5277  10.8665 
# 
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 25.49669    1.38454  18.415  < 2e-16 ***
# neglect     -0.25775    0.05395  -4.778 2.77e-06 ***
# abuse       -0.05737    0.08027  -0.715    0.475    
# age         -0.02710    0.04045  -0.670    0.503    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.834 on 303 degrees of freedom
# Multiple R-squared:  0.1024,	Adjusted R-squared:  0.09354 
# F-statistic: 11.53 on 3 and 303 DF,  p-value: 3.556e-07
# Narrow the data to the response plus the three predictors of interest.
mydf4 <- select(mydf3, autonomy, neglect, abuse, age)

# `.` on the right-hand side means "every other column in `data`", so this
# fits the same model as autonomy ~ neglect + abuse + age.
m3 <- lm(
  formula = autonomy ~ .,
  data = mydf4
)
summary(m3)
# Call:
#   lm(formula = autonomy ~ ., data = mydf4)
# 
# Residuals:
#   Min       1Q   Median       3Q      Max 
# -10.9453  -2.4717  -0.0349   2.5277  10.8665 
# 
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 25.49669    1.38454  18.415  < 2e-16 ***
# neglect     -0.25775    0.05395  -4.778 2.77e-06 ***
# abuse       -0.05737    0.08027  -0.715    0.475    
# age         -0.02710    0.04045  -0.670    0.503    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.834 on 303 degrees of freedom
# Multiple R-squared:  0.1024,	Adjusted R-squared:  0.09354 
# F-statistic: 11.53 on 3 and 303 DF,  p-value: 3.556e-07
# `. - age` uses every other column of `data` except age as a predictor.
m4 <- lm(
  formula = autonomy ~ . - age ,
  data = mydf4
)
summary(m4)
# Call:
#   lm(formula = autonomy ~ . - age, data = mydf4)
# 
# Residuals:
#   Min      1Q  Median      3Q     Max 
# -10.645  -2.420  -0.055   2.417  10.816 
# 
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 24.66980    0.62662  39.369  < 2e-16 ***
# neglect     -0.26151    0.05361  -4.878 1.73e-06 ***
# abuse       -0.05122    0.07967  -0.643    0.521    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.83 on 304 degrees of freedom
# Multiple R-squared:  0.1011,	Adjusted R-squared:  0.09518 
# F-statistic:  17.1 on 2 and 304 DF,  p-value: 9.21e-08
# `neglect:abuse` adds only the interaction term; the two main effects are
# listed explicitly alongside it.
m5 <- lm(
  formula = autonomy ~ neglect + abuse + neglect:abuse,
  data = mydf3
)
summary(m5)
# Call:
#   lm(formula = autonomy ~ neglect + abuse + neglect:abuse, data = mydf3)
# 
# Residuals:
#   Min       1Q   Median       3Q      Max 
# -10.5692  -2.4235  -0.0107   2.4872  11.0013 
# 
# Coefficients:
#               Estimate Std. Error t value Pr(>|t|)    
# (Intercept)   25.61800    1.56131  16.408  < 2e-16 ***
# neglect       -0.33694    0.12577  -2.679  0.00779 ** 
# abuse         -0.19374    0.22923  -0.845  0.39867    
# neglect:abuse  0.01088    0.01641   0.663  0.50772    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.834 on 303 degrees of freedom
# Multiple R-squared:  0.1024,	Adjusted R-squared:  0.09351 
# F-statistic: 11.52 on 3 and 303 DF,  p-value: 3.571e-07
# `*` expands to all main effects plus every two-way and three-way
# interaction among neglect, abuse and age.
m6 <- lm(
  formula = autonomy ~ neglect * abuse * age,
  data = mydf3
)
summary(m6)
# Call:
#   lm(formula = autonomy ~ neglect * abuse * age, data = mydf3)
# 
# Residuals:
#   Min       1Q   Median       3Q      Max 
# -10.6159  -2.4345   0.0037   2.5148  10.9317 
# 
# Coefficients:
#                   Estimate Std. Error t value Pr(>|t|)    
# (Intercept)       32.500684   9.218282   3.526 0.000489 ***
# neglect           -1.163237   0.955644  -1.217 0.224477    
# abuse             -1.010648   1.230678  -0.821 0.412179    
# age               -0.208272   0.298421  -0.698 0.485774    
# neglect:abuse      0.112349   0.100643   1.116 0.265186    
# neglect:age        0.025652   0.030302   0.847 0.397924    
# abuse:age          0.024035   0.040790   0.589 0.556159    
# neglect:abuse:age -0.003109   0.003208  -0.969 0.333385    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.845 on 299 degrees of freedom
# Multiple R-squared:  0.1092,	Adjusted R-squared:  0.08833 
# F-statistic: 5.235 on 7 and 299 DF,  p-value: 1.191e-05
# I() makes `+` arithmetic inside the formula, so neglect + abuse enters the
# model as one summed predictor instead of two separate terms.
m7 <- lm(
  formula = autonomy ~ I(neglect + abuse),
  data = mydf3
)
summary(m7)
# Call:
#   lm(formula = autonomy ~ I(neglect + abuse), data = mydf3)
# 
# Residuals:
#   Min       1Q   Median       3Q      Max 
# -10.6535  -2.5483   0.0571   2.4255  10.5569 
# 
# Coefficients:
#                    Estimate Std. Error t value Pr(>|t|)    
# (Intercept)        24.96914    0.60739   41.11  < 2e-16 ***
# I(neglect + abuse) -0.18420    0.03331   -5.53 6.88e-08 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.845 on 305 degrees of freedom
# Multiple R-squared:  0.09113,	Adjusted R-squared:  0.08815 
# F-statistic: 30.58 on 1 and 305 DF,  p-value: 6.885e-08

</aside>

2-2. 범주형 독립변수

# Categorical predictor: lm() dummy-codes `sex` automatically. The single
# coefficient reported is for the non-reference level.
m8 <- lm(
  formula = related ~ sex,
  data = mydf3
)
summary(m8)
# Call:
#   lm(formula = related ~ sex, data = mydf3)
# 
# Residuals:
#   Min       1Q   Median       3Q      Max 
# -10.0732  -2.0732  -0.0732   1.9268   7.1569 
# 
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)    
# (Intercept)  22.8431     0.3265  69.974  < 2e-16 ***
# sex여성       1.2300     0.3995   3.079  0.00227 ** 
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.297 on 305 degrees of freedom
# Multiple R-squared:  0.03015,	Adjusted R-squared:  0.02697 
# F-statistic:  9.48 on 1 and 305 DF,  p-value: 0.002266
# Make `sex` a factor with an explicit level order; the first level listed
# becomes the reference category in the model below.
mydf3[["sex"]] <- factor(x = mydf3[["sex"]], levels = c("여성", "남성"))

# Refit the same model so the coefficient is expressed against the new
# reference level.
m8 <- lm(
  formula = related ~ sex,
  data = mydf3
)
summary(m8)
# summary(m8)
# Call:
#   lm(formula = related ~ sex, data = mydf3)
# 
# Residuals:
#   Min       1Q   Median       3Q      Max 
# -10.0732  -2.0732  -0.0732   1.9268   7.1569 
# 
# Coefficients:
#              Estimate Std. Error t value Pr(>|t|)    
# (Intercept)  24.0732     0.2303 104.542  < 2e-16 ***
# sex남성      -1.2300     0.3995  -3.079  0.00227 ** 
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 3.297 on 305 degrees of freedom
# Multiple R-squared:  0.03015,	Adjusted R-squared:  0.02697 
# F-statistic:  9.48 on 1 and 305 DF,  p-value: 0.002266