-
Notifications
You must be signed in to change notification settings - Fork 13
/
1.Basis_of_linear_regression.R
44 lines (34 loc) · 1.21 KB
/
1.Basis_of_linear_regression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#GENERATOR MODEL
n=1000 #ndata
x<-floor(runif(n,0,3)) #genotypes
beta<-rnorm(1,0,.5) #allele subst effects
mu=20 #population mean
y<-mu+beta*x+rnorm(n,0,sd=5) #generate data
y_cat<-ifelse(y<mean(y),0,1) #categorize variable
plot(x,y) #plot the data
#FREQUENTIST APPROACH
#####################
#Calculate regression coeficients
SSxy<- sum( (x-mean(x))*(y-mean(y)) )
SSxx<-sum( (x-mean(x))**2 )
b1<-SSxy/SSxx #b=cov(x,y)/var(x)
print(paste("The estimated coefficient regresion is",b1))
b0<-mean(y)-b1*mean(x) #a=E(y)+bE(x)
print(paste("The estimated intercept is",b0))
#plot data and regression line
plot(x,y)
abline(b0,b1,col="red")
#Calculate the t statistic and the p-value
SSE<-sum((y-(b0+b1*x))**2)
SSE
sR<-SSE/(length(y)-2)
t<-(b1-0)/sqrt(sR/SSxx)
t
#t-Student tables to calculate the cummulative probability of the parametric value (P-value)
#https://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf
A<- 1-pt(abs(t),df=(n-2)) #1-cummulative_probability(t,for n-2 degrees of freedom)
pvalue<-2*A
pvalue
#Use glm
summary(glm(y~x))
summary(glm(y_cat~x,family = binomial))