Chapter 4

Example-4-1-cv.R
# CV for alpha=5% and 1% using the t distribution with 522 d.f.:
alpha <- c(0.05, 0.01)
qt(1-alpha, 522)

# Critical values for alpha=5% and 1% using the normal approximation:
qnorm(1-alpha)
Example-4-1.R
data(wage1, package='wooldridge')

# OLS regression:
summary( lm(log(wage) ~ educ+exper+tenure, data=wage1) )
Example-4-10.R
data(meap93, package='wooldridge')

# define new variable within data frame
meap93$b_s <- meap93$benefits / meap93$salary

# Estimate three different models
model1<- lm(log(salary) ~ b_s                       , data=meap93)
model2<- lm(log(salary) ~ b_s+log(enroll)+log(staff), data=meap93)
model3<- lm(log(salary) ~ b_s+log(enroll)+log(staff)+droprate+gradrate
                                                    , data=meap93)
# Load package and display table of results
library(stargazer)
stargazer(list(model1,model2,model3),type="text",keep.stat=c("n","rsq"))
Example-4-3-cv.R
# CV for alpha=5% and 1% using the t distribution with 137 d.f.:
alpha <- c(0.05, 0.01)
qt(1-alpha/2, 137)

# Critical values for alpha=5% and 1% using the normal approximation:
qnorm(1-alpha/2)
Example-4-3.R
data(gpa1, package='wooldridge')

# Store results under "sumres" and display full table:
( sumres <- summary( lm(colGPA ~ hsGPA+ACT+skipped, data=gpa1) ) )

# Manually confirm the formulas: Extract coefficients and SE
regtable <- sumres$coefficients
bhat <- regtable[,1]
se   <- regtable[,2]

# Reproduce t statistic
( tstat <- bhat / se )
# Reproduce p value
( pval  <- 2*pt(-abs(tstat),137) )
Example-4-8.R
data(rdchem, package='wooldridge')

# OLS regression:
myres <- lm(log(rd) ~ log(sales)+profmarg, data=rdchem)

# Regression output:
summary(myres)

# 95% CI:
confint(myres)

# 99% CI:
confint(myres, level=0.99)
F-Test-MLB-auto.R
data(mlb1, package='wooldridge')

# Unrestricted OLS regression:
res.ur <- lm(log(salary) ~ years+gamesyr+bavg+hrunsyr+rbisyr, data=mlb1)

# Load package "car" (which has to be installed on the computer)
library(car)

# F test
myH0 <- c("bavg","hrunsyr","rbisyr")
linearHypothesis(res.ur, myH0)
F-Test-MLB-auto2.R
# F test (F-Test-MLB-auto.R has to be run first!)
myH0 <- c("bavg", "hrunsyr=2*rbisyr")
linearHypothesis(res.ur, myH0)
F-Test-MLB-auto3.R
# Note: Script "F-Test-MLB-auto.R" has to be run first to create res.ur.
# Which variables used in res.ur contain "yr" in their names?
myH0 <- matchCoefs(res.ur,"yr")
myH0

# F test (F-Test-MLB-auto.R has to be run first!)
linearHypothesis(res.ur, myH0)
F-Test-MLB-restr.R
data(mlb1, package='wooldridge')

summary(lm(log(salary) ~ years+gamesyr, data=mlb1))
F-Test-MLB-unrestr.R
summary(lm(log(salary) ~ years+gamesyr+bavg+hrunsyr+rbisyr, data=mlb1))
F-Test-MLB.R
data(mlb1, package='wooldridge')

# Unrestricted OLS regression:
res.ur <- lm(log(salary) ~ years+gamesyr+bavg+hrunsyr+rbisyr, data=mlb1)

# Restricted OLS regression:
res.r <- lm(log(salary) ~ years+gamesyr, data=mlb1)

# R2:
( r2.ur <- summary(res.ur)$r.squared )
( r2.r <- summary(res.r)$r.squared )

# F statistic:
( F <- (r2.ur-r2.r) / (1-r2.ur) * 347/3 )

# p value = 1-cdf of the appropriate F distribution:
1-pf(F, 3,347)
Fex-CV.R
# CV for alpha=1% using the F distribution with 3 and 347 d.f.:
qf(1-0.01, 3,347)
Example-4-1-cv.py
import scipy.stats as stats
import numpy as np

# CV for alpha=5% and 1% using the t distribution with 522 d.f.:
alpha = np.array([0.05, 0.01])
cv_t = stats.t.ppf(1 - alpha, 522)
print(f'cv_t: {cv_t}\n')

# CV for alpha=5% and 1% using the normal approximation:
cv_n = stats.norm.ppf(1 - alpha)
print(f'cv_n: {cv_n}\n')
Example-4-1.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

wage1 = woo.dataWoo('wage1')

reg = smf.ols(formula='np.log(wage) ~ educ + exper + tenure', data=wage1)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
Example-4-10.py
# Not in use...
#import wooldridge as woo
#import numpy as np
#import statsmodels.formula.api as smf
#import stargazer.stargazer as sg
#
#meap93 = woo.dataWoo('meap93')
#meap93['b_s'] = meap93['benefits'] / meap93['salary']
#
# OLS regression:
#reg1 = smf.ols(formula='np.log(salary) ~ b_s', data=meap93)
#results1 = reg1.fit()
#reg2 = smf.ols(formula='np.log(salary) ~ b_s + np.log(enroll) + np.log(staff)', data=meap93)
#results2 = reg2.fit()
#reg3 = smf.ols(formula='np.log(salary) ~ b_s + np.log(enroll) + np.log(staff) + droprate + gradrate', data=meap93)
#results3 = reg3.fit()
#
## create latex table of results with stargazer
#stargazer = sg.Stargazer([results1, results2, results3])
#print(stargazer.render_latex())
Example-4-3-cv.py
import scipy.stats as stats
import numpy as np

# CV for alpha=5% and 1% using the t distribution with 137 d.f.:
alpha = np.array([0.05, 0.01])
cv_t = stats.t.ppf(1 - alpha / 2, 137)
print(f'cv_t: {cv_t}\n')

# CV for alpha=5% and 1% using the normal approximation:
cv_n = stats.norm.ppf(1 - alpha / 2)
print(f'cv_n: {cv_n}\n')
Example-4-3.py
import wooldridge as woo
import statsmodels.formula.api as smf
import scipy.stats as stats

gpa1 = woo.dataWoo('gpa1')

# store and display results:
reg = smf.ols(formula='colGPA ~ hsGPA + ACT + skipped', data=gpa1)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')

# manually confirm the formulas, i.e. extract coefficients and SE:
b = results.params
se = results.bse

# reproduce t statistic:
tstat = b / se
print(f'tstat: \n{tstat}\n')

# reproduce p value:
pval = 2 * stats.t.cdf(-abs(tstat), 137)
print(f'pval: \n{pval}\n')
Example-4-8.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

rdchem = woo.dataWoo('rdchem')

# OLS regression:
reg = smf.ols(formula='np.log(rd) ~ np.log(sales) + profmarg', data=rdchem)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')

# 95% CI:
CI95 = results.conf_int(0.05)
print(f'CI95: \n{CI95}\n')

# 99% CI:
CI99 = results.conf_int(0.01)
print(f'CI99: \n{CI99}\n')
F-Test-Automatic.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

mlb1 = woo.dataWoo('mlb1')

# OLS regression:
reg = smf.ols(
    formula='np.log(salary) ~ years + gamesyr + bavg + hrunsyr + rbisyr',
    data=mlb1)
results = reg.fit()

# automated F test:
hypotheses = ['bavg = 0', 'hrunsyr = 0', 'rbisyr = 0']
ftest = results.f_test(hypotheses)
fstat = ftest.statistic[0][0]
fpval = ftest.pvalue

print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
F-Test-Automatic2.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

mlb1 = woo.dataWoo('mlb1')

# OLS regression:
reg = smf.ols(
    formula='np.log(salary) ~ years + gamesyr + bavg + hrunsyr + rbisyr',
    data=mlb1)
results = reg.fit()

# automated F test:
hypotheses = ['bavg = 0', 'hrunsyr = 2*rbisyr']
ftest = results.f_test(hypotheses)
fstat = ftest.statistic[0][0]
fpval = ftest.pvalue

print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
F-Test.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf
import scipy.stats as stats

mlb1 = woo.dataWoo('mlb1')
n = mlb1.shape[0]

# unrestricted OLS regression:
reg_ur = smf.ols(
    formula='np.log(salary) ~ years + gamesyr + bavg + hrunsyr + rbisyr',
    data=mlb1)
fit_ur = reg_ur.fit()
r2_ur = fit_ur.rsquared
print(f'r2_ur: {r2_ur}\n')

# restricted OLS regression:
reg_r = smf.ols(formula='np.log(salary) ~ years + gamesyr', data=mlb1)
fit_r = reg_r.fit()
r2_r = fit_r.rsquared
print(f'r2_r: {r2_r}\n')

# F statistic:
fstat = (r2_ur - r2_r) / (1 - r2_ur) * (n - 6) / 3
print(f'fstat: {fstat}\n')

# CV for alpha=1% using the F distribution with 3 and 347 d.f.:
cv = stats.f.ppf(1 - 0.01, 3, 347)
print(f'cv: {cv}\n')

# p value = 1-cdf of the appropriate F distribution:
fpval = 1 - stats.f.cdf(fstat, 3, 347)
print(f'fpval: {fpval}\n')
Example-4-1-cv.jl
using Distributions

# CV for alpha=5% and 1% using the t distribution with 522 d.f.:
alpha = [0.05, 0.01]
cv_t = round.(quantile.(TDist(522), 1 .- alpha), digits=5)
println("cv_t = $cv_t\n")

# CV for alpha=5% and 1% using the normal approximation:
cv_n = round.(quantile.(Normal(), 1 .- alpha), digits=5)
println("cv_n = $cv_n")
Example-4-1.jl
using WooldridgeDatasets, GLM, DataFrames

wage1 = DataFrame(wooldridge("wage1"))

reg = lm(@formula(log(wage) ~ educ + exper + tenure), wage1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
Example-4-10.jl
using WooldridgeDatasets, GLM, DataFrames, RegressionTables

meap93 = DataFrame(wooldridge("meap93"))
meap93.b_s = meap93.benefits ./ meap93.salary

# estimate three different models:
reg1 = lm(@formula(log(salary) ~ b_s), meap93)
reg2 = lm(@formula(log(salary) ~ b_s + log(enroll) + log(staff)), meap93)
reg3 = lm(@formula(log(salary) ~
                b_s + log(enroll) + log(staff) + droprate + gradrate), meap93)

# print results with RegressionTables:
regtable(reg1, reg2, reg3)
Example-4-3-cv.jl
using Distributions

# CV for alpha=5% and 1% using the t distribution with 137 d.f.:
alpha = [0.05, 0.01]
cv_t = round.(quantile.(TDist(137), 1 .- alpha ./ 2), digits=5)
println("cv_t = $cv_t\n")

# CV for alpha=5% and 1% using the normal approximation:
cv_n = round.(quantile.(Normal(), 1 .- alpha ./ 2), digits=5)
println("cv_n = $cv_n")
Example-4-3.jl
using WooldridgeDatasets, GLM, DataFrames, Distributions

gpa1 = DataFrame(wooldridge("gpa1"))

# store and display results:
reg = lm(@formula(colGPA ~ hsGPA + ACT + skipped), gpa1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")

# manually confirm the formulas, i.e. extract coefficients and SE:
b = coef(reg)
se = stderror(reg)

# reproduce t statistic:
tstat = round.(b ./ se, digits=5)
println("tstat = $tstat\n")

# reproduce p value:
pval = round.(2 * cdf.(TDist(137), -abs.(tstat)), digits=5)
println("pval = $pval")
Example-4-8-2.jl
using WooldridgeDatasets, GLM, DataFrames

rdchem = DataFrame(wooldridge("rdchem"))

# OLS regression:
reg_ur = lm(@formula(log(rd) ~ log(sales) + profmarg), rdchem)
reg_r = lm(@formula(log(rd) ~ 1), rdchem)

# automated F test:
ftest_res = ftest(reg_r.model, reg_ur.model)

fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
Example-4-8.jl
using WooldridgeDatasets, GLM, DataFrames, Distributions

rdchem = DataFrame(wooldridge("rdchem"))

# OLS regression:
reg = lm(@formula(log(rd) ~ log(sales) + profmarg), rdchem)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")

# replicating 95% CI:
alpha = 0.05
CI95_upper = coef(reg) .+ stderror(reg) .* quantile(TDist(32 - 3), alpha / 2)
CI95_lower = coef(reg) .- stderror(reg) .* quantile(TDist(32 - 3), alpha / 2)
println("CI95_upper = $CI95_upper\n")
println("CI95_lower = $CI95_lower\n")

# calculating 99% CI:
alpha = 0.01
CI99_upper = coef(reg) .+ stderror(reg) .* quantile(TDist(32 - 3), alpha / 2)
CI99_lower = coef(reg) .- stderror(reg) .* quantile(TDist(32 - 3), alpha / 2)
println("CI99_upper = $CI99_upper\n")
println("CI99_lower = $CI99_lower")
F-Test-Automatic.jl
using WooldridgeDatasets, GLM, DataFrames

mlb1 = DataFrame(wooldridge("mlb1"))

# OLS regression:
reg_ur = lm(@formula(log(salary) ~
                years + gamesyr + bavg + hrunsyr + rbisyr), mlb1)
reg_r = lm(@formula(log(salary) ~
                years + gamesyr), mlb1)

# automated F test:
ftest_res = ftest(reg_r.model, reg_ur.model)

fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
F-Test-Automatic2.jl
using WooldridgeDatasets, GLM, DataFrames

mlb1 = DataFrame(wooldridge("mlb1"))

# OLS regression:
reg_ur = lm(@formula(log(salary) ~
                years + gamesyr + bavg + hrunsyr + rbisyr), mlb1)

# restrictions "bavg = 0" and "hrunsyr = 2*rbisyr":
mlb1.newvar = 2 * mlb1.hrunsyr + mlb1.rbisyr
reg_r = lm(@formula(log(salary) ~ years + gamesyr + newvar), mlb1)

# automated F test:
ftest_res = ftest(reg_r.model, reg_ur.model)

fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
F-Test.jl
using WooldridgeDatasets, GLM, DataFrames, Distributions

mlb1 = DataFrame(wooldridge("mlb1"))

# unrestricted OLS regression:
reg_ur = lm(@formula(log(salary) ~
        years + gamesyr + bavg + hrunsyr + rbisyr), mlb1)
r2_ur = r2(reg_ur)
println("r2_ur = $r2_ur\n")

# restricted OLS regression:
reg_r = lm(@formula(log(salary) ~ years + gamesyr), mlb1)
r2_r = r2(reg_r)
println("r2_r = $r2_r\n")

# F statistic:
n = nobs(reg_ur)
fstat = (r2_ur - r2_r) / (1 - r2_ur) * (n - 6) / 3
println("fstat = $fstat\n")

# CV for alpha=1% using the F distribution with 3 and 347 d.f.:
cv = quantile(FDist(3, 347), 1 - 0.01)
println("cv = $cv\n")

# p value = 1-cdf of the appropriate F distribution:
fpval = 1 - cdf(FDist(3, 347), fstat)
println("fpval = $fpval")