Chapter 14

Example-13-9-ClSE.R

library(plm);library(lmtest)
data(crime4, package='wooldridge')

# Generate pdata.frame:
crime4.p <- pdata.frame(crime4, index=c("county","year") )

# Estimate FD model:
reg <- ( plm(log(crmrte)~d83+d84+d85+d86+d87+lprbarr+lprbconv+ 
                   lprbpris+lavgsen+lpolpc,data=crime4.p, model="fd") )
# Regression table with standard SE
coeftest(reg)
# Regression table with "clustered" SE (default type HC0):
coeftest(reg, vcovHC)
# Regression table with "clustered" SE (small-sample correction)
# This is the default version used by Stata and reported by Wooldridge:
coeftest(reg, vcovHC(reg, type="sss"))

Example-14-2.R

library(plm)
data(wagepan, package='wooldridge')

# Generate pdata.frame:
wagepan.p <- pdata.frame(wagepan, index=c("nr","year") )

pdim(wagepan.p)

# Estimate FE model
summary( plm(lwage~married+union+factor(year)*educ, 
                                        data=wagepan.p, model="within") )

Example-14-4-1.R

library(plm);library(stargazer)
data(wagepan, package='wooldridge')

# Generate pdata.frame:
wagepan.p <- pdata.frame(wagepan, index=c("nr","year") )

pdim(wagepan.p)

# Check variation of variables within individuals
pvar(wagepan.p)

Example-14-4-2.R

# Estimate different models
wagepan.p$yr<-factor(wagepan.p$year)

reg.ols<- (plm(lwage~educ+black+hisp+exper+I(exper^2)+married+union+yr, 
                                      data=wagepan.p, model="pooling") )
reg.re <- (plm(lwage~educ+black+hisp+exper+I(exper^2)+married+union+yr, 
                                      data=wagepan.p, model="random") )
reg.fe <- (plm(lwage~                      I(exper^2)+married+union+yr, 
                                      data=wagepan.p, model="within") )

# Pretty table of selected results (not reporting year dummies)
stargazer(reg.ols,reg.re,reg.fe, type="text", 
          column.labels=c("OLS","RE","FE"),keep.stat=c("n","rsq"),
          keep=c("ed","bl","hi","exp","mar","un"))

Example-CRE-test-RE.R

# Note that the estimates "reg.cre" are calculated in
# Script "Example-Dummy-CRE-1.R" which has to be run first.

# RE test as an F test on the "Between" coefficients 
library(car)
linearHypothesis(reg.cre, matchCoefs(reg.cre,"Between"))

Example-CRE2.R

library(plm)
data(wagepan, package='wooldridge')

# Generate pdata.frame:
wagepan.p <- pdata.frame(wagepan, index=c("nr","year") )

# Estimate CRE parameters
wagepan.p$yr<-factor(wagepan.p$year)
summary(plm(lwage~married+union+educ+black+hisp+Between(married)+
                         Between(union), data=wagepan.p, model="random"))

Example-Dummy-CRE-1.R

library(plm);library(stargazer)
data(wagepan, package='wooldridge')

# Generate pdata.frame:
wagepan.p <- pdata.frame(wagepan, index=c("nr","year") )

# Estimate FE parameter in 3 different ways:
wagepan.p$yr<-factor(wagepan.p$year)
reg.fe <-(plm(lwage~married+union+yr*educ,data=wagepan.p, model="within"))
reg.dum<-( lm(lwage~married+union+yr*educ+factor(nr), data=wagepan.p))
reg.re <-(plm(lwage~married+union+yr*educ,data=wagepan.p, model="random"))
reg.cre<-(plm(lwage~married+union+yr*educ+Between(married)+Between(union)
                                         ,data=wagepan.p, model="random"))

Example-Dummy-CRE-2.R

stargazer(reg.fe,reg.dum,reg.cre,reg.re,type="text",model.names=FALSE,
          keep=c("married","union",":educ"),keep.stat=c("n","rsq"),
          column.labels=c("Within","Dummies","CRE","RE"))

Example-HausmTest.R

# Note that the estimates "reg.fe" and "reg.re" are calculated in
# Example 14.4. The scripts have to be run first.

# Hausman test of RE vs. FE:
phtest(reg.fe, reg.re)

Example-13-9-ClSE.py

import wooldridge as woo
import numpy as np
import pandas as pd
import linearmodels as plm

crime4 = woo.dataWoo('crime4')
crime4 = crime4.set_index(['county', 'year'], drop=False)

# estimate FD model:
reg = plm.FirstDifferenceOLS.from_formula(
    formula='np.log(crmrte) ~ year + d83 + d84 + d85 + d86 + d87 +'
            'lprbarr + lprbconv + lprbpris + lavgsen + lpolpc',
    data=crime4)

# regression with standard SE:
results_default = reg.fit()

# regression with "clustered" SE:
results_cluster = reg.fit(cov_type='clustered', cluster_entity=True,
                          debiased=False)

# regression with "clustered" SE (small-sample correction):
results_css = reg.fit(cov_type='clustered', cluster_entity=True)

# print results:
table = pd.DataFrame({'b': round(results_default.params, 4),
                      'se_default': round(results_default.std_errors, 4),
                      'se_cluster': round(results_cluster.std_errors, 4),
                      'se_css': round(results_css.std_errors, 4)})
print(f'table: \n{table}\n')

Example-14-1.py

import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf
import linearmodels as plm

jtrain = woo.dataWoo('jtrain')
jtrain['entity'] = jtrain['fcode']
jtrain = jtrain.set_index(['fcode', 'year'])

# Manual computation of deviations of entity means:
jtrain['lscrap_w'] = jtrain['lscrap'] - jtrain.groupby('fcode').mean()['lscrap']
jtrain['d88_w'] = jtrain['d88'] - jtrain.groupby('fcode').mean()['d88']
jtrain['d89_w'] = jtrain['d89'] - jtrain.groupby('fcode').mean()['d89']
jtrain['grant_w'] = jtrain['grant'] - jtrain.groupby('fcode').mean()['grant']
jtrain['grant_1_w'] = jtrain['grant_1'] - jtrain.groupby('fcode').mean()['grant_1']

# manual FE model estimation:
results_man = smf.ols(formula='lscrap_w ~ 0 + d88_w + d89_w + grant_w + grant_1_w', data=jtrain).fit()
table_man = pd.DataFrame({'b': round(results_man.params, 4),
                          'se': round(results_man.bse, 4),
                          't': round(results_man.tvalues, 4),
                          'pval': round(results_man.pvalues, 4)})
print(f'table_man: \n{table_man}\n')

# automatic FE model estimation:
reg_aut = plm.PanelOLS.from_formula(formula='lscrap ~ d88 + d89 + grant + grant_1 + EntityEffects', data=jtrain)
results_aut = reg_aut.fit()
table_aut = pd.DataFrame({'b': round(results_aut.params, 4),
                          'se': round(results_aut.std_errors, 4),
                          't': round(results_aut.tstats, 4),
                          'pval': round(results_aut.pvalues, 4)})
print(f'table_aut: \n{table_aut}\n')

Example-14-2.py

import wooldridge as woo
import pandas as pd
import linearmodels as plm

wagepan = woo.dataWoo('wagepan')
wagepan = wagepan.set_index(['nr', 'year'], drop=False)

# FE model estimation:
reg = plm.PanelOLS.from_formula(
    formula='lwage ~ married + union + C(year)*educ + EntityEffects',
    data=wagepan, drop_absorbed=True)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.std_errors, 4),
                      't': round(results.tstats, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

Example-14-4-1.py

import wooldridge as woo

wagepan = woo.dataWoo('wagepan')

# print relevant dimensions for panel:
N = wagepan.shape[0]
T = wagepan['year'].drop_duplicates().shape[0]
n = wagepan['nr'].drop_duplicates().shape[0]
print(f'N: {N}\n')
print(f'T: {T}\n')
print(f'n: {n}\n')

# check non-varying variables

# (I) across time and within individuals by calculating individual
# specific variances for each variable:
isv_nr = (wagepan.groupby('nr').var() == 0)  # True, if variance is zero
# choose variables where all grouped variances are zero:
noVar_nr = isv_nr.all(axis=0)  # which cols are completely True
print(f'isv_nr.columns[noVar_nr]: \n{isv_nr.columns[noVar_nr]}\n')

# (II) across individuals within one point in time for each variable:
isv_t = (wagepan.groupby('year').var() == 0)
noVar_t = isv_t.all(axis=0)
print(f'isv_t.columns[noVar_t]: \n{isv_t.columns[noVar_t]}\n')

Example-14-4-2.py

import wooldridge as woo
import pandas as pd
import linearmodels as plm

wagepan = woo.dataWoo('wagepan')

# estimate different models:
wagepan = wagepan.set_index(['nr', 'year'], drop=False)

reg_ols = plm.PooledOLS.from_formula(
    formula='lwage ~ educ + black + hisp + exper + I(exper**2) +'
            'married + union + C(year)', data=wagepan)
results_ols = reg_ols.fit()

reg_re = plm.RandomEffects.from_formula(
    formula='lwage ~ educ + black + hisp + exper + I(exper**2) +'
            'married + union + C(year)', data=wagepan)
results_re = reg_re.fit()

reg_fe = plm.PanelOLS.from_formula(
    formula='lwage ~ I(exper**2) + married + union +'
            'C(year) + EntityEffects', data=wagepan)
results_fe = reg_fe.fit()

# print results:
theta_hat = results_re.theta.iloc[0, 0]
print(f'theta_hat: {theta_hat}\n')

table_ols = pd.DataFrame({'b': round(results_ols.params, 4),
                          'se': round(results_ols.std_errors, 4),
                          't': round(results_ols.tstats, 4),
                          'pval': round(results_ols.pvalues, 4)})
print(f'table_ols: \n{table_ols}\n')

table_re = pd.DataFrame({'b': round(results_re.params, 4),
                         'se': round(results_re.std_errors, 4),
                         't': round(results_re.tstats, 4),
                         'pval': round(results_re.pvalues, 4)})
print(f'table_re: \n{table_re}\n')

table_fe = pd.DataFrame({'b': round(results_fe.params, 4),
                         'se': round(results_fe.std_errors, 4),
                         't': round(results_fe.tstats, 4),
                         'pval': round(results_fe.pvalues, 4)})
print(f'table_fe: \n{table_fe}\n')

Example-CRE-2.py

import wooldridge as woo
import pandas as pd
import linearmodels as plm

wagepan = woo.dataWoo('wagepan')
wagepan['t'] = wagepan['year']
wagepan['entity'] = wagepan['nr']
wagepan = wagepan.set_index(['nr'])

# include group specific means:
wagepan['married_b'] = wagepan.groupby('nr').mean()['married']
wagepan['union_b'] = wagepan.groupby('nr').mean()['union']
wagepan = wagepan.set_index(['year'], append=True)

# estimate CRE paramters:
reg = plm.RandomEffects.from_formula(
    formula='lwage ~ married + union + educ +'
            'black + hisp + married_b + union_b',
    data=wagepan)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.std_errors, 4),
                      't': round(results.tstats, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

Example-CRE-test-RE.py

import wooldridge as woo
import linearmodels as plm

wagepan = woo.dataWoo('wagepan')
wagepan['t'] = wagepan['year']
wagepan['entity'] = wagepan['nr']
wagepan = wagepan.set_index(['nr'])

# include group specific means:
wagepan['married_b'] = wagepan.groupby('nr').mean()['married']
wagepan['union_b'] = wagepan.groupby('nr').mean()['union']
wagepan = wagepan.set_index(['year'], append=True)

# estimate CRE:
reg_cre = plm.RandomEffects.from_formula(
    formula='lwage ~ married + union + C(t)*educ  + married_b + union_b',
    data=wagepan)
results_cre = reg_cre.fit()

# RE test as an Wald test on the CRE specific coefficients:
wtest = results_cre.wald_test(formula='married_b = union_b = 0')
print(f'wtest: \n{wtest}\n')

Example-Dummy-CRE-1.py

import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf
import linearmodels as plm

wagepan = woo.dataWoo('wagepan')
wagepan['t'] = wagepan['year']
wagepan['entity'] = wagepan['nr']
wagepan = wagepan.set_index(['nr'])

# include group specific means:
wagepan['married_b'] = wagepan.groupby('nr').mean()['married']
wagepan['union_b'] = wagepan.groupby('nr').mean()['union']
wagepan = wagepan.set_index(['year'], append=True)

# estimate FE parameters in 3 different ways:
reg_we = plm.PanelOLS.from_formula(
    formula='lwage ~ married + union + C(t)*educ + EntityEffects',
    drop_absorbed=True, data=wagepan)
results_we = reg_we.fit()

reg_dum = smf.ols(
    formula='lwage ~ married + union + C(t)*educ + C(entity)',
    data=wagepan)
results_dum = reg_dum.fit()

reg_cre = plm.RandomEffects.from_formula(
    formula='lwage ~ married + union + C(t)*educ + married_b + union_b',
    data=wagepan)
results_cre = reg_cre.fit()

# compare to RE estimates:
reg_re = plm.RandomEffects.from_formula(
    formula='lwage ~ married + union + C(t)*educ',
    data=wagepan)
results_re = reg_re.fit()

var_selection = ['married', 'union', 'C(t)[T.1982]:educ']

# print results:
table = pd.DataFrame({'b_we': round(results_we.params[var_selection], 4),
                      'b_dum': round(results_dum.params[var_selection], 4),
                      'b_cre': round(results_cre.params[var_selection], 4),
                      'b_re': round(results_re.params[var_selection], 4)})
print(f'table: \n{table}\n')

Example-HausmTest.py

import wooldridge as woo
import numpy as np
import linearmodels as plm
import scipy.stats as stats

wagepan = woo.dataWoo('wagepan')
wagepan = wagepan.set_index(['nr', 'year'], drop=False)

# estimation of FE and RE:
reg_fe = plm.PanelOLS.from_formula(formula='lwage ~ I(exper**2) + married +'
                                           'union + C(year) + EntityEffects',
                                   data=wagepan)
results_fe = reg_fe.fit()
b_fe = results_fe.params
b_fe_cov = results_fe.cov

reg_re = plm.RandomEffects.from_formula(
    formula='lwage ~ educ + black + hisp + exper + I(exper**2)'
            '+ married + union + C(year)', data=wagepan)
results_re = reg_re.fit()
b_re = results_re.params
b_re_cov = results_re.cov

# Hausman test of FE vs. RE
# (I) find overlapping coefficients:
common_coef = list(set(results_fe.params.index).intersection(results_re.params.index))

# (II) calculate differences between FE and RE:
b_diff = np.array(results_fe.params[common_coef] - results_re.params[common_coef])
df = len(b_diff)
b_diff.reshape((df, 1))
b_cov_diff = np.array(b_fe_cov.loc[common_coef, common_coef] -
                      b_re_cov.loc[common_coef, common_coef])
b_cov_diff.reshape((df, df))

# (III) calculate test statistic:
stat = abs(np.transpose(b_diff) @ np.linalg.inv(b_cov_diff) @ b_diff)
pval = 1 - stats.chi2.cdf(stat, df)

print(f'stat: {stat}\n')
print(f'pval: {pval}\n')

Example-14-2.jl

using WooldridgeDatasets, DataFrames, Econometrics

wagepan = DataFrame(wooldridge("wagepan"))

# FE model estimation:
reg = fit(EconometricModel,
    @formula(lwage ~ married + union +
                     d81 + d81 + d82 + d83 + d84 + d85 + d86 + d87 +
                     d81 & educ + d81 & educ + d82 & educ + d83 & educ +
                     d84 & educ + d85 & educ + d86 & educ + d87 & educ +
                     absorb(nr)),
    wagepan)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")

Example-14-4.jl

using WooldridgeDatasets, GLM, DataFrames, Econometrics

wagepan = DataFrame(wooldridge("wagepan"))

# estimate different models:
reg_ols = lm(@formula(lwage ~ educ + black + hisp + exper + (exper^2) +
                              married + union + year),
    wagepan,
    contrasts=Dict(:year => DummyCoding()))

reg_re = fit(RandomEffectsEstimator,
    @formula(lwage ~ educ + black + hisp + exper + (exper^2) +
                     married + union + year),
    wagepan,
    panel=:nr,
    time=:year,
    contrasts=Dict(:year => DummyCoding()))

reg_fe = fit(EconometricModel,
    @formula(lwage ~ (exper^2) + married + union + year + absorb(nr)),
    wagepan,
    contrasts=Dict(:year => DummyCoding()))

# print results:
table_ols = coeftable(reg_ols)
println("table_ols: \n$table_ols\n")

table_re = coeftable(reg_re)
println("table_re: \n$table_re\n")

table_fe = coeftable(reg_fe)
println("table_fe: \n$table_fe")

Example-CRE.jl

using WooldridgeDatasets, GLM, DataFrames, Econometrics, Statistics

wagepan = DataFrame(wooldridge("wagepan"))

# include group specific means:
grouped_means = combine(groupby(wagepan, :nr), [:married, :union] .=> mean)
wagepan = innerjoin(grouped_means, wagepan, on=:nr)

# estimate CRE:
reg_CRE = fit(RandomEffectsEstimator,
    @formula(lwage ~ married + union + educ + black +
                     hisp + married_mean + union_mean),
    wagepan,
    panel=:nr,
    time=:year)
table_reg = coeftable(reg_CRE)
println("table_reg: \n$table_reg")

Example-Dummy-CRE.jl

using WooldridgeDatasets, GLM, DataFrames, Econometrics, Statistics

wagepan = DataFrame(wooldridge("wagepan"))

# include group specific means:
grouped_means = combine(groupby(wagepan, :nr), [:married, :union] .=> mean)
wagepan = innerjoin(grouped_means, wagepan, on=:nr)

# estimate FE parameters in 3 different ways:
reg_we = fit(EconometricModel,
    @formula(lwage ~ married + union + absorb(nr) +
                     d81 + d81 + d82 + d83 + d84 + d85 + d86 + d87 +
                     d81 & educ + d81 & educ + d82 & educ + d83 & educ +
                     d84 & educ + d85 & educ + d86 & educ + d87 & educ),
    wagepan)

reg_dum = lm(@formula(lwage ~ married + union + year * educ + nr),
    wagepan,
    contrasts=Dict(:year => DummyCoding(), :nr => DummyCoding()))


reg_cre = fit(RandomEffectsEstimator,
    @formula(lwage ~ married + union + year * educ +
                     married_mean + union_mean),
    wagepan,
    panel=:nr,
    time=:year,
    contrasts=Dict(:year => DummyCoding()))

# compare to RE estimates:
reg_re = fit(RandomEffectsEstimator,
    @formula(lwage ~ married + union + year * educ),
    wagepan,
    panel=:nr,
    time=:year,
    contrasts=Dict(:year => DummyCoding()))

# print results for married and union:
table = DataFrame(coef_names=["married", "union"],
    b_we=round.(coef(reg_we)[[2, 3]], digits=5),
    b_dum=round.(coef(reg_dum)[[2, 3]], digits=5),
    b_cre=round.(coef(reg_cre)[[2, 3]], digits=5),
    b_re=round.(coef(reg_re)[[2, 3]], digits=5))
println("table:\n $table")