HTT: Hypothesis Testing Tree

Regression Tree

# Attach the package that provides HTT() and htt_control(); without it the
# calls below fail in a fresh session.
library(HTT)
# Load the Boston housing data from MASS without attaching MASS itself.
data("Boston", package = "MASS")
# Fit a regression tree: columns 1-13 are the predictors, column 14 (medv) is
# the response. pt = 0.01 is passed to htt_control() for the permutation test.
# NOTE(review): pt is presumably the p-value threshold a split must beat, not
# a p-value that is "set" -- confirm against ?htt_control.
htt_boston <- HTT(X = Boston[, 1:13], Y = Boston[, 14], controls = htt_control(pt = 0.01))
# Printing the fitted object shows the tree structure, one line per node.
htt_boston
#      Hypothesis Testing Tree 
# 
# node, split, n, pvalue
# * denotes terminal node
# 
# [1] root   (n = 506, pvalue = 0)
# |  [2] rm<=7.437   (n = 476) *
# |  [3] rm>7.437   (n = 30) *
# Print the split information: one row per node, with isleaf = 1 marking the
# terminal nodes and yval holding the fitted value of each node.
htt_boston$frame
#   node parent leftChild rightChild statistic  pval  split     var isleaf   n
# 1    1      0         2          3 2258.9268 0.000  7.437      rm      0 506
# 2    2      1        NA         NA    0.0000    NA <leaf>    <NA>      1 476
# 3    3      1        NA         NA   54.7354 0.045 <leaf> ptratio      1  30
#       yval
# 1 22.53281
# 2 21.11071
# 3 45.09667
# Visualize the fitted tree.
plot(htt_boston)

Classification Tree

# Fit a classification tree on the built-in iris data: columns 1-4 are the
# predictors and column 5 (the species factor) is the response.
# pt = 0.01 is the permutation-test setting passed through htt_control().
htt_iris <- HTT(X = iris[, 1:4], Y = iris[, 5], controls = htt_control(pt = 0.01))
# Draw the fitted tree using the "tree" layout.
plot(htt_iris, layout = "tree")

# Cross-tabulate the predictions returned by predict() (rows) against the
# observed species (columns); no newdata is supplied, so the table covers the
# same 150 observations the tree was fitted on.
table(predict(htt_iris), iris[, 5])
#             
#              setosa versicolor virginica
#   setosa         50          0         0
#   versicolor      0         49         5
#   virginica       0          1        45

Multivariate Regression Tree

# Load the ENB dataset (requires the package that ships it to be attached).
data("ENB")
# Fix the RNG seed so the random train/test split below is reproducible.
set.seed(1)
# Draw 80% of the row indices for training; seq_len() is the safe form of
# 1:nrow() and yields the same sample for the same seed.
train <- sample(seq_len(nrow(ENB)), floor(nrow(ENB) * 0.8))
# Columns 1-8 are used as predictors, columns 9-10 as the two responses.
train_x <- ENB[train, 1:8]
train_y <- ENB[train, 9:10]
# The remaining 20% of the rows form the held-out test set.
test_x <- ENB[-train, 1:8]
test_y <- ENB[-train, 9:10]
# Fit a tree with a two-column response, passing pt = 0.05 to htt_control().
htt_enb <- HTT(train_x, train_y, controls = htt_control(pt = 0.05))
# Predict both responses for the held-out rows.
pred <- predict(htt_enb, newdata = test_x)
# Mean absolute error, one value per response column
colMeans(abs(pred - test_y))
#       Y1       Y2 
# 3.139541 3.067190
# Mean squared error, one value per response column (squaring already
# discards the sign, so no abs() is needed)
colMeans((pred - test_y)^2)
#       Y1       Y2 
# 19.61021 18.34629