## ----echo = FALSE, message = FALSE--------------------------------------------
library(dplyr); library(flextable); library(knitr); library(officer);  library(tidyr)
# Chunk defaults: merge source and output into one block and prefix output
# lines with "#>":
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Keep tibble printing short (four rows):
options(
  tibble.print_min = 4L,
  tibble.print_max = 4L
)

## ----echo = FALSE, results = 'asis'-------------------------------------------
# Cell labels for a generic 2 by 2 table: exposure status in the rows, outcome
# status in the columns. The Exposure- row holds cells c and d, so the
# Outcome- column reads b, d and b + d.
twobytwo.df <- data.frame(
  "exp" = c("Exposure+", "Exposure-", "Total"),
  "dpos" = c("a", "c", "a + c"),
  "dneg" = c("b", "d", "b + d"),
  "total" = c("a + b", "c + d", "a + b + c + d")
)

# Create a header key data frame (maps column keys to displayed header labels):
hkey.df <- data.frame(
  col_keys = c("exp", "dpos", "dneg", "total"),
  h1 = c("", "Outcome+", "Outcome-", "Total"),
  stringsAsFactors = FALSE
)

# Create table:
caption.t <- flextable::as_paragraph(
  as_chunk(
    "Table 1: A 2 × 2 contingency table.",
    props = fp_text(font.size = 11, font.family = "Arial", bold = TRUE)
  )
)

border_h <- fp_border(color = "black", width = 2)

ft <- flextable(twobytwo.df) %>%
  # Columns 1 to 3 are 2 inches wide, the total column is 4 inches wide:
  width(j = 1:3, width = 2.00) %>%
  width(j = 4, width = 4.00) %>%
  set_header_df(mapping = hkey.df, key = "col_keys") %>%
  bg(bg = "grey80", part = "header") %>%
  hline_top(border = border_h, part = "all") %>%
  align(align = "left", part = "all") %>%
  set_caption(caption = caption.t)
ft

## ----echo = FALSE, results = 'asis'-------------------------------------------

# Cell labels for a 2 by 2 table with an extra column for incidence risk.
# The Exposure- row holds cells c and d, so the Outcome- column reads b, d and
# b + d. The text in the risk column is a placeholder: every body cell of that
# column is re-composed below so that the conditioning term is a subscript.
irr.df <- data.frame(
  "exp" = c("Exposure+", "Exposure-", "Total"),
  "dpos" = c("a", "c", "a + c"),
  "dneg" = c("b", "d", "b + d"),
  "total" = c("a + b", "c + d", "a + b + c + d"),
  risk = c(
    "R[D+|E+] = a \U00F7 (a + b)",
    "R[D+|E-] = c \U00F7 (c + d)",
    "R[D+|E±] = (a + c) \U00F7 (a + b + c + d)"
  )
)

# Create a header key data frame (maps column keys to displayed header labels):
hkey.df <- data.frame(
  col_keys = c("exp", "dpos", "dneg", "total", "risk"),
  h1 = c("", "Outcome+", "Outcome-", "Total", "Risk"),
  stringsAsFactors = FALSE
)

# Create table:
caption.t <- flextable::as_paragraph(
  as_chunk(
    "Table 2: A 2 × 2 contingency table with incidence risks calculated for the exposure positive, the exposure negative and the entire study population.",
    props = fp_text(font.size = 11, font.family = "Arial", bold = TRUE)
  )
)

border_h <- fp_border(color = "black", width = 2)

ft <- flextable(irr.df) %>%
  # Columns 1 to 4 are 2 inches wide, the risk column is 4 inches wide:
  width(j = 1:4, width = 2.00) %>%
  width(j = 5, width = 4.00) %>%
  set_header_df(mapping = hkey.df, key = "col_keys") %>%
  bg(bg = "grey80", part = "header") %>%
  hline_top(border = border_h, part = "all") %>%
  align(align = "left", part = "all") %>%
  set_caption(caption = caption.t)

# Subscript text (vfixa) and the formula that follows it (vfixb), one entry
# per table row:
vfixa <- c("[D+|E+]", "[D+|E-]", "[D+|E±]")
vfixb <- c(
  " = a \U00F7 (a + b)",
  " = c \U00F7 (c + d)",
  " = (a + c) \U00F7 (a + b + c + d)"
)

# Rebuild each risk cell as "R" + subscript + formula:
for (i in seq_along(vfixa)) {
  ft <- compose(
    ft,
    j = "risk",
    i = i,
    value = as_paragraph(
      as_chunk("R", props = fp_text()),
      as_chunk(vfixa[i], props = fp_text(vertical.align = "subscript")),
      as_chunk(vfixb[i], props = fp_text())
    )
  )
}

ft

## ----echo = FALSE, results = 'asis'-------------------------------------------
# Cell labels for a 2 by 2 table (exposure in rows, outcome in columns) with an
# extra column for the odds. The text in the odds column is a placeholder:
# every body cell of that column is re-composed below so that the conditioning
# term is a subscript.
orcohort.df <- data.frame(
  "exp" = c("Exposure+", "Exposure-", "Total"),
  "dpos" = c("a", "c", "a + c"),
  "dneg" = c("b", "d", "b + d"),
  "total" = c("a + b", "c + d", "a + b + c + d"),
  odds = c(
    "OE+ = a \U00F7 b",
    "OE- = c \U00F7 d",
    "OT = (a + c) \U00F7 (b + d)"
  )
)

# Create a header key data frame (maps column keys to displayed header labels):
hkey.df <- data.frame(
  col_keys = c("exp", "dpos", "dneg", "total", "odds"),
  h1 = c("", "Outcome+", "Outcome-", "Total", "Odds"),
  stringsAsFactors = FALSE
)

# Create table (caption spelling corrected: "contingency"):
caption.t <- flextable::as_paragraph(
  as_chunk(
    "Table 3: A 2 × 2 contingency table with incidence odds calculated for the exposure positive, the exposure negative and the entire study population.",
    props = fp_text(font.size = 11, font.family = "Arial", bold = TRUE)
  )
)

border_h <- fp_border(color = "black", width = 2)

ft <- flextable(orcohort.df) %>%
  # Columns 1 to 4 are 2 inches wide, the odds column is 4 inches wide:
  width(j = 1:4, width = 2.00) %>%
  width(j = 5, width = 4.00) %>%
  set_header_df(mapping = hkey.df, key = "col_keys") %>%
  bg(bg = "grey80", part = "header") %>%
  hline_top(border = border_h, part = "all") %>%
  align(align = "left", part = "all") %>%
  set_caption(caption = caption.t)

# Subscript text (vfixa) and the formula that follows it (vfixb), one entry
# per table row:
vfixa <- c("[D+|E+]", "[D+|E-]", "[D+|E±]")
vfixb <- c(" = a \U00F7 b", " = c \U00F7 d", " = (a + c) \U00F7 (b + d)")

# Rebuild each odds cell as "O" + subscript + formula:
for (i in seq_along(vfixa)) {
  ft <- compose(
    ft,
    j = "odds",
    i = i,
    value = as_paragraph(
      as_chunk("O", props = fp_text()),
      as_chunk(vfixa[i], props = fp_text(vertical.align = "subscript")),
      as_chunk(vfixb[i], props = fp_text())
    )
  )
}
ft

## ----echo = FALSE, results = 'asis'-------------------------------------------
# Cell labels for a 2 by 2 table laid out with outcome status in the rows and
# exposure status in the columns, plus an odds column. The odds text is a
# placeholder that is re-composed below with a subscripted conditioning term.
orcc.df <- data.frame(
  "out" = c("Outcome+", "Outcome-", "Total"),
  "epos" = c("a", "c", "a + c"),
  "eneg" = c("b", "d", "b + d"),
  "total" = c("a + b", "c + d", "a + b + c + d"),
  odds = c(
    "OE+ = a \U00F7 b",
    "OE- = c \U00F7 d",
    "OT = (a + c) \U00F7 (b + d)"
  )
)

# Header key data frame: column key -> displayed header label.
hkey.df <- data.frame(
  col_keys = c("out", "epos", "eneg", "total", "odds"),
  h1 = c("", "Exposure+", "Exposure-", "Total", "Odds"),
  stringsAsFactors = FALSE
)

# Caption text and its formatting:
caption.t <- flextable::as_paragraph(
  as_chunk(
    "Table 4: A 2 × 2 contingency table with incidence odds calculated for the outcome positive, the outcome negative and the entire study population.",
    props = fp_text(font.size = 11, font.family = "Arial", bold = TRUE)
  )
)

border_h <- fp_border(color = "black", width = 2)

# Assemble the table: column widths, header labels, shading, borders,
# alignment and caption.
ft <- flextable(orcc.df) %>%
  width(j = 1:4, width = 2.00) %>%
  width(j = 5, width = 4.00) %>%
  set_header_df(mapping = hkey.df, key = "col_keys") %>%
  bg(bg = "grey80", part = "header") %>%
  hline_top(border = border_h, part = "all") %>%
  align(align = "left", part = "all") %>%
  set_caption(caption = caption.t)

# Per-row subscript (vfixa) and trailing formula (vfixb):
vfixa <- c("[E+|D+]", "[E+|D-]", "[E+|D±]")
vfixb <- c(" = a \U00F7 b", " = c \U00F7 d", " = (a + c) \U00F7 (b + d)")

# Replace each odds cell with "O", the subscript and the formula:
for (i in seq_along(vfixa)) {
  ft <- compose(
    ft,
    j = "odds",
    i = i,
    value = as_paragraph(
      as_chunk("O", props = fp_text()),
      as_chunk(vfixa[i], props = fp_text(vertical.align = "subscript")),
      as_chunk(vfixb[i], props = fp_text())
    )
  )
}
ft

## -----------------------------------------------------------------------------
# Cell counts entered as a vector in the order a, b, c, d:
dat.v01 <- c(13, 2163, 5, 3349)
dat.v01

# View the data in the usual 2 by 2 table format (filled row by row):
matrix(dat.v01, nrow = 2, byrow = TRUE)

## ----message = FALSE----------------------------------------------------------
library(epiR)

# Cohort-count analysis of the vector of cell counts, without the plain
# English interpretation of the results:
epi.2by2(
  dat = dat.v01,
  method = "cohort.count",
  elab = "Dry food",
  olab = "FLUTD",
  digits = 2,
  conf.level = 0.95,
  units = 100,
  interpret = FALSE,
  outcome = "as.columns"
)

## ----message = FALSE----------------------------------------------------------
# Same analysis, this time with interpret = TRUE:
epi.2by2(
  dat = dat.v01,
  method = "cohort.count",
  elab = "Dry food",
  olab = "FLUTD",
  digits = 2,
  conf.level = 0.95,
  units = 100,
  interpret = TRUE,
  outcome = "as.columns"
)

## ----message = FALSE----------------------------------------------------------
library(MASS)

# Load and view the data:
dat.df02 <- birthwt
head(dat.df02)

## ----message = FALSE----------------------------------------------------------
# Cross-tabulate smoking status (rows) against low birth weight (columns):
dat.tab02 <- table(
  dat.df02$smoke,
  dat.df02$low,
  dnn = c("Smoke", "Low BW")
)
dat.tab02

## ----message = FALSE----------------------------------------------------------
dat.tab02 <- table(
  dat.df02$smoke,
  dat.df02$low,
  dnn = c("Smoke", "Low BW")
)
dat.tab02

# Reverse the order of both the rows and the columns so that level 1 comes
# first:
dat.tab02 <- dat.tab02[2:1, 2:1]
dat.tab02

## ----message = FALSE----------------------------------------------------------
# Variables low, smoke and race as factors. Put an 'f' in front of the variable names to remind you that they're factors:

dat.df02$flow <- factor(dat.df02$low, levels = c(1, 0))
dat.df02$fsmoke <- factor(dat.df02$smoke, levels = c(1, 0))
dat.df02$frace <- factor(dat.df02$race, levels = c(1, 2, 3))

# With the factor levels ordered 1 then 0 the table no longer needs to be
# re-ordered after tabulation:
dat.tab02 <- table(
  dat.df02$fsmoke,
  dat.df02$flow,
  dnn = c("Smoke", "Low BW")
)
dat.tab02

## ----message = FALSE----------------------------------------------------------
# Cohort-count analysis of the smoking by low birth weight table:
dat.epi02 <- epi.2by2(
  dat = dat.tab02,
  method = "cohort.count",
  elab = "Smoke",
  olab = "Low BW",
  digits = 2,
  conf.level = 0.95,
  units = 100,
  interpret = FALSE,
  outcome = "as.columns"
)
dat.epi02

## ----message = FALSE----------------------------------------------------------
# List the measures of association stored in the returned object:
names(dat.epi02$massoc.detail)

## ----message = FALSE----------------------------------------------------------
# Wald confidence intervals: 2.0 (95% CI 1.1 to 3.8)
dat.epi02$massoc.detail$OR.strata.wald

# Score confidence intervals: 2.0 (95% CI 1.1 to 3.8)
dat.epi02$massoc.detail$OR.strata.score


## ----message = FALSE----------------------------------------------------------
library(dplyr); library(tidyr)

dat.df03 <- birthwt
head(dat.df03)

# Here we set the factor levels and tabulate the data in a single call using pipe operators.
# The result is deliberately left as returned by summarise() because it is
# handed directly to epi.2by2() below.
dat.tab03 <- dat.df03 %>%
  mutate(
    flow = factor(low, levels = c(1, 0), labels = c("yes", "no")),
    fsmoke = factor(smoke, levels = c(1, 0), labels = c("yes", "no"))
  ) %>%
  group_by(fsmoke, flow) %>%
  summarise(n = n())

# View the data:
dat.tab03

# View the data in conventional 2 by 2 table format:
pivot_wider(
  dat.tab03,
  id_cols = c(fsmoke),
  names_from = flow,
  values_from = n
)

## ----message = FALSE----------------------------------------------------------
# Cohort-count analysis using the summarised counts as input:
dat.epi03 <- epi.2by2(
  dat = dat.tab03,
  method = "cohort.count",
  elab = "Smoke",
  olab = "Low BW",
  digits = 2,
  conf.level = 0.95,
  units = 100,
  interpret = FALSE,
  outcome = "as.columns"
)
dat.epi03

## ----message = FALSE----------------------------------------------------------
dat.df04 <- birthwt
head(dat.df04)

dat.df04$flow <- factor(dat.df04$low, levels = c(1, 0))

# Age dichotomised: 1 when age is 23 or less, 0 when age is greater than 23.
dat.df04$fage <- ifelse(dat.df04$age > 23, 0, 1)
dat.df04$fage <- factor(dat.df04$fage, levels = c(1, 0))
dat.df04$fsmoke <- factor(dat.df04$smoke, levels = c(1, 0))

# Race is coded 1 = white, 2 = black and 3 = other. Set white as the reference level:
dat.df04$frace <- ifelse(dat.df04$race == 1, 0, 1)
dat.df04$frace <- factor(dat.df04$frace, levels = c(1, 0))

# The candidate risk factors, selected by name rather than by column position
# so the analysis does not depend on the number or order of columns in the
# data frame:
rfactor <- c("fage", "fsmoke", "frace")

# Label for the reference category of each risk factor (the second factor
# level):
ref <- vapply(
  rfactor,
  function(trfactor) {
    paste0("Reference: ", trfactor, " - ", levels(dat.df04[[trfactor]])[2])
  },
  character(1),
  USE.NAMES = FALSE
)

# For each risk factor cross-tabulate it against low birth weight, run
# epi.2by2() and collect the first three elements of OR.strata.wald (point
# estimate, lower and upper confidence limit). The result is a matrix with
# one column per risk factor:
or.wald <- vapply(
  rfactor,
  function(trfactor) {
    tdat.tab04 <- table(dat.df04[[trfactor]], dat.df04$flow)
    tdat.epi04 <- epi.2by2(
      dat = tdat.tab04, method = "cohort.count",
      digits = 2, conf.level = 0.95, units = 100, interpret = FALSE,
      outcome = "as.columns"
    )
    twald <- tdat.epi04$massoc.detail$OR.strata.wald
    c(as.numeric(twald[1]), as.numeric(twald[2]), as.numeric(twald[3]))
  },
  numeric(3)
)

or.est <- unname(or.wald[1, ])
or.low <- unname(or.wald[2, ])
or.upp <- unname(or.wald[3, ])

gdat.df04 <- data.frame(
  ybrk = seq_along(rfactor), ylab = rfactor, ref, or.est, or.low, or.upp
)
gdat.df04

## ----odds_ratios, echo = TRUE, message = FALSE, fig.align = "center", out.width = "80%", fig.show = "hide", fig.cap = "Figure 6: Risk factors for low birth weight babies. Error bar plot showing the point estimate of the odds ratio and its 95% confidence interval for maternal age, smoking and race."----
library(ggplot2); library(scales)

# The x axis is drawn on the log2 scale: breaks are set in log2 units and
# labelled with the corresponding odds ratios.
xbrk <- seq(from = -2, to = 2, by = 1)
xlab <- 2^xbrk

ggplot(data = gdat.df04, aes(x = log2(or.est), y = ybrk)) +
  theme_bw() +
  # Point estimate and its confidence interval for each risk factor:
  geom_point() +
  geom_errorbar(aes(xmin = log2(or.low), xmax = log2(or.upp), width = 0.2)) +
  scale_x_continuous(
    breaks = xbrk,
    labels = xlab,
    limits = range(xbrk),
    name = "Odds ratio"
  ) +
  scale_y_continuous(
    breaks = gdat.df04$ybrk,
    labels = gdat.df04$ylab,
    name = "Risk factor"
  ) +
  # Dashed vertical line at an odds ratio of 1:
  geom_vline(xintercept = log2(1), linetype = "dashed") +
  # Reference category labels, left-aligned at an odds ratio of 0.25:
  annotate(
    "text",
    x = log2(0.25),
    y = gdat.df04$ybrk,
    label = gdat.df04$ref,
    hjust = 0,
    size = 3
  ) +
  coord_fixed(ratio = 0.75 / 1) +
  theme(axis.title.y = element_text(vjust = 0))

## ----message = FALSE----------------------------------------------------------
dat.df05 <- birthwt
head(dat.df05)

# Outcome, exposure and stratification variables as factors:
dat.df05$flow <- factor(dat.df05$low, levels = c(1, 0))

dat.df05$fsmoke <- factor(dat.df05$smoke, levels = c(1, 0))
dat.df05$frace <- factor(dat.df05$race, levels = c(1, 2, 3))

# Three-way table: smoking by low birth weight, one layer per level of race:
dat.tab05 <- table(
  dat.df05$fsmoke,
  dat.df05$flow,
  dat.df05$frace,
  dnn = c("Smoke", "Low BW", "Race")
)
dat.tab05

## ----message = FALSE----------------------------------------------------------
# Cohort-count analysis of the three-way table:
dat.epi05 <- epi.2by2(
  dat = dat.tab05,
  method = "cohort.count",
  elab = "smoke",
  olab = "Low BW",
  digits = 2,
  conf.level = 0.95,
  units = 100,
  interpret = FALSE,
  outcome = "as.columns"
)
dat.epi05

## ----message = FALSE----------------------------------------------------------
dat.df06 <- birthwt

# Set the factor levels and count the observations in each combination of
# race, smoking status and low birth weight. The result is left as returned by
# summarise() because it is handed directly to epi.2by2() below.
dat.tab06 <- dat.df06 %>%
  mutate(
    flow = factor(low, levels = c(1, 0), labels = c("yes", "no")),
    fsmoke = factor(smoke, levels = c(1, 0), labels = c("yes", "no")),
    frace = factor(race)
  ) %>%
  group_by(frace, fsmoke, flow) %>%
  summarise(n = n())
dat.tab06

# View the data in conventional 2 by 2 table format:
pivot_wider(
  dat.tab06,
  id_cols = c(frace, fsmoke),
  names_from = flow,
  values_from = n
)

## ----message = FALSE----------------------------------------------------------
# Cohort-count analysis using the summarised counts as input:
dat.epi06 <- epi.2by2(
  dat = dat.tab06,
  method = "cohort.count",
  elab = "Smoke",
  olab = "Low BW",
  digits = 2,
  conf.level = 0.95,
  units = 100,
  interpret = FALSE,
  outcome = "as.columns"
)

dat.epi06

## ----mantel_haenszel, echo = FALSE, message = FALSE, fig.show = "hide", fig.align = "center", out.width = "80%"----

# The x axis is drawn on the log2 scale: breaks are set in log2 units and
# labelled with the corresponding odds ratios.
xbrk <- seq(from = -5, to = 5, by = 1)
xlab <- 2^xbrk

# One plot row for the Mantel-Haenszel estimate followed by one row per
# stratum:
nstrata <- dat.epi06$n.strata
ybrk <- seq_len(nstrata + 1)
ylab <- c("M-H", paste0("Strata ", seq_len(nstrata)))

# Point estimates and confidence limits, in the same order as ylab
# (Mantel-Haenszel first, then the stratum-specific values):
or.est <- c(dat.epi06$massoc.detail$OR.mh$est,
   dat.epi06$massoc.detail$OR.strata.cfield$est)
or.low <- c(dat.epi06$massoc.detail$OR.mh$lower,
   dat.epi06$massoc.detail$OR.strata.cfield$lower)
or.upp <- c(dat.epi06$massoc.detail$OR.mh$upper,
   dat.epi06$massoc.detail$OR.strata.cfield$upper)
gdat.df06 <- data.frame(ybrk, ylab, or.est, or.low, or.upp)

ggplot(data = gdat.df06, aes(x = log2(or.est), y = ybrk)) +
  theme_bw() +
  geom_point() +
  # xmin takes the lower confidence limit and xmax the upper confidence limit:
  geom_errorbar(aes(xmin = log2(or.low), xmax = log2(or.upp), width = 0.2)) +
  scale_x_continuous(breaks = xbrk, labels = xlab, limits = range(xbrk),
   name = "Odds ratio") +
  scale_y_continuous(breaks = gdat.df06$ybrk, labels = gdat.df06$ylab, name = "Risk factor") +
  # Dashed vertical line at an odds ratio of 1:
  geom_vline(xintercept = log2(1), linetype = "dashed") +
  coord_fixed(ratio = 0.75 / 1) +
  theme(axis.title.y = element_text(vjust = 0))

