% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/region_matrix.R
\name{region_matrix_ops}
\alias{region_matrix_ops}
\alias{region_matrix_to_ids}
\alias{region_matrix_to_count_matrix}
\alias{region_matrix_context}
\title{Get IDs and Counts for Region Matrices}
\usage{
region_matrix_to_ids(
  corpus,
  p_attribute,
  registry = Sys.getenv("CORPUS_REGISTRY"),
  matrix
)

region_matrix_to_count_matrix(
  corpus,
  p_attribute,
  registry = Sys.getenv("CORPUS_REGISTRY"),
  matrix
)

region_matrix_context(
  corpus,
  registry = Sys.getenv("CORPUS_REGISTRY"),
  matrix,
  p_attribute,
  s_attribute,
  boundary,
  left,
  right
)
}
\arguments{
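\item{corpus}{A CWB corpus (e.g. \code{"REUTERS"}).}

\item{p_attribute}{A positional attribute (e.g. \code{"word"}).}

\item{registry}{Registry directory; defaults to the value of the
\code{CORPUS_REGISTRY} environment variable.}

\item{matrix}{A region matrix, e.g. as returned by \code{get_region_matrix()}.}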

\item{s_attribute}{If not \code{NULL}, a structural attribute (length-one
\code{character} vector), typically indicating a sentence ("s").}

\item{boundary}{Structural attribute (length-one \code{character} vector) that
serves as a boundary and that shall not be transgressed.}

\item{left}{An \code{integer} value, number of strucs to move to the left.}

\item{right}{An \code{integer} value, number of strucs to move to the right.}
}
\description{
Get the token IDs, or the counts per token ID, for the regions of a corpus
that are defined by a region matrix.
}
\examples{
# Scenario 1: Get the full text of a subcorpus defined by regions

# Region matrix for strucs 4 and 5 of the structural attribute "places"
m <- get_region_matrix(
  corpus = "REUTERS", s_attribute = "places",
  strucs = 4L:5L, registry = get_tmp_registry()
)

# Token ids of the p-attribute "word" for the regions in the matrix
ids <- region_matrix_to_ids(
  corpus = "REUTERS", p_attribute = "word",
  registry = get_tmp_registry(), matrix = m
)

# Decode the ids to strings and concatenate them to a single text
tokenstream <- cl_id2str(
  corpus = "REUTERS", p_attribute = "word",
  registry = get_tmp_registry(), id = ids
)
txt <- paste(tokenstream, collapse = " ")
txt

# Scenario 2: Get a data.frame with counts for the region matrix
y <- region_matrix_to_count_matrix(
  corpus = "REUTERS", p_attribute = "word",
  registry = get_tmp_registry(), matrix = m
)
df <- as.data.frame(y)
colnames(df) <- c("token_id", "count")

# Add the decoded token for each token id
df[["token"]] <- cl_id2str(
  corpus = "REUTERS", p_attribute = "word",
  registry = get_tmp_registry(), id = df[["token_id"]]
)

# Assign the sorted result back to df, so that head() shows the most
# frequent tokens rather than the first rows in the original order
df <- df[order(df[["count"]], decreasing = TRUE), ]
head(df)
}
