% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/parse_source.R, R/parse_sources.R
\name{parse_source}
\alias{parse_source}
\alias{parse_sources}
\alias{print.rockParsedSource}
\alias{print.rockParsedSources}
\alias{plot.rockParsedSources}
\title{Parsing sources}
\usage{
parse_source(text, file, codeRegexes = c(codes =
  "\\\\[\\\\[([a-zA-Z0-9._>-]+)\\\\]\\\\]"), idRegexes = c(caseId =
  "\\\\[\\\\[cid=([a-zA-Z0-9._-]+)\\\\]\\\\]", stanzaId =
  "\\\\[\\\\[sid=([a-zA-Z0-9._-]+)\\\\]\\\\]"),
  sectionRegexes = c(paragraphs = "---paragraph-break---", secondary =
  "---<[a-zA-Z0-9]?>---"), autoGenerateIds = c("stanzaId"),
  persistentIds = c("caseId"), noCodes = "^uid:|^dct:|^ci:",
  inductiveCodingHierarchyMarker = ">",
  metadataContainers = c("metadata"), codesContainers = c("codes",
  "dct"), delimiterRegEx = "^---$", ignoreRegex = "^#",
  ignoreOddDelimiters = FALSE, encoding = "UTF-8",
  postponeDeductiveTreeBuilding = FALSE, silent = TRUE)

\method{print}{rockParsedSource}(x, prefix = "### ", ...)

parse_sources(path, extension = "rock|dct", regex = NULL,
  recursive = TRUE, codeRegexes = c(codes =
  "\\\\[\\\\[([a-zA-Z0-9._>-]+)\\\\]\\\\]"), idRegexes = c(caseId =
  "\\\\[\\\\[cid=([a-zA-Z0-9._-]+)\\\\]\\\\]", stanzaId =
  "\\\\[\\\\[sid=([a-zA-Z0-9._-]+)\\\\]\\\\]"),
  sectionRegexes = c(paragraphs = "---paragraph-break---", secondary =
  "---<[a-zA-Z0-9]?>---"), autoGenerateIds = c("stanzaId"),
  persistentIds = c("caseId"), noCodes = "^uid:|^dct:|^ci:",
  inductiveCodingHierarchyMarker = ">",
  metadataContainers = c("metadata"), codesContainers = c("codes",
  "dct"), delimiterRegEx = "^---$", ignoreRegex = "^#",
  ignoreOddDelimiters = FALSE, encoding = "UTF-8", silent = TRUE)

\method{print}{rockParsedSources}(x, prefix = "### ", ...)

\method{plot}{rockParsedSources}(x, ...)
}
\arguments{
\item{text, file}{As \code{text} or \code{file}, you can specify a \code{file} to read with
encoding \code{encoding}, which will then be read using \code{\link[base:readLines]{base::readLines()}}. If the
argument is named \code{text}, whether it is the path to an existing file is checked
first, and if it is, that file is read. If the argument is named \code{file}, and it
does not point to an existing file, an error is produced (useful if calling
from other functions). A \code{text} should be a character vector where every
element is a line of the original source (like provided by \code{\link[base:readLines]{base::readLines()}});
although if a character vector of one element \emph{and} including at least one
newline character (\code{\\n}) is provided as \code{text}, it is split at the newline
characters using \code{\link[base:strsplit]{base::strsplit()}}. Basically, this behavior means that the
first argument can be either a character vector or the path to a file; and if
you're specifying a file and you want to be certain that an error is thrown if
it doesn't exist, make sure to name it \code{file}.}

\item{codeRegexes, idRegexes, sectionRegexes}{These are named character vectors with one
or more regular expressions. For \code{codeRegexes}, these specify how to extract the codes
(that were used to code the sources). For \code{idRegexes}, these specify how to extract the
different types of identifiers. For \code{sectionRegexes}, these specify how to extract the
different types of sections. The \code{codeRegexes} and \code{idRegexes} must each contain one
capturing group to capture the codes and identifiers, respectively.}

\item{autoGenerateIds}{The names of the \code{idRegexes} that, if missing, should receive
autogenerated identifiers (which consist of 'autogenerated_' followed by an incrementing
number).}

\item{persistentIds}{The names of the \code{idRegexes} for the identifiers which, once
attached to an utterance, should be attached to all following utterances as well (until
a new identifier with the same name is encountered, after which that identifier will be
attached to all following utterances, etc).}

\item{noCodes}{This regular expression is matched with all codes after they have been
extracted using the \code{codeRegexes} regular expression (i.e. they're matched against the
codes themselves without, for example, the square brackets in the default code regex). Any
codes matching this \code{noCodes} regular expression will be \strong{ignored}, i.e., removed from the
list of codes.}

\item{inductiveCodingHierarchyMarker}{For inductive coding, this marker is used to indicate
hierarchical relationships between codes. The code at the left hand side of this marker will
be considered the parent code of the code on the right hand side. More than two levels
can be specified in one code (for example, if the \code{inductiveCodingHierarchyMarker} is '>',
the code \code{grandparent>child>grandchild} would indicate codes at three levels).}

\item{metadataContainers}{The names of the YAML fragments containing metadata (i.e. attributes
about cases).}

\item{codesContainers}{The names of the YAML fragments containing (parts of) deductive coding
trees.}

\item{delimiterRegEx}{The regular expression that is used to extract the YAML fragments.}

\item{ignoreRegex}{The regular expression that is used to delete lines before any other
processing. This can be used to enable adding comments to sources, which are then ignored
during analysis.}

\item{ignoreOddDelimiters}{If an odd number of YAML delimiters is encountered, whether this
should result in an error (\code{FALSE}) or just be silently ignored (\code{TRUE}).}

\item{encoding}{The encoding of the file to read (in \code{file}).}

\item{postponeDeductiveTreeBuilding}{Whether to immediately try to build the deductive
tree(s) based on the information in this file (\code{FALSE}) or whether to skip that. Skipping
this is useful if the full tree information is distributed over multiple files (in which case
you should probably call \code{parse_sources} instead of \code{parse_source}).}

\item{silent}{Whether to provide (\code{FALSE}) or suppress (\code{TRUE}) more detailed progress updates.}

\item{x}{The object to print.}

\item{prefix}{The prefix to use before the 'headings' of the printed result.}

\item{...}{Any additional arguments are passed on to the default print method.}

\item{path}{The path containing the files to read.}

\item{extension}{The extension of the files to read; files with other extensions will
be ignored. Multiple extensions can be separated by a pipe (\code{|}).}

\item{regex}{Instead of specifying an extension, it's also possible to specify a regular
expression; only files matching this regular expression are read. If specified, \code{regex}
takes precedence over \code{extension}.}

\item{recursive}{Whether to also process subdirectories (\code{TRUE})
or not (\code{FALSE}).}
}
\description{
These functions parse one (\code{parse_source}) or more (\code{parse_sources}) sources and the
contained identifiers, sections, and codes.
}
\examples{
### Get path to example source
examplePath <-
  system.file("extdata", package="rock");

### Get a path to one example file
exampleFile <-
  file.path(examplePath, "example-1.rock");

### Parse single example source
parsedExample <- rock::parse_source(exampleFile);

### Show inductive code tree for the codes
### extracted with the regular expression specified with
### the name 'codes':
parsedExample$inductiveCodeTrees$codes;

### If you want `rock` to be chatty, use:
parsedExample <- rock::parse_source(exampleFile,
                                    silent=FALSE);

### Parse all example sources in that directory
parsedExamples <- rock::parse_sources(examplePath);

### Show combined inductive code tree for the codes
### extracted with the regular expression specified with
### the name 'codes':
parsedExamples$inductiveCodeTrees$codes;

}
