% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/count_if.R
\name{count_if}
\alias{\%in_col\%}
\alias{\%in_row\%}
\alias{apply_col_if}
\alias{apply_row_if}
\alias{count_col_if}
\alias{count_if}
\alias{count_row_if}
\alias{max_col_if}
\alias{max_if}
\alias{max_row_if}
\alias{mean_col_if}
\alias{mean_if}
\alias{mean_row_if}
\alias{median_col_if}
\alias{median_if}
\alias{median_row_if}
\alias{min_col_if}
\alias{min_if}
\alias{min_row_if}
\alias{sd_col_if}
\alias{sd_if}
\alias{sd_row_if}
\alias{sum_col_if}
\alias{sum_if}
\alias{sum_row_if}
\title{Count/sum/average/other functions on values that meet a criterion}
\usage{
count_if(criterion = NULL, ...)

count_row_if(criterion = NULL, ...)

count_col_if(criterion = NULL, ...)

criterion \%in_row\% x

criterion \%in_col\% x

sum_if(criterion = NULL, ..., data = NULL)

sum_row_if(criterion = NULL, ..., data = NULL)

sum_col_if(criterion = NULL, ..., data = NULL)

mean_if(criterion = NULL, ..., data = NULL)

mean_row_if(criterion = NULL, ..., data = NULL)

mean_col_if(criterion = NULL, ..., data = NULL)

sd_if(criterion = NULL, ..., data = NULL)

sd_row_if(criterion = NULL, ..., data = NULL)

sd_col_if(criterion = NULL, ..., data = NULL)

median_if(criterion = NULL, ..., data = NULL)

median_row_if(criterion = NULL, ..., data = NULL)

median_col_if(criterion = NULL, ..., data = NULL)

max_if(criterion = NULL, ..., data = NULL)

max_row_if(criterion = NULL, ..., data = NULL)

max_col_if(criterion = NULL, ..., data = NULL)

min_if(criterion = NULL, ..., data = NULL)

min_row_if(criterion = NULL, ..., data = NULL)

min_col_if(criterion = NULL, ..., data = NULL)

apply_row_if(fun, criterion = NULL, ..., data = NULL)

apply_col_if(fun, criterion = NULL, ..., data = NULL)
}
\arguments{
\item{criterion}{Vector with counted values, logical vector/matrix or
function. See details and examples.}

\item{...}{Data on which criterion will be applied. Vector, matrix,
data.frame, list. Shorter arguments will be recycled.}

\item{x}{Counted values or criterion for counting. Vector, matrix, data.frame,
list, function. Shorter columns in list will be recycled.}

\item{data}{Data on which function will be applied. Not applicable to
\code{count_*_if} functions. If omitted then function will be applied on
the \code{...} argument.}

\item{fun}{Custom function that will be applied based on criterion.}
}
\value{
\code{*_if} returns a single value (vector of length 1).
\code{*_row_if} returns a vector with one value for each row of supplied arguments.
\code{*_col_if} returns a vector with one value for each column of supplied arguments.
\code{\%in_row\%}/\code{\%in_col\%} return logical vector - indicator of
presence of criterion in each row/column.
}
\description{
These functions calculate count/sum/average/etc. on values that meet a
criterion that you specify. \code{apply_*_if} apply custom functions. There
are different flavors of these functions: \code{*_if} works on the entire
dataset/matrix/vector, \code{*_row_if} works on each row and \code{*_col_if}
works on each column.
}
\details{
Possible type for criterion argument:
\itemize{
\item{vector/single value}{ All values in \code{...} which are equal to elements
of the vector in criterion will be used as function argument.}
\item{function}{ Values for which function gives TRUE will be used as
function argument. There are some special functions for convenience (e.g.
\code{gt(5)} is equivalent to ">5" in a spreadsheet) - see \link{criteria}.}
\item{logical vector/matrix/data.frame}{ Values for which element of
criterion equals TRUE will be used as function argument. Logical vector
will be recycled across all columns of \code{...}/\code{data}. If criterion is
a logical matrix/data.frame then column from this matrix/data.frame will be
used for corresponding column/element of \code{...}/\code{data}. Note that
this kind of criterion doesn't use \code{...} so \code{...} can be used
instead of \code{data} argument.}}

If criterion is missing (or is NULL) then non-NA values will be
  used for function.

\code{count*} and \code{\%in*\%} never return NA's. Other functions remove
NA's before calculations (as \code{na.rm = TRUE} in base R functions).

Function criterion should return logical vector of same size and shape as its
argument. This function will be applied to each column of supplied data and
TRUE results will be used. There is asymmetrical behavior in \code{*_row_if}
and \code{*_col_if} for function criterion: in both cases function criterion
will be applied columnwise.
}
\examples{
set.seed(123)
dfs = as.data.frame(
       matrix(sample(c(1:10,NA),30,replace = TRUE),10)
)

result  = modify(dfs, {
             # count 8
             exact = count_row_if(8, V1, V2, V3)
             # count values greater than 8
             greater = count_row_if(gt(8), V1, V2, V3)
             # count integer values between 5 and 8, i.e. 5, 6, 7, 8
             integer_range = count_row_if(5:8, V1, V2, V3)
             # count values between 5 and 8 
             range = count_row_if(5 \%thru\% 8, V1, V2, V3)
             # count NA 
             na = count_row_if(is.na, V1, V2, V3)
             # count not-NA 
             not_na = count_row_if(, V1, V2, V3) 
             # are there any 5 in each row?
             has_five = 5 \%in_row\% cbind(V1, V2, V3)  
         })  
result
 
mean_row_if(6, dfs$V1, data = dfs)
median_row_if(gt(2), dfs$V1, dfs$V2, dfs$V3) 
sd_row_if(5 \%thru\% 8, dfs$V1, dfs$V2, dfs$V3)
 
if_na(dfs) = 5 # replace NA 

# custom apply
apply_col_if(prod, gt(2), dfs$V1, data = dfs) # product of all elements by columns
apply_row_if(prod, gt(2), dfs$V1, data = dfs) # product of all elements by rows
 
# Examples borrowed from Microsoft Excel help for COUNTIF
df1 = data.frame(
    a=c("apples",   "oranges",     "peaches",     "apples"),
    b = c(32, 54, 75, 86)
)

count_if("apples",df1$a) # 2

count_if("apples",df1) # 2

with(df1,count_if("apples",a,b)) # 2

count_if(gt(55),df1$b) # greater than 55 = 2

count_if(neq(75),df1$b) # not equal 75 = 3

count_if(gte(32),df1$b) # greater than or equal 32 = 4

count_if(gt(32) & lt(86),df1$b) # 2

# count only integer values between 33 and 85
count_if(33:85,df1$b) # 2

# values with letters
count_if(regex("^[A-z]+$"),df1) # 4

# values that start with 'a'
count_if(regex("^a"),df1) # 2

# count_row_if
count_row_if(regex("^a"),df1) # c(1,0,0,1)

'apples' \%in_row\% df1  # c(TRUE,FALSE,FALSE,TRUE)

# Some of Microsoft Excel examples for SUMIF/AVERAGEIF/etc 
dfs = read.csv(
    text = "
    property_value,commission,data
    100000,7000,250000
    200000,14000,	
    300000,21000,	
    400000,28000,"
)

# Sum of commission for property value greater than 160000
with(dfs, sum_if(gt(160000), property_value, data = commission)) # 63000
    
# Sum of property value greater than 160000
with(dfs, sum_if(gt(160000), property_value)) # 900000

# Sum of commission for property value equal to 300000
with(dfs, sum_if(300000, property_value, data = commission)) # 21000
    
# Sum of commission for property value greater than first value of data
with(dfs, sum_if(gt(data[1]), property_value, data = commission)) # 49000
    

dfs = data.frame(
    category = c("Vegetables", "Vegetables", "Fruits", "", "Vegetables", "Fruits"),
    food = c("Tomatoes", "Celery", "Oranges", "Butter", "Carrots", "Apples"),
    sales = c(2300, 5500, 800, 400, 4200, 1200),
    stringsAsFactors = FALSE
)

# Sum of sales for Fruits
with(dfs, sum_if("Fruits", category, data = sales)) # 2000

# Sum of sales for Vegetables    
with(dfs, sum_if("Vegetables", category, data = sales)) # 12000

# Sum of sales for food ending with 'es'
with(dfs, sum_if(perl("es$"), food, data = sales)) # 4300

# Sum of sales for empty category
with(dfs, sum_if("", category, data = sales))  # 400


dfs = read.csv(
    text = "
    property_value,commission,data
    100000,7000,250000
    200000,14000,	
    300000,21000,	
    400000,28000,"
)

# Commission average for commission less than 23000
with(dfs, mean_if(lt(23000), commission)) # 14000


# Property value average for property value less than 95000
with(dfs, mean_if(lt(95000), property_value)) #  NaN

# Commission average for property value greater than 250000
with(dfs, mean_if(gt(250000), property_value, data = commission)) # 24500


dfs = data.frame(
    region = c("East", "West", "North", "South (New Office)",  "MidWest"),
    profits = c(45678, 23789, -4789, 0, 9678),
    stringsAsFactors = FALSE
)


# Mean profits for regions whose name contains 'West' (West and MidWest)
with(dfs, mean_if(fixed("West"), region, data = profits)) # 16733.5


# Mean profits for regions which don't contain '(New Office)'
with(dfs, mean_if(!fixed("(New Office)"), region, data = profits))  # 18589


dfs = read.csv(
    text = '
    grade,weight 
    89,1
    93,2
    96,2
    85,3
    91,1
    88,1'
    ,stringsAsFactors = FALSE
)

# Minimum grade for weight equal to 1
with(dfs, min_if(1, weight, data = grade)) # 88


# Maximum grade for weight equal to 1
with(dfs, max_if(1, weight, data = grade)) #91


# Example with offset
dfs = read.csv(
    text = '
    weight,grade 
    10,b
    11,a
    100,a
    111,b
    1,a
    1,a'
    ,stringsAsFactors = FALSE
)

with(dfs, min_if("a", grade[2:5], data = weight[1:4])) # 10


}

