Add a column to an existing data frame
- Syntax 1: By equation
- Syntax 2: R’s transform() function
- Syntax 3: R’s apply function
- Syntax 4: mapply()
- {dplyr}::mutate()
Syntax 1: By equation
dataFrame$newColumn <- dataFrame$oldColumn1 + dataFrame$oldColumn2
companiesData$margin <- (companiesData$profit / companiesData$revenue) * 100
companiesData$margin <- round(companiesData$margin, 1)
Syntax 2: R’s transform() function
dataFrame <- transform(dataFrame, newColumnName = some equation)
dataFrame <- transform(dataFrame, newColumn = oldColumn1 + oldColumn2)
companiesData <- transform(companiesData, margin = (profit/revenue) * 100)
companiesData <- transform(companiesData, margin = round((profit/revenue) * 100, 1))
# pmin(): element-wise minimum across the given columns, i.e. the minimum value for each record (row)
Data_ori <- transform(Data_ori, days_Event = pmin(Data_ori$days_20111231OD1, Data_ori$days_OD2OD1, Data_ori$days_Endo1OD1, Data_ori$days_Ext1OD1, na.rm=TRUE))
- Syntax 3: R’s apply function
dataFrame$newColumn <- apply(dataFrame, 1, function(x) { . . . } )
- The second argument
- 1 by row
- 2 by column
- third argument: function(x)
- { . . . }
- whatever you want to be doing with each item you're iterating over
apply(companiesData, 1, function(x) sum(x))
apply(companiesData[,c('revenue', 'profit')], 1, function(x) sum(x))
companiesData$sums <- apply(companiesData[,c('revenue', 'profit')], 1, function(x) sum(x))
companiesData$margin <- apply(companiesData[,c('revenue', 'profit')], 1, function(x) { (x[2]/x[1]) * 100 } )
- Syntax 4: mapply()
dataFrame$newColumn <- mapply(someFunction, dataFrame$column1, dataFrame$column2, dataFrame$column3)
- first argument of mapply()
- the name of a function, not an equation or formula
# Profit margin as a percentage, rounded to one decimal place.
#
# netIncome: numeric profit value(s).
# revenue:   numeric revenue value(s) that netIncome is divided by.
# Returns:   (netIncome / revenue) * 100, rounded to 1 decimal place.
profitMargin <- function(netIncome, revenue) {
  # Return the expression itself: ending the body on an assignment would
  # return the value invisibly, so an interactive call would print nothing.
  round((netIncome / revenue) * 100, 1)
}
companiesData$margin <- mapply(profitMargin, companiesData$profit, companiesData$revenue)
Or
- create an anonymous function within mapply():
companiesData$margin <- mapply(function(x, y) round((x/y) * 100, 1), companiesData$profit, companiesData$revenue)
- mapply() applies a function element-wise across several vectors, which is useful when the function takes more than one argument
mapply(someFunction, vector1, vector2, vector3)
dplyr
# Install dplyr only when it is missing, so re-running these lines does not
# download and reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library("dplyr")
# mutate() adds the margin column: profit / revenue as a percentage,
# rounded to one decimal place.
companiesData <- mutate(
  companiesData,
  margin = round((profit / revenue) * 100, 1)
)