# Introductory walkthrough of data frames: building one from vectors,
# indexing, adding and removing columns, appending rows, sorting, renaming,
# and a first look at the built-in mtcars data set.
#
# NOTE: the workspace is deliberately not wiped with rm(list = ls()) here.
# Restart the R session for a clean state instead, so that running this
# script never deletes unrelated objects.

# define vectors
age <- c(40, 50, 60)
wt <- c(100, 130, 150)

# create a data frame
MyDF <- data.frame(age, wt, NewName = c(2, 3, 4))
# View() opens a data viewer, so only call it in an interactive session
if (interactive()) {
  View(MyDF)
}
MyDF
# observations are rows and variables are columns (also called attributes)

age <- age + 3
age
MyDF
# note that age in the data frame didn't change because the data frame was
# created (and the values copied) before the change

age[2]
MyDF[1, 2] # reference elements in brackets: row number 1, column number 2
MyDF[2, 2] # reference elements in brackets: row number 2, column number 2
MyDF[1, ]  # whole first row
MyDF[, 2]  # whole second column (returned as a vector)

# overwrite one cell with the value of another
MyDF[1, 1] <- MyDF[2, 2]
# selecting a single column with [, j] drops to a plain vector
MyNewFrame <- MyDF[, 2]

mean(MyDF$age) # use $ to reference one column
age # this is different from the column used in the calculation above

# add the new age to the existing data frame as a new attribute
MyDF$NewAge <- age
MyDF
MyDF$newAge <- age
# ANOTHER WAY IS
# MyDF$age <- age

# assigning NULL removes a column
MyDF$newAge <- NULL

# append one row; every column is given explicitly instead of relying on
# rbind() silently recycling a too-short vector across the four columns
MyDF <- rbind(MyDF, data.frame(age = 5, wt = 6, NewName = 5, NewAge = 6))
MyDF

MyDF[order(MyDF$age), ] # sort ascending
MyDF <- MyDF[order(MyDF$age), ]
# reset the row names to 1..n after sorting
rownames(MyDF) <- NULL

# change column names; only the first three are renamed so that the fourth
# column keeps its name instead of becoming NA
colnames(MyDF)[1:3] <- c("Name", "name2", "name3")
# change the name of one column
colnames(MyDF)[3] <- "myName"

# ___________
# Use a function from a package (data.table) to rename rows and columns.
# Install it once with install.packages("data.table"); installing on every
# run is slow and needs network access, so it is not done unconditionally.
if (requireNamespace("data.table", quietly = TRUE)) {
  library(data.table)
  if (interactive()) {
    print(help("data.table"))
  }
  setattr(MyDF, "row.names", seq_len(nrow(MyDF)))
  setnames(MyDF, 2, "wt")
} else {
  message(
    "Package 'data.table' is not installed; ",
    "run install.packages(\"data.table\") to use it. ",
    "Falling back to the base R equivalents."
  )
  rownames(MyDF) <- seq_len(nrow(MyDF))
  colnames(MyDF)[2] <- "wt"
}
# ___________

# view the data set
if (interactive()) {
  View(MyDF)
}

# mtcars is a data set that is available within R
# to get the list of available data sets use data()
if (interactive()) {
  data()
}

# seek info about the data set
if (interactive()) {
  ?mtcars
}

# list the data
mtcars
# This is a data frame that has observations and variables.
# The row names are not a variable in the data set.
# Exploring the built-in mtcars data set: structure, summary, indexing,
# sorting, filtering, and locating the rows with extreme values.

# str() gives an idea about the metadata (structure) of the data set
str(mtcars)

# summary statistics for every column
summary(mtcars)

# work on a copy
MyCars <- mtcars
MyCars[1, ]  # first row
MyCars[1, 3] # first row, third column
MyCars[, 3]  # third column
mean(MyCars$mpg)

# sort by mpg ascending; ties are broken by cyl descending
MySortedCars <- MyCars[order(MyCars$mpg, -MyCars$cyl), ]
head(MySortedCars)
tail(MySortedCars)
tail(MySortedCars, 1)

# descending with the negative sign - high to low
MySortedCars <- MyCars[order(-MyCars$mpg), ]

MeanMPG <- mean(MyCars$mpg)
# logical vector: TRUE where a car is above the mean mpg
MySortedCars$mpg > MeanMPG
# logical vectors can be used to filter rows
MyCars[MyCars$mpg >= 21 & MyCars$mpg <= 24, ]
mtcars[mtcars$mpg > 30, ]
MyCars[MyCars$mpg > MeanMPG, 1]

max(MyCars$mpg)       # the actual value
which.max(MyCars$mpg) # which position holds the maximum value

# store them in new variables
MaxMPG <- max(MyCars$mpg)
MaxIndex <- which.max(MyCars$mpg)
MyCars[MaxIndex, ]
MyCars[which.min(MyCars$mpg), ] # show the row with the minimum

row.names(MyCars[MaxIndex, ])
CarName <- row.names(MyCars[MaxIndex, ])

# assign the row names to a new column, then replace the row names with
# plain row numbers (base R, so no extra package has to be attached)
MyCars$CarName <- row.names(MyCars)
row.names(MyCars) <- seq_len(nrow(MyCars))

cat("Max mpg & car name:", MaxMPG, CarName)

# which.max() only returns the first position when more than one value
# equals the maximum; compare against max() to get every matching row.
# The column must be referenced through the data frame (MyCars$cyl):
# a bare `cyl` is not defined in the workspace.
MyCars[MyCars$cyl == max(MyCars$cyl), ]