Step 1: Go to the R Project for Statistical Computing at www.r-project.org, download and install R
Step 2: Go to RStudio at https://rstudio.com/products/rstudio/, download and install RStudio Desktop (Open Source Edition)
ALTERNATIVELY:
Use RStudio Cloud at https://rstudio.cloud
# Print the current working directory
getwd()
## [1] "C:/Users/admin/Dropbox/myWebsite/yuriygdv.pmap4041spring2020.Rproj"
# Set the working directory; "." means the current directory, so this
# particular call leaves the working directory where it already is
setwd(".")
Commenting and executing commands:
# This is a comment
# R has no multi-line comment syntax, but in RStudio you can comment out several highlighted lines at once by pressing Ctrl+Shift+C
# Execute command with Ctrl+Enter
Four primary data structures in R:
Vectors:
# R as a calculator: an expression typed at the top level is evaluated and its
# value is printed (the lines starting with "##" show that printed output)
2 + 2 # Basic Math
## [1] 4
sqrt(4) # Square root of four
## [1] 2
# Assignment: store a value under a name so it can be reused later
x <- 1 # "<-" is the assignment ("gets") operator: x gets the value 1
x # Typing a name displays the value(s) stored in it
## [1] 1
y <- 2
y
## [1] 2
z <- x+y # The result of an expression can be assigned as well
z
## [1] 3
1:10 # ":" produces a sequence of integers
## [1] 1 2 3 4 5 6 7 8 9 10
c(1, 2, 3, 4) # c() combines values into a vector; these are floating-point numbers
## [1] 1 2 3 4
# Comparison operators return logical values (TRUE / FALSE)
1==1 # Test for equality
## [1] TRUE
1!=1 # Test for inequality
## [1] FALSE
# <, >, <=, >= # more tests
1:10 # Prints numbers 1 to 10
## [1] 1 2 3 4 5 6 7 8 9 10
print("Hello World!") # print() displays its argument explicitly
## [1] "Hello World!"
Vectors - the R workhorse, a container for values. Let’s make a simple dataset (vector):
# Building vectors: c() combines values, ":" builds an integer sequence
scores1 <- c(1, 2, 3, 4, 5) # Puts the numbers 1-5 in the variable scores1
scores2 <- 1:5 # Puts the integers 1-5 in the variable scores2
scores3 <- c(scores1, scores2, 1, 2, 3, 5) # c() also concatenates existing vectors
seq(from=1, to=5, by=1) # sequence with an explicit step
## [1] 1 2 3 4 5
rep(1, times=5) # repeat a value
## [1] 1 1 1 1 1
# Arithmetic is vectorized: it is applied element by element.
# The examples use the 5-element vectors created above so that the
# element-wise behavior is actually visible in the output.
scores1 * 2 # Multiplies each element in scores1 by 2
## [1] 2 4 6 8 10
scores1 + scores2 # Adds corresponding elements in scores1 and scores2
## [1] 2 4 6 8 10
max(scores1)
## [1] 5
min(scores3)
## [1] 1
length(scores1)
## [1] 5
# Indexing with [ ]: positions start at 1
scores1[1] # chooses 1st element of the vector scores1
## [1] 1
scores1[2:3] # chooses elements 2 through 3
## [1] 2 3
v <- c(1, 15, 2020); names(v) <- c("Month", "Day", "Year")
v["Year"] # If vector elements have names - select by name
## Year
## 2020
# Negative indices drop elements instead of selecting them
scores1[-1] # exclude element 1
## [1] 2 3 4 5
scores1[-1:-2] # exclude elements 1 through 2
## [1] 3 4 5
# Logical tests are vectorized: comparing a vector with a value gives one
# TRUE/FALSE per element
v <- 1:10
v>5
## [1] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
v <- seq(from = 1, to = 20, by = 2) # the odd numbers 1, 3, ..., 19
v[v>5] # keep only the elements for which the condition is TRUE
## [1] 7 9 11 13 15 17 19
subset(v,v > 5) # NA values are removed when using subset()
## [1] 7 9 11 13 15 17 19
which(v>5) # find the positions within v at which the condition occurs
## [1] 4 5 6 7 8 9 10
v <- 1:10
v
## [1] 1 2 3 4 5 6 7 8 9 10
# ifelse(test, yes, no) is applied to every element of v
v.dummy <- ifelse(v>5, 1,0) # creating a binary variable: 1 where v > 5, otherwise 0
# A numeric vector versus a character string that merely looks like one
a <- c(1,2,3) # numeric vector with three elements
b <- "1,2,3" # one character string, i.e. a vector of length 1
# NOTE: the next variable is named c, the same name as the c() function
c <- paste("abc","de","f") # concatenate the strings
d <- strsplit(c," ") # split the string according to blanks
length(a); length(b) # ";" separates two commands on one line
## [1] 3
## [1] 1
A matrix - a rectangular array of numbers
# Three ways to create a matrix; each assignment replaces the previous m,
# so the matrix built with rbind() is the one left in m afterwards
m <- matrix(1:4, nrow = 2) # Create a 2x2 matrix by giving the number of rows
m <- matrix(1:4, ncol = 2) # Same matrix, giving the number of columns instead
m <- rbind(c(1,4),c(2,2)) # Bind vectors together as rows: row 1 is (1, 4), row 2 is (2, 2)
Indexing/subsetting a matrix:
# Index a matrix as m[row, column]; leaving one position empty selects all of it
m[1,] # select first row
## [1] 1 4
m[,2] # select second column
## [1] 4 2
m[2,1] # select the element in the second row, first column
## [1] 2
An R list is a container for values, but its contents can be items of different data types.
# Create a list with two named components of different types
# (a number and a character string)
l <- list(test_scores=25, student_names="xyz")
l # printing a list shows each component under its $name
## $test_scores
## [1] 25
##
## $student_names
## [1] "xyz"
The expression l$name1
refers to the name1 component in the list l (the result is NULL when the list has no component with that name):
# l was created with the components test_scores and student_names only,
# so asking for a component called name1 gives NULL
l$name1
## NULL
str(l) # compact overview of the structure: component names, types and values
## List of 2
## $ test_scores : num 25
## $ student_names: chr "xyz"
A data frame in R is a list, each element of which is a vector of the same length
# Build a data frame from three columns of equal length (3 values each):
# a character column, a numeric column, and a constant column made with rep()
df <- data.frame(Month = c("Jan", "Feb", "March"), Day = c(15, 20, 22), Year = rep(2020, times = 3))
df # printed with one row per observation and one column per variable
## Month Day Year
## 1 Jan 15 2020
## 2 Feb 20 2020
## 3 March 22 2020
# Missing values: NA marks a value that is missing
v <- c(1,NA,3,4,5)
max(v) # NA propagates: the maximum is unknown if any element is missing
## [1] NA
# na.rm = TRUE removes the NA values before computing the maximum.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
max(v, na.rm = TRUE)
## [1] 5
v <- NA
c(v, 1) # NA occupies a position in the resulting vector
## [1] NA 1
v0 <- NULL
c(v0, 1) # NULL values really are counted as nonexistent
## [1] 1
# Three functions that describe what kind of object you have:
# typeof(), mode() and class() are each applied to the same objects below
a <- 1:10; b <- "1 to 10" # an integer sequence and a character string
typeof(a)
## [1] "integer"
typeof(b)
## [1] "character"
mode(a) # mode() reports "numeric" for the integer vector
## [1] "numeric"
mode(b)
## [1] "character"
class(a)
## [1] "integer"
class(b)
## [1] "character"
# The same three functions applied to a data frame
df <- data.frame(Month = c("Jan", "Feb", "March"), Day = c(15, 20, 22), Year = rep(2020, times = 3))
typeof(df) # a data frame is stored as a list
## [1] "list"
mode(df)
## [1] "list"
class(df) # ...but its class is "data.frame"
## [1] "data.frame"
# Removing objects from the workspace
rm(x) # Remove an object from workspace
rm(a, b) # Remove more than one
# ls() lists the names of all objects in the workspace, so this removes every
# one of them; use with care
rm(list = ls()) # Clear entire workspace
# Open a web page in the browser (judging by the URL, an R style guide)
browseURL("https://jef.works/R-style-guide/")
# Getting help
help(functionname) # general form: functionname is a placeholder for the function you need help with
help(mean) # open the help page for mean()
?mean # shorthand for help(mean)
example(max) # run the examples from the help page of max()
?"<" # operators are put in quotes to look up their help page
#### R Packages
See existing packages online:
# Open the CRAN list of available packages (sorted by name) in the browser
browseURL("https://cran.r-project.org/web/packages/available_packages_by_name.html")
library() # List of installed packages
search() # Shows packages that are currently loaded
Installing & loading packages:
# Reference list of package-management commands. NOTE(review): the commands use
# different example packages (summarytools, ggplot2, grid, psytabs), so they are
# presumably meant as individual examples rather than a script to run top to
# bottom - a package must be installed before it can be loaded or removed.
install.packages("summarytools") # Download and install package in R
library("ggplot2") # Load a package into R; stops with an error if the package is not installed
require("ggplot2") # Also loads a package, but returns FALSE instead of stopping with an error if it is missing; prefer library()
library(help = "ggplot2") # Brings up documentation on a package
vignette(package = "grid") # Brings up list of vignettes (examples) in editor window
browseVignettes(package = "grid") # Open web page with hyperlinks for vignette PDFs etc.
update.packages() # Checks for updates
detach("package:ggplot2", unload = TRUE) # detach package
remove.packages("psytabs") # Deletes it
See the available datasets online:
# Open the online index of the datasets package in the browser
browseURL("http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html")
library(help = "datasets") # See the list of available datasets
data() # See the list of available datasets
# Wrapped in try() so that an error here does not stop the script
try(data(package = "rpart") ) # list the data sets in the rpart package
?datasets::cars # Information about a particular dataset in the package
library(datasets) # Load the library with the built-in datasets
require(datasets) # Load the library with the built-in datasets
data(cars) # Load dataset cars
str(cars) # See the structure of the dataset cars
data(Cars93, package="MASS") # Load the dataset Cars93 from the package MASS
# Reading data from files. NOTE(review): the paths below look like placeholders;
# replace them with the location of your own files.
txt.file <- read.table("Data/file.txt", header = TRUE) # header = TRUE: the first line holds the column names
txt.file <- read.table("Data/file.txt", header = TRUE, sep = "\t") # same, for a tab-separated file
csv.file <- read.csv("Data/file.csv", header = TRUE) # comma-separated file
# NOTE(review): myData is not created anywhere in this tutorial; it stands for
# an object that already exists in your workspace.
save(myData, file = "myData.RData") # save the object to a file
load(file = "myData.RData") # load previously saved data from .RData file
Download this source file: