Week 1. Introduction to R

R Primer

RStudio Cloud

Use the following link to access today’s workshop on RStudio Cloud.

RStudio Interface

Console,
Source,
Environment,
Viewer

Setting the working directory

getwd()

## [1] "C:/Users/admin/Dropbox/myWebsite/yuriygdv.pmap4041spring2020.Rproj"

setwd(".")

Commenting and executing commands:

# This is a comment
# R has no multi-line comment syntax, but in RStudio you can comment out several selected lines at once by pressing Ctrl+Shift+C
# Execute command with Ctrl+Enter

Data Structures

Four primary data structures in R:

Vectors
Matrices
Data Frames
Lists

Vectors:

Numeric
Integer
Character
Factor
Logical

Scalars

2 + 2   # Basic Math

## [1] 4

sqrt(4)  # Square root of four

## [1] 2

Variables & assignment operator

x <- 1      # <- "gets" operator
x           # Displays the values in x

## [1] 1

y <- 2
y

## [1] 2

z <- x+y
z

## [1] 3

Other Operators

1:10    # ":" produces integers

##  [1]  1  2  3  4  5  6  7  8  9 10

c(1, 2, 3, 4)     # produces floating-point numbers

## [1] 1 2 3 4

1==1    # Test for equality

## [1] TRUE

1!=1    # Test for inequality

## [1] FALSE

# <, >, <=, >=   # more tests

1:10    # Prints numbers 1 to 10

##  [1]  1  2  3  4  5  6  7  8  9 10

print("Hello World!")

## [1] "Hello World!"

Vectors

Vectors - the R workhorse, a container for values. Let’s make a simple dataset (vector):

scores1 <- c(1, 2, 3, 4, 5)   # Puts the numbers 1-5 in the variable scores1
scores2 <- 1:5                # Puts the numbers 1-5 in the variable scores2
scores3 <- c(scores1, scores2, 1, 2, 3, 5)
seq(from=1, to=5, by=1)

## [1] 1 2 3 4 5

rep(1, times=5)

## [1] 1 1 1 1 1

Vector Math

x * 2  # Multiplies each element in x by 2

## [1] 2

x + y  # Adds corresponding elements in x and y

## [1] 3

Applying functions to vectors:

max(x)

## [1] 1

min(scores3)

## [1] 1

length(x)

## [1] 1

Indexing/subsetting vectors:

x[1]  # chooses 1st element of the vector x

## [1] 1

x[2:3]  # x holds a single value here, so positions 2 and 3 do not exist and R returns NA

## [1] NA NA

v <- c(1, 15, 2020); names(v) <- c("Month", "Day", "Year")
v["Year"]   # If vector elements have names - select by name

## Year 
## 2020

Negative subscripts mean that we want to exclude the given elements in our output.

x[-1]    # exclude element 1

## numeric(0)

x[-1:-2] # exclude elements 1 through 2

## numeric(0)

Logical operations

v <- 1:10
v>5

##  [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE

Filtering

v <- seq(from = 1, to = 20, by = 2)
v[v>5]

## [1]  7  9 11 13 15 17 19

subset(v,v > 5)  # NA values are removed when using subset()

## [1]  7  9 11 13 15 17 19

which(v>5)       # find the positions within v at which the condition occurs

## [1]  4  5  6  7  8  9 10

Transforming

v <- 1:10
v

##  [1]  1  2  3  4  5  6  7  8  9 10

v.dummy <- ifelse(v>5, 1,0)  # creating a binary variable

Characters

a <- c(1,2,3)
b <- "1,2,3"
c <- paste("abc","de","f") # concatenate the strings
d <- strsplit(c," ") # split the string according to blanks

length(a); length(b)

## [1] 3

## [1] 1

Matrices

A matrix - a rectangular array of numbers

m <- matrix(1:4, nrow = 2)    # Create a 2x2 matrix
m <- matrix(1:4, ncol = 2)
m <- rbind(c(1,4),c(2,2))

Indexing/subsetting a matrix:

m[1,]      # select first row

## [1] 1 4

m[,2]      # select second column

## [1] 4 2

m[2,1]     # select the element in the second row, first column

## [1] 2

Lists

An R list is a container for values, but its contents can be items of different data types.

l <- list(test_scores=25, student_names="xyz")
l

## $test_scores
## [1] 25
## 
## $student_names
## [1] "xyz"

The expression l$name1 refers to the component called name1 in the list l. Our list has no component with that name, so the result is NULL:

l$name1

## NULL

str(l)

## List of 2
##  $ test_scores  : num 25
##  $ student_names: chr "xyz"

Data Frames

A data frame in R is a list, each element of which is a vector of the same length.

df <- data.frame(Month = c("Jan", "Feb", "March"), Day = c(15, 20, 22), Year = rep(2020, times = 3))
df

##   Month Day Year
## 1   Jan  15 2020
## 2   Feb  20 2020
## 3 March  22 2020

NA and NULL Values

v <- c(1,NA,3,4,5)
max(v)

## [1] NA

max(v,na.rm=T)

## [1] 5

v <- NA
c(v, 1)

## [1] NA  1

v0 <- NULL
c(v0, 1)   # NULL values really are counted as nonexistent

## [1] 1

Checking Data Types

a <- 1:10; b <- "1 to 10"

typeof(a)

## [1] "integer"

typeof(b)

## [1] "character"

mode(a)

## [1] "numeric"

mode(b)

## [1] "character"

class(a)

## [1] "integer"

class(b)

## [1] "character"

df <- data.frame(Month = c("Jan", "Feb", "March"), Day = c(15, 20, 22), Year = rep(2020, times = 3))
typeof(df)

## [1] "list"

mode(df)

## [1] "list"

class(df)

## [1] "data.frame"

Clean Up

rm(x)  # Remove an object from workspace
rm(a, b)  # Remove more than one
rm(list = ls())  # Clear entire workspace

R-Style Guide

browseURL("https://jef.works/R-style-guide/")

Help

help(functionname) 
help(mean)
?mean
example(max) 
?"<"

R Packages

See existing packages online:

browseURL("https://cran.r-project.org/web/packages/available_packages_by_name.html")

library()  # List of installed packages
search()   # Shows packages that are currently loaded

Installing & loading packages:

install.packages("summarytools")  # Download and install package in R
library("ggplot2")  # Load a package into R
require("ggplot2")  # Also loads a package, but returns FALSE with a warning (instead of an error) if it is not installed

library(help = "ggplot2")  # Brings up documentation on a package
vignette(package = "grid")  # Brings up list of vignettes (examples) in editor window
browseVignettes(package = "grid")  # Open web page with hyperlinks for vignette PDFs etc.

update.packages()  # Checks for updates

detach("package:ggplot2", unload = TRUE)  # detach package

remove.packages("psytabs")   # Deletes it

Built-in Datasets

See the available datasets online:

browseURL("http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html")

library(help = "datasets")     # See the list of available datasets
data()                         # See the list of available datasets
try(data(package = "rpart") )  # list the data sets in the rpart package
?datasets::cars                # Information about a particular dataset in the package

library(datasets)  # Load the library with the built-in datasets
require(datasets)  # Also loads the package, but returns FALSE with a warning (instead of an error) if it is missing

data(cars)   # Load dataset cars
str(cars)    # See the structure of the dataset cars

data(Cars93, package="MASS")

Import Data

txt.file <- read.table("Data/file.txt", header = TRUE)           
txt.file <- read.table("Data/file.txt", header = TRUE, sep = "\t")
csv.file <- read.csv("Data/file.csv", header = TRUE)

Saving Data

save(myData, file = "myData.RData")  # save the object to a file 
load(file = "myData.RData")          # load previously saved data from .RData file

Download this source file: