## ALL LINES BEGINNING WITH one or more "#" are "comments" and are notes for
## you, the programmer. They will not be read by R.

### STEP 1: Set PATH ###
## The first thing we need to do is set the path.
## The path is a line of text that specifies the working directory (folder)
## where our inputs are held and our outputs will be written.
## The path is formatted differently on Mac/Linux compared to Windows.
## Remember, R uses the "<-" operator for assignment,
## although "=" will work in most cases as well.
## To assign the path, use the function setwd() - learn more by typing ?setwd
## into the console.
## Inside the parentheses () we can paste the path text, surrounded by
## quotes " ".
setwd("YOUR PATH GOES HERE")

# In Windows, your path will look something like this.
# You will need to convert each backslash "\" to a slash "/".
# WINDOWS_PATH_EXAMPLE <- "C:/Program Files/R"

### STEP 2: Read the data ###
# With the path set, we can now read the file, which is located in the
# working directory we have set!
# This line assigns our data's filename to a variable called "FILE_NAME".
FILE_NAME <- "Happiness_2019.csv"

# And this line reads the data into an R object (a data frame) named "df".
df <- read.csv(FILE_NAME)

## Always check our data to make sure it loaded properly, and to see what
## we're working with. head() shows the first few rows.
head(df)

### STEP 3: Build the top-10 table ###
# We can view an individual column by using a $, for example df$Score gives
# just the score column. To keep several columns at once, we index the data
# frame with a vector of column names instead:
keep_columns <- c("Country.or.region", "Score")
clean_df <- df[keep_columns]

# Now let's sort by score, with the highest at the top.
clean_df <- clean_df[order(clean_df$Score, decreasing = TRUE), ]

# And finally, just report the top 10 - this keeps only the first ten rows.
# head() is used rather than clean_df[1:10, ] because indexing past the end of
# a shorter table would add rows filled with NA.
clean_df <- head(clean_df, n = 10L)

# This cleans up the old row numbers, which will look a bit weird otherwise.
# After resetting, the rows are numbered 1, 2, 3, ... in sorted order.
rownames(clean_df) <- NULL

# Finally, we can write this cleaner data to our computer!
# Note: write.csv() also writes the row numbers as the first column.
write.csv(clean_df, file = "top-10-happiness2019.csv")

### STEP 4: Plot the data ###
# Let's also make a simple plot of our happiness data.
plot(df$GDP.per.capita, df$Score)

# This graph needs labels and graphical parameters; add them like this:
plot(
  df$GDP.per.capita, df$Score,
  xlab = "GDP per capita (relative)",
  ylab = "2019 Happiness Score",
  main = "2019 Happiness Score vs. GDP",
  col = "green",
  pch = 19
)

# To save as a PDF, we can do the following: pdf() opens the file, plot()
# draws into it, and dev.off() closes the file so it is written to disk.
pdf(file = "Score-Vs-GDP.pdf")
plot(
  df$GDP.per.capita, df$Score,
  xlab = "GDP per capita (relative)",
  ylab = "2019 Happiness Score",
  main = "2019 Happiness Score vs. GDP",
  col = "green",
  pch = 19
)
dev.off()

# And maybe let's make one for "Social support" as well:
pdf(file = "Score-Vs-SocSupp.pdf")
plot(
  df$Social.support, df$Score,
  xlab = "Social Support",
  ylab = "2019 Happiness Score",
  main = "2019 Happiness Score vs. Social Support",
  col = "blue",
  pch = 19
)
dev.off()

# NOTE - the data for the world happiness report comes from the
# "GALLUP WORLD POLL".
# For methodology and more, visit
# https://www.gallup.com/178667/gallup-world-poll-work.aspx