# Baby-name popularity by state region, from the SSA "names by state" archive.
#
# Downloads the zip archive, stacks the per-state *.TXT files into one data
# frame, joins a region onto each row, and plots how often selected names
# were given per year and region (optionally split by gender).
#
# NOTE(review): `StateRegions` is not created anywhere in this script. It must
# already exist in the calling environment, provide a `StateRegion` column,
# and share at least one column name with `namedata` for the join below --
# presumably the state abbreviation; confirm against its definition.

library(dplyr)
library(lattice)
library(latticeExtra)

# Helpers ----

# Download the zip archive at `url`, unpack it into a temporary directory and
# return every *.TXT file it contains stacked into a single data frame.
# Stops with an error if the archive holds no *.TXT files.
read_state_names <- function(url) {
  zipdir <- tempfile()
  temp <- tempfile()
  # Clean up the archive and the extracted files even if a later step fails.
  # recursive = TRUE is required: without it unlink() leaves directories alone.
  on.exit(unlink(c(zipdir, temp), recursive = TRUE), add = TRUE)

  dir.create(zipdir)
  # The download is a zip file, so it must be fetched in binary mode.
  download.file(url, temp, mode = "wb")
  unzip(temp, exdir = zipdir)

  # Only bring in the *.TXT files.
  files <- list.files(zipdir, pattern = "\\.TXT$", full.names = TRUE)
  if (length(files) == 0L) {
    stop("No .TXT files found in archive: ", url, call. = FALSE)
  }

  # The files have no header row, and the rest of the script refers to Name,
  # Gender, Year and Count, so the columns are named here.
  # NOTE(review): the column order (state, sex, year, name, count) and the
  # name `State` for the first column are assumptions -- confirm against the
  # archive's readme and against the key column of `StateRegions`.
  # Explicit colClasses keep a lone "F" from being parsed as logical FALSE
  # and guarantee every per-file table has identical column types.
  tables <- lapply(
    files,
    read.csv,
    header = FALSE,
    col.names = c("State", "Gender", "Year", "Name", "Count"),
    colClasses = c("character", "character", "integer", "character", "integer")
  )

  # Bind once instead of growing a data frame with rbind() inside a loop.
  bind_rows(tables)
}

# Sum `Count` within each combination of the grouping columns given in `...`
# and return the result ungrouped, with the sum in a column named `Total`.
total_counts <- function(df, ...) {
  grouped <- group_by(df, ...)
  ungroup(summarize(grouped, Total = sum(Count)))
}

# Join the totals for one name (`Total`) to the totals for all names
# (`Total2`) on the columns in `by`, and add `Percent = Total / Total2`.
# Despite its name, `Percent` is a proportion in [0, 1], not scaled by 100.
add_percent <- function(name_totals, all_totals, by) {
  joined <- left_join(name_totals, all_totals, by = by)
  mutate(joined, Percent = Total / Total2)
}

# Panel function that draws each panel as a filled area.
mypanel <- function(x, y) {
  panel.xyarea(x, y, fill = TRUE)
}

# Load the data ----

names_url <- "https://www.ssa.gov/oact/babynames/state/namesbystate.zip"
namedata <- read_state_names(names_url)

# Attach the region of each state. The join keys are left implicit because
# the columns of `StateRegions` are not visible from this script.
namedata <- left_join(namedata, StateRegions)

# Keys shared by the per-name and overall totals in the gender-level plots.
region_keys <- c("Gender", "Year", "StateRegion")

# Chris: totals and share by Gender, Year, Region ----

Chris <- filter(namedata, Name == "Chris")
output <- total_counts(Chris, Gender, Year, StateRegion)

# Lattice objects are only drawn when printed; the explicit print() makes the
# plots appear when this file is run through source() as well.
print(
  xyplot(Total ~ Year | StateRegion, data = output, groups = Gender, type = "l")
)

# Totals over all names by Gender, Year, Region (denominator for the shares).
output2 <- rename(
  total_counts(namedata, Gender, Year, StateRegion),
  Total2 = Total
)

output3 <- add_percent(output, output2, by = region_keys)
print(
  xyplot(
    Percent ~ Year | StateRegion,
    data = output3, groups = Gender, type = "l"
  )
)

# Francis: share by Gender, Year, Region ----

Francis <- filter(namedata, Name == "Francis")
output <- total_counts(Francis, Gender, Year, StateRegion)
output3 <- add_percent(output, output2, by = region_keys)
print(
  xyplot(
    Percent ~ Year | StateRegion,
    data = output3, groups = Gender, type = "l"
  )
)

# Any name: set LookupName and rerun this section ----

LookupName <- "William"
data <- filter(namedata, Name == LookupName)
output <- total_counts(data, Gender, Year, StateRegion)
output3 <- add_percent(output, output2, by = region_keys)
print(
  xyplot(
    Percent ~ Year | StateRegion,
    data = output3, groups = Gender, type = "l"
  )
)

# William: share by Year and Region, genders combined, as an area plot ----

William <- filter(namedata, Name == "William")
output <- total_counts(William, Year, StateRegion)

# Totals over all names by Year and Region. This replaces the gender-level
# `output2` computed above.
output2 <- rename(total_counts(namedata, Year, StateRegion), Total2 = Total)

output3 <- add_percent(output, output2, by = c("Year", "StateRegion"))
print(xyplot(Percent ~ Year | StateRegion, data = output3, panel = mypanel))