Example code for reading and plotting data from:
Global Historical Climatology Network - Daily (GHCN-Daily)
http://gis.ncdc.noaa.gov/all-records/catalog/search/resource/details.page?id=gov.noaa.ncdc:C00861
Load the packages used in the code
require(dplyr, quietly = TRUE)
require(utils, quietly = TRUE)
require(LaF, quietly = TRUE)
require(tidyr, quietly = TRUE)
require(ggplot2, quietly = TRUE)
require(ggfortify, quietly = TRUE)
Specific information about location and format of the datasets:
http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
Read the stations data
# IV. FORMAT OF "ghcnd-stations.txt"
#
# ------------------------------
# Variable Columns Type
# ------------------------------
# ID 1-11 Character
# LATITUDE 13-20 Real
# LONGITUDE 22-30 Real
# ELEVATION 32-37 Real
# STATE 39-40 Character
# NAME 42-71 Character
# GSN FLAG 73-75 Character
# HCN/CRN FLAG 77-79 Character
# WMO ID 81-85 Character
# ------------------------------
download.file("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt", destfile = "stations.txt")
stations_width<-c(12, 9, 10, 7, 3, 31)
stations_colname<-c("id", "latitude", "longitude", "elevation", "state", "name")
laf <- laf_open_fwf("stations.txt", column_widths = stations_width,
column_types=rep("character",6),
column_names = stations_colname)
stations<-laf[,]
The stations from Spain are selected
sp_stations<-filter(stations, grepl("SP00", substr(stations$id,1,4)))
sp_stations$url<-paste0("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/all/", trimws(sp_stations$id), ".dly")
Once the url’s for station datasets are known the files are downloaded and loaded into a data frame
# ------------------------------
# Variable Columns Type
# ------------------------------
# ID 1-11 Character
# YEAR 12-15 Integer
# MONTH 16-17 Integer
# ELEMENT 18-21 Character
# VALUE1 22-26 Integer
# MFLAG1 27-27 Character
# QFLAG1 28-28 Character
# SFLAG1 29-29 Character
# VALUE2 30-34 Integer
# MFLAG2 35-35 Character
# QFLAG2 36-36 Character
# SFLAG2 37-37 Character
# . . .
# . . .
# . . .
# VALUE31 262-266 Integer
# MFLAG31 267-267 Character
# QFLAG31 268-268 Character
# SFLAG31 269-269 Character
# ------------------------------
col_width<-c(11, 4, 2, 4, rep(c(5, 1, 1, 1), 31))
col_types<-c("character", "integer", "integer", "character", rep(c("integer", "character", "character", "character"),31))
col_names<-c("id", "year", "month", "element")
for(i in 1:31) col_names<-c(col_names, paste0("value", i), paste0("mflag", i), paste0("qflag", i), paste0("sflag", i))
for (i in 1:nrow(sp_stations)){
download.file(sp_stations$url[i], destfile = as.character(sp_stations$id[i]))
laf <- laf_open_fwf(as.character(sp_stations$id[i]), column_widths = col_width,
column_types=col_types,
column_names = col_names)
if(i == 1){
df<-laf[,]
}else{
df<-rbind(df, laf[,])
}
}
The data is arranged into a structure easier to use with ggplot
dff<-select(df, 1:4, starts_with("value"))
dffg<-gather(dff, key = day, value = value, 5:35)
dffg$value<-ifelse(dffg$value == -9999,NA,dffg$value)
dffg$day<-as.integer(substr(dffg$day,6,7))
dffg<-mutate(dffg, date=as.Date(paste(year,month,day), "%Y%m%d"))
dffg<-arrange(dffg, id, element, date)
Factor the id
with the stations name and save TMAX
and TMIN
for later use.
dffg$id<-factor(dffg$id)
levels(dffg$id) = sp_stations$name
TData<-filter(dffg, element %in% c("TMAX", "TMIN"))
save(TData, file="TData.rda")
The monthly averages are calculated
pru_gby<-group_by(dffg, id, element, month, year)
pru<-summarize(pru_gby, mval=mean(value, na.rm=TRUE ))
pru$month<-factor(pru$month)
levels(pru$month)<-month.name
Plot the monthly averages of TMAX and TMIN elements
pruTMAX<-filter(pru, element == "TMAX")
pruTMIN<-filter(pru, element == "TMIN")
p<-ggplot()
p<-p+geom_line(data=pruTMAX, aes(x=month, y=mval, group = interaction(year,element)), alpha=0.5, color="gray")
p<-p+geom_smooth(data=pruTMAX, aes(x=month, y=mval, group=1), size=1.5, se=FALSE, color="red")
p<-p+geom_line(data=pruTMIN, aes(x=month, y=mval, group = interaction(year,element)), alpha=0.5, color="gray")
p<-p+geom_smooth(data=pruTMIN, aes(x=month, y=mval, group=1), size=1.5, se=FALSE, color="blue")
p<-p+facet_wrap(~ id, ncol=4)
p<-p+ylab("Temp °C x 10")
p<-p+ggtitle("MONTHLY TMAX / TMIN")
p<-p+theme(axis.title.y=element_text(hjust = 0.5, size = 12),
axis.title.x=element_blank(),
plot.title = element_text(hjust = 0.5, size = 16),
axis.text.x = element_text(size=8, angle = 45, hjust = 1))
suppressMessages(print(p))
Decompose the TMAX time series for one station
pru<-filter(dffg, element == "TMAX", id == sp_stations$name[13], !is.na(value))
pru<-select(pru, date, value)
pruts<-ts(pru$value, frequency = 365, start=c(as.numeric(format(pru$date[1], "%Y"), 1)))
prucomp<-decompose(pruts, type = "additive")
and plot the result
p<-ggplot(pru, aes(x=date, y=value))
p<-p+geom_line(size=0.1, color="red")
p<-p+xlab("")
p<-p+ylab("Temp °C x 10")
p<-p+ggtitle(paste("TMAX of station ", sp_stations$name[13]))
print(p)
p<-autoplot(prucomp, ts.colour = "blue")
p<-p + xlab("Years")
p<-p+ylab("Temp °C x 10")
p<-p + ggtitle(paste("TMAX decomposition of station ", sp_stations$name[13]))
print(p)
I hope it will be useful!