München 2018: Kalter Frühling, warmer Herbst

Edward Tufte ist so etwas wie der Papst der Datenvisualisierung. Das dargestellte Temperaturprofil ist einer Grafik von Tufte angelehnt. Das Diagramm zeigt den Temperaturverlauf des Jahres 2018 für München.
Die Daten für das Diagramm sind der Datei http://academic.udayton.edu/kissock/http/Weather/gsod95-current/DLMUNICH.txt entnommen. Sie enthält für jeden Tag seit dem 1. Januar 1995 die durchschnittliche Tagestemperatur in Fahrenheit. Die Daten erfordern jedoch etwas Nachbearbeitung, da sie Fehler und Doppler enthalten. Das Skript wurde auf der Seite https://www.petitessen.net/tufte/ bereits ausführlich beschrieben, ich gebe es hier wieder. Das Ergebnis: 2018 hatte in München im Februar und März 9 kälteste Tage seit 1995, und im Sommer und Herbst 19 wärmste Tage seit 1995. 2018 war insgesamt ein sehr warmes Jahr, wobei sich der Jahrhundertsommer temperaturmäßig eher in anderen Teilen Deutschlands niedergeschlagen hat.

# Preprocessing & summarizing data
library(dplyr)
library(tidyr)
# Visualizating development
library(ggplot2)
heuer <- 2019
# "http://academic.udayton.edu/kissock/http/Weather/gsod95-current/DLMUNICH.txt" %>%
 "E:/Source/R-3.3.2/Projekte/daten/DLMUNICH.txt" %>%
  read.table() %>% data.frame %>% tbl_df -> DAY
names(DAY) <- c("Month", "Day", "Year", "Temp")
DAY$Temp <- (DAY$Temp - 32) * 5 / 9
DAY <- DAY %>% filter(Month != 2 | Day != 29)
# create dataframe that represents 1995-2013 historical data
Past <- DAY %>%
  group_by(Year, Month) %>%
  arrange(Day) %>%
  ungroup() %>%
  group_by(Year) %>%
  mutate(newDay = seq(1, length(Day))) %>%   # label days as 1:365 (will represent x-axis)         
  ungroup() %>%
  filter(Temp > -70 & Year < heuer) %>%     # filter out missing data (identified with '-99' value) & current year data
  group_by(newDay) %>%
  mutate(upper = max(Temp), # identify max value for each day
         lower = min(Temp), # identify min value for each day
         avg = mean(Temp),  # calculate mean value for each day
         se = sd(Temp)/sqrt(length(Temp))) %>%  # calculate standard error of mean
  mutate(avg_upper = avg+(2.101*se),  # calculate 95% CI for mean
         avg_lower = avg-(2.101*se)) %>%  # calculate 95% CI for mean
  ungroup()

# create dataframe that represents current year data
Present <- DAY %>%
  group_by(Year, Month) %>%
  arrange(Day) %>%
  ungroup() %>%
  group_by(Year) %>%
  mutate(newDay = seq(1, length(Day))) %>%  # create matching x-axis as historical data
  ungroup() %>%
  filter(Temp > -70 & Year == heuer)  # filter out missing data & select current year data

# create dataframe that represents the lowest temp for each day for the historical data
PastLows <- Past %>%
  group_by(newDay) %>%
  summarise(Pastlow = min(Temp)) # identify lowest temp for each day from 1995-2013

# create dataframe that identifies the days in 2014 in which the temps were lower than all previous 19 years
PresentLows <- Present %>%
  left_join(PastLows) %>%  # merge historical lows to current year low data
  mutate(record = ifelse(Temp<Pastlow, "Y", "N")) %>% # identifies if current year was record low
  filter(record == "Y")  # filter for days that represent current year record lows

# create dataframe that represents the highest temp for each day for the historical data
PastHighs <- Past %>%
  group_by(newDay) %>%
  summarise(Pasthigh = max(Temp))  # identify highest temp for each day from 1995-2013

# create dataframe that identifies the days in 2014 in which the temps were higher than all previous 19 years
PresentHighs <- Present %>%
  left_join(PastHighs) %>%  # merge historical highs to current year low data
  mutate(record = ifelse(Temp>Pasthigh, "Y", "N")) %>% # identifies if current year was record high
  filter(record == "Y")  # filter for days that represent current year record highs

# create dataframe that represents mean temperature for each month
PastMeans <- Past %>%
  group_by(newDay) %>%
  summarise(Pastmean = mean(Temp)) # identify lowest temp for each day from 1995-2013

# function to turn y-axis labels into degree formatted values
dgr_fmt <- function(x, ...) {
  parse(text = paste(x, "*degree", sep = ""))
}

# create y-axis variable
a <- dgr_fmt(seq(-20,40, by = 5))

# some calculations
blue_dots <- nrow(PresentLows)
red_dots <- nrow(PresentHighs)
dtemp <- round(mean(Present$Temp), digits = 2)
DAY <- DAY %>% filter(Temp > -70)
gruppe <- group_by(DAY, Month)
s <- summarize(gruppe, Temp = mean(Temp))
s$rTemp <- round(s$Temp, digits = 2)
gruppe <- group_by(DAY, Year)
y <- summarize(gruppe, Temp = mean(Temp))
gruppe <- group_by(Present, Month)
t <- summarize(gruppe, Temp = mean(Temp))
t$rTemp <- round(t$Temp, digits = 2)
v <- merge(s, t, by = "rTemp", all = TRUE)
v <- v[order(v$Month.y),]
rownames(v) <- NULL
v$Temp.y <- round(v$Temp.y, digits = 2)

p <- ggplot(Past, aes(newDay, Temp)) +
  theme(plot.background = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        #axis.text = element_blank(),  
        axis.title = element_blank()) +
  geom_linerange(Past, mapping = aes(x = newDay, ymin = lower, ymax = upper), colour = "wheat2", alpha = .1)

p <- p + 
  geom_linerange(Past, mapping = aes(x = newDay, ymin = avg_lower, ymax = avg_upper), colour = "wheat4")

p <- p + 
  geom_line(Present, mapping = aes(x = newDay, y = Temp, group = 1)) +
  geom_vline(xintercept = 0, colour = "wheat4", linetype = 1, size = 1) + 
  geom_line(data=PastMeans, aes(x = newDay, y = Pastmean), size = 0.2, colour = "red")

p <- p + 
  geom_hline(yintercept = -20, colour = "white", linetype = 1) +
  geom_hline(yintercept = -15, colour = "white", linetype = 1) +
  geom_hline(yintercept = -10, colour = "white", linetype = 1) +
  geom_hline(yintercept = -5, colour = "white", linetype = 1) +
  geom_hline(yintercept = 0, colour = "white", linetype = 1) +
  geom_hline(yintercept = 5, colour = "white", linetype = 1) +
  geom_hline(yintercept = 10, colour = "white", linetype = 1) +
  geom_hline(yintercept = 15, colour = "white", linetype = 1) +
  geom_hline(yintercept = 20, colour = "white", linetype = 1) +
  geom_hline(yintercept = 25, colour = "white", linetype = 1) +
  geom_hline(yintercept = 30, colour = "white", linetype = 1) +
  geom_hline(yintercept = 35, colour = "white", linetype = 1) +
  geom_hline(yintercept = 40, colour = "white", linetype = 1)

p <- p + 
  geom_vline(xintercept = 31, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 59, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 90, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 120, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 151, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 181, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 212, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 243, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 273, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 304, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 334, colour = "wheat4", linetype = 3, size = .5) +
  geom_vline(xintercept = 365, colour = "wheat4", linetype = 3, size = .5) 

p <- p +
  coord_cartesian(ylim = c(-20,40)) +
  scale_y_continuous(breaks = seq(-20,40, by = 5), labels = a) +
  scale_x_continuous(expand = c(0, 0), 
                     breaks = c(15,45,75,105,135,165,195,228,258,288,320,350),
                     labels = c("January", "February", "March", "April",
                                "May", "June", "July", "August", "September",
                                "October", "November", "December"))
p <- p +
  geom_point(data = PresentLows, aes(x = newDay, y = Temp), colour = "blue3") +
  geom_point(data = PresentHighs, aes(x = newDay, y = Temp), colour = "firebrick3")

p <- p +
  ggtitle(paste("Munich Weather", heuer)) +
  theme(plot.title = element_text(face = "bold",hjust = 0,vjust = 0.8,colour = "#3C3C3C",size = 20)) +
  annotate("text", x = 1, y = 40 , label  =  "Temperature", size = 4, fontface = "bold", hjust= 0)

p <- p +
  annotate("text", x = 2, y = 38, 
           label = "Data represents average daily temperatures back to January 1, 1995", 
           size = 3, colour = "gray30", hjust = 0)

p <- p +
  annotate("text", x = 2, y = 36, 
           label = paste("Average temperature for the year was", dtemp, "degrees celsius"),
           size = 3, colour = "gray30", hjust = 0)

p <- p +
  annotate("text", x = 33, y = -15, label = paste("We had", toString(blue_dots), "days that were the"),
           size = 3, colour = "blue3", hjust = 0) +
  annotate("text", x = 33, y = -17, label = "coldest since 1995", size = 3, colour = "blue3", hjust = 0) +
  annotate("text", x = 290, y = 36, label = paste("We had", toString(red_dots), "days that were the"),
           size = 3, colour = "firebrick3", hjust = 0) +
  annotate("text", x = 290, y = 34, label = "hottest since 1995", size = 3, colour = "firebrick3", hjust = 0)

p <- p +
  annotate("segment", x = 181, xend = 181, y = -15, yend = -5, colour = "wheat2", size = 3) +
  annotate("segment", x = 181, xend = 181, y = -9, yend = -11, colour = "wheat4", size = 3) +
  annotate("text", x = 198, y = -10, label = "NORMAL RANGE", size = 2, colour = "gray30") +
  annotate("text", x = 197, y = -5, label = "RECORD HIGH", size = 2, colour = "gray30") +
  annotate("text", x = 197, y = -14, label = "RECORD LOW", size = 2, colour = "gray30") +
  annotate("text", x = 15, y = -22, label = toString(s$rTemp[1]), size = 2.5, colour = "gray30") +
  annotate("text", x = 45, y = -22, label = toString(s$rTemp[2]), size = 2.5, colour = "gray30") +
  annotate("text", x = 75, y = -22, label = toString(s$rTemp[3]), size = 2.5, colour = "gray30") +
  annotate("text", x = 105, y = -22, label = toString(s$rTemp[4]), size = 2.5, colour = "gray30") +
  annotate("text", x = 135, y = -22, label = toString(s$rTemp[5]), size = 2.5, colour = "gray30") +
  annotate("text", x = 165, y = -22, label = toString(s$rTemp[6]), size = 2.5, colour = "gray30") +
  annotate("text", x = 195, y = -22, label = toString(s$rTemp[7]), size = 2.5, colour = "gray30") +
  annotate("text", x = 228, y = -22, label = toString(s$rTemp[8]), size = 2.5, colour = "gray30") +
  annotate("text", x = 258, y = -22, label = toString(s$rTemp[9]), size = 2.5, colour = "gray30") +
  annotate("text", x = 288, y = -22, label = toString(s$rTemp[10]), size = 2.5, colour = "gray30") +
  annotate("text", x = 320, y = -22, label = toString(s$rTemp[11]), size = 2.5, colour = "gray30") +
  annotate("text", x = 350, y = -22, label = toString(s$rTemp[12]), size = 2.5, colour = "gray30") + 
  annotate("text", x = 15, y = -20, label = toString(v$Temp.y[1]), size = 2.5, colour = "red") +
  annotate("text", x = 45, y = -20, label = toString(v$Temp.y[2]), size = 2.5, colour = "red") +
  annotate("text", x = 75, y = -20, label = toString(v$Temp.y[3]), size = 2.5, colour = "red") +
  annotate("text", x = 105, y = -20, label = toString(v$Temp.y[4]), size = 2.5, colour = "red") +
  annotate("text", x = 135, y = -20, label = toString(v$Temp.y[5]), size = 2.5, colour = "red") +
  annotate("text", x = 165, y = -20, label = toString(v$Temp.y[6]), size = 2.5, colour = "red") +
  annotate("text", x = 195, y = -20, label = toString(v$Temp.y[7]), size = 2.5, colour = "red") +
  annotate("text", x = 228, y = -20, label = toString(v$Temp.y[8]), size = 2.5, colour = "red") +
  annotate("text", x = 258, y = -20, label = toString(v$Temp.y[9]), size = 2.5, colour = "red") +
  annotate("text", x = 288, y = -20, label = toString(v$Temp.y[10]), size = 2.5, colour = "red") +
  annotate("text", x = 320, y = -20, label = toString(v$Temp.y[11]), size = 2.5, colour = "red") +
  annotate("text", x = 350, y = -20, label = toString(v$Temp.y[12]), size = 2.5, colour = "red") +
  annotate("text", x = 280, y = -15, label = "present monthly average", size = 3, colour = "red", hjust = 0) +
  annotate("text", x = 280, y = -17, label = "historical monthly average", size = 3, colour = "gray30", hjust = 0)

print(p)

Temperaturdiagremm nach Tufte

Klicke auf das Bild für eine vergrößerte Darstellung

Teile diesen Beitrag!