library(readr)
library(tidyverse)
library(ggplot2)
library(leaflet)
library(mgcv)
library(htmltools)
library(htmlwidgets)
setwd('C:/Users/rnussba1/OneDrive/ARK/ARK - Science/Madagascar_pratincole_publication')
d <- read_csv('./data/data.csv', col_types = cols(
source = col_character(),
date = col_date(format = "%d/%m/%Y"),
number = col_integer(),
location = col_character(),
latitude = col_double(),
longitude = col_double(),
country = col_character(),
observer = col_character(),
picture = col_logical(),
description = col_character(),
validity = col_character()
))
## Warning: 22 parsing failures.
## row col expected actual file
## 2 number an integer x './data/data.csv'
## 16 number an integer x './data/data.csv'
## 30 number an integer x './data/data.csv'
## 51 number an integer x './data/data.csv'
## 79 number an integer x './data/data.csv'
## ... ...... .......... ...... .................
## See problems(...) for more details.
# Filter
df = d %>%
filter(is.na(validity)) %>%
mutate(number = as.numeric(number)) %>%
mutate(julian = as.numeric(format(date,'%j'))) %>%
mutate(year = as.numeric(format(date,'%Y'))) %>%
mutate(source = if_else(grepl("East African Bird Report",source),"East African Bird Report",source)) %>%
mutate(period = if_else(year <2000, '<2000','>2000')) %>%
mutate(popup = iconv(paste('<b>Number</b>: ', number ,
'<br><b>Date</b>: ',date,
'<br><b>Observer</b>: ', observer,
'<br><b>Validity</b>: ', validity,
'<br><b>Picture</b>: ', picture,
'<br><b>Description</b>: ', description,
'<br><b>Source</b>: ', source
)),
"UTF-8", "UTF-8",sub='')
#View Dataset
df
## # A tibble: 245 x 17
## source date number validity location latitude longitude country observer picture description julian year period
## <chr> <date> <dbl> <chr> <chr> <dbl> <dbl> <chr> <chr> <lgl> <chr> <dbl> <dbl> <chr>
## 1 Ash &~ 1923-01-01 NA <NA> Giumbo ~ -0.25 42.6 Somalia Moltoni~ NA "year only" 1 1923 <2000
## 2 Ash &~ 1929-01-01 1 <NA> Mogadis~ 2.03 45.4 Somalia Moltoni~ NA "year only" 1 1929 <2000
## 3 Ash &~ 1934-06-01 500 <NA> Belet A~ 0.2 42.8 Somalia <NA> NA "500+, mon~ 152 1934 <2000
## 4 Ash &~ 1934-06-30 1 <NA> Belet A~ 0.2 42.8 Somalia Moltoni~ NA <NA> 181 1934 <2000
## 5 Ash &~ 1934-07-01 1 <NA> Torda -0.0833 42.7 Somalia Moltoni~ NA "month" 182 1934 <2000
## 6 Ash &~ 1939-05-25 1 <NA> Jiohar 2.78 45.5 Somalia Moltoni~ NA "at . sub-~ 145 1939 <2000
## 7 Ash &~ 1939-06-03 1 <NA> Jiohar 2.78 45.5 Somalia Moltoni~ NA <NA> 154 1939 <2000
## 8 Britt~ 1952-09-25 3 <NA> Kidugal~ -6.80 38.2 Tanzan~ <NA> NA <NA> 269 1952 <2000
## 9 Berli~ 1959-08-14 1 <NA> Kurtumu~ 1.57 44.2 Somalia <NA> NA "adult fem~ 226 1959 <2000
## 10 Ash &~ 1964-09-04 1 <NA> Giamama~ 0.0667 42.8 Somalia Roche 1~ NA <NA> 248 1964 <2000
## # ... with 235 more rows, and 3 more variables: popup <chr>, `"UTF-8"` <chr>, sub <chr>
# Map sightings
leaflet::leaflet(df) %>%
leaflet::addTiles() %>%
leaflet::addAwesomeMarkers(lng = ~longitude, lat = ~latitude, popup = ~htmlEscape(popup))
# Plot count over years for each countries.
df %>%
filter(!is.na(number)) %>%
ggplot(aes(x = year, y = number, col=country)) +
geom_point() +
geom_smooth(method = "glm", formula = y ~ x) +
facet_grid(rows = vars(country =='Kenya'), scales = "free")
# Count over years at Sabaki
df %>%
filter(location=="Sabaki") %>%
#filter(number>0) %>%
#filter(source == "Britton (1977)" | source == "Water Bird Count" | source == "eBird") %>%
filter(source %in% c("Water Bird Count","East African Bird Report","Britton (1977)")) %>%
ggplot(aes(x= year, y= number, col = number==0)) +
geom_point() +
geom_smooth(method = "glm", formula = y ~ x , method.args = list(family = "poisson")) +
ylim(-10,3600)
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
# Phenology curve at Sabaki
df %>%
filter(location=="Sabaki") %>%
#filter(number>0) %>%
#filter(source == "Britton (1977)" | source == "Water Bird Count" | source == "eBird") %>%
filter(source %in% c("Water Bird Count","East African Bird Report","Britton (1977)")) %>%
ggplot(aes(x= julian, y= number, col = period)) +
geom_point() +
geom_smooth(method = "gam", formula = y ~ s(x, k=5) , method.args = list(family = "poisson")) +
scale_x_continuous(breaks=cumsum(c(1,31,28,31,30,31,30,31,31,30,31,30)),
labels =c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec") ) +
ylim(-10,3600)
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
# Fit GAM
dff = df %>%
filter(location=="Sabaki") %>%
filter(number>0) %>%
#filter(number>0) %>%
#filter(source == "Britton (1977)" | source == "Water Bird Count" | source == "eBird") %>%
filter(source %in% c("Water Bird Count","East African Bird Report","Britton (1977)"))
gam.fitted = gam(dff$number ~ s(dff$julian, k=6) + dff$year, family = 'poisson')
# rmarkdown::render("analysis/analysis.R")