library(readr)
library(tidyverse)
library(ggplot2)
library(leaflet)
library(mgcv)
library(htmltools)
library(htmlwidgets)

setwd('C:/Users/rnussba1/OneDrive/ARK/ARK - Science/Madagascar_pratincole_publication')
d <- read_csv('./data/data.csv', col_types = cols(
  source = col_character(),
  date = col_date(format = "%d/%m/%Y"),
  number = col_integer(),
  location = col_character(),
  latitude = col_double(),
  longitude = col_double(),
  country = col_character(),
  observer = col_character(),
  picture = col_logical(),
  description = col_character(),
  validity = col_character()
))
## Warning: 22 parsing failures.
## row    col   expected actual              file
##   2 number an integer      x './data/data.csv'
##  16 number an integer      x './data/data.csv'
##  30 number an integer      x './data/data.csv'
##  51 number an integer      x './data/data.csv'
##  79 number an integer      x './data/data.csv'
## ... ...... .......... ...... .................
## See problems(...) for more details.
# Filter
df = d %>%
  filter(is.na(validity)) %>% 
  mutate(number = as.numeric(number)) %>%
  mutate(julian = as.numeric(format(date,'%j'))) %>% 
  mutate(year = as.numeric(format(date,'%Y'))) %>% 
  mutate(source = if_else(grepl("East African Bird Report",source),"East African Bird Report",source)) %>% 
  mutate(period = if_else(year <2000, '<2000','>2000')) %>% 
  mutate(popup = iconv(paste('<b>Number</b>: ', number ,
                       '<br><b>Date</b>: ',date,
                       '<br><b>Observer</b>: ', observer,
                       '<br><b>Validity</b>: ', validity,
                       '<br><b>Picture</b>: ', picture,
                       '<br><b>Description</b>: ', description,
                       '<br><b>Source</b>: ', source
                       )),
          "UTF-8", "UTF-8",sub='')

#View Dataset
df
## # A tibble: 245 x 17
##    source date       number validity location latitude longitude country observer picture description julian  year period
##    <chr>  <date>      <dbl> <chr>    <chr>       <dbl>     <dbl> <chr>   <chr>    <lgl>   <chr>        <dbl> <dbl> <chr> 
##  1 Ash &~ 1923-01-01     NA <NA>     Giumbo ~  -0.25        42.6 Somalia Moltoni~ NA      "year only"      1  1923 <2000 
##  2 Ash &~ 1929-01-01      1 <NA>     Mogadis~   2.03        45.4 Somalia Moltoni~ NA      "year only"      1  1929 <2000 
##  3 Ash &~ 1934-06-01    500 <NA>     Belet A~   0.2         42.8 Somalia <NA>     NA      "500+, mon~    152  1934 <2000 
##  4 Ash &~ 1934-06-30      1 <NA>     Belet A~   0.2         42.8 Somalia Moltoni~ NA       <NA>          181  1934 <2000 
##  5 Ash &~ 1934-07-01      1 <NA>     Torda     -0.0833      42.7 Somalia Moltoni~ NA      "month"        182  1934 <2000 
##  6 Ash &~ 1939-05-25      1 <NA>     Jiohar     2.78        45.5 Somalia Moltoni~ NA      "at . sub-~    145  1939 <2000 
##  7 Ash &~ 1939-06-03      1 <NA>     Jiohar     2.78        45.5 Somalia Moltoni~ NA       <NA>          154  1939 <2000 
##  8 Britt~ 1952-09-25      3 <NA>     Kidugal~  -6.80        38.2 Tanzan~ <NA>     NA       <NA>          269  1952 <2000 
##  9 Berli~ 1959-08-14      1 <NA>     Kurtumu~   1.57        44.2 Somalia <NA>     NA      "adult fem~    226  1959 <2000 
## 10 Ash &~ 1964-09-04      1 <NA>     Giamama~   0.0667      42.8 Somalia Roche 1~ NA       <NA>          248  1964 <2000 
## # ... with 235 more rows, and 3 more variables: popup <chr>, `"UTF-8"` <chr>, sub <chr>
# Map sightings
leaflet::leaflet(df) %>%
  leaflet::addTiles() %>%
  leaflet::addAwesomeMarkers(lng = ~longitude, lat = ~latitude, popup = ~htmlEscape(popup))
# Plot count over years for each countries. 
df %>% 
  filter(!is.na(number)) %>% 
  ggplot(aes(x = year, y = number, col=country)) +
  geom_point() +
  geom_smooth(method = "glm", formula = y ~ x) +
  facet_grid(rows = vars(country =='Kenya'), scales = "free") 

# Count over years at Sabaki
df %>% 
  filter(location=="Sabaki") %>% 
  #filter(number>0) %>% 
  #filter(source == "Britton (1977)" | source == "Water Bird Count" | source == "eBird") %>%
  filter(source %in% c("Water Bird Count","East African Bird Report","Britton (1977)")) %>% 
  ggplot(aes(x= year, y= number, col = number==0)) +
  geom_point() +
  geom_smooth(method = "glm", formula = y ~ x , method.args = list(family = "poisson")) + 
  ylim(-10,3600)
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

# Phenology curve at Sabaki
df %>% 
  filter(location=="Sabaki") %>% 
  #filter(number>0) %>% 
  #filter(source == "Britton (1977)" | source == "Water Bird Count" | source == "eBird") %>%
  filter(source %in% c("Water Bird Count","East African Bird Report","Britton (1977)")) %>% 
  ggplot(aes(x= julian, y= number, col = period)) +
  geom_point() +
  geom_smooth(method = "gam", formula = y ~ s(x, k=5) , method.args = list(family = "poisson")) +
  scale_x_continuous(breaks=cumsum(c(1,31,28,31,30,31,30,31,31,30,31,30)), 
                     labels =c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec") ) + 
  ylim(-10,3600)
## Warning: Removed 2 rows containing non-finite values (stat_smooth).

## Warning: Removed 2 rows containing missing values (geom_point).

# Fit GAM
dff = df %>% 
  filter(location=="Sabaki") %>% 
  filter(number>0) %>% 
  #filter(number>0) %>% 
  #filter(source == "Britton (1977)" | source == "Water Bird Count" | source == "eBird") %>%
  filter(source %in% c("Water Bird Count","East African Bird Report","Britton (1977)"))

gam.fitted = gam(dff$number ~ s(dff$julian, k=6) + dff$year, family = 'poisson')

# rmarkdown::render("analysis/analysis.R")