Analysis of sightings of Red-necked Phalarope in East Africa

Raphaƫl Nussbaumer

25/02/2021

Load and display all data

dm <- read.csv('sightings.csv') %>% 
  mutate( 
    count = ifelse(count=="x",countClass,count),      
    count = as.numeric(count),
    countClass = factor(countClass),
    date= as.Date(date, format="%d/%m/%Y"),
    dayOfYear = as.numeric(strftime(date,format="%j")),
    year = as.numeric(strftime(date,format="%Y") ) 
  )

dm %>% 
  dplyr::select(-c(countClass,validity,latitude,longitude,dayOfYear,year)) %>% 
  mutate(ebird = paste0('<a href="',ebird,'#renpha">',str_replace(ebird,'https://ebird.org/checklist/',''),'</a>')) %>% 
  datatable(filter = 'top', rownames = FALSE, escape = FALSE)

Maps of the sightings

pal <- colorNumeric(hsv(1 - ((1:365) + (365/4))%%365/365, s = 0.8, v = 0.8), domain=c(1,365))
pal2 <- colorFactor(palette="Set1" ,domain=dm$cost)

# create popup
dm %>%
  mutate(popup = iconv(paste('<b>Number</b>: ', count ,
                             '<br><b>Location</b>: ', location,
                             '<br><b>Date</b>: ',date,
                             '<br><b>Observer</b>: ', observer,
                             '<br><b>Description</b>: ', description,
                             '<br><b>Source</b>: ', source,
                             ifelse(source=='eBird',paste0(' - <a href="',ebird,'#renpha">',str_replace(ebird,'https://ebird.org/checklist/',''),'</a>'),''),
                             ifelse(picture,'<br><b>Photo</b>','')
  )
  ),  "UTF-8", "UTF-8", sub='') %>% 
  filter(!is.na(latitude)) %>% 
  #filter(coast !="inland") %>% 
  arrange( desc(count) ) %>% 
  leaflet(width = "100%") %>%
  #addTiles() %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addCircleMarkers(lng = ~longitude, lat = ~latitude, popup = ~popup, 
                   radius = ~ifelse(is.na(count),7,7+log(count)*3),
                   fillColor = ~pal(dayOfYear),
                   stroke = 0,
                   opacity= 0.4,
                   weight = 2,
                   color = ~pal2(coast), 
                   fillOpacity = .2,
                   # clusterOptions = markerClusterOptions()#maxClusterRadius = 1)
  ) %>%
  addLegend("bottomright", pal = pal,
            title="Date",
            values = ~seq(1,365, length.out = 12),
            bins = seq(1,365, length.out = 12),
            labFormat = function(type = "numeric", cuts){
              format(ISOdate(2004,1,1)+days(round(cuts)),"%B")
            },
            opacity = 1
  )
dmt <- dm %>%
  filter(!is.na(latitude)) %>% 
  arrange( desc(count) ) %>% 
  mutate(
    # size = ifelse(is.na(count), 1, log(count))
    size = 1+3*log(as.numeric(as.character(countClass))),
    size = ifelse(is.na(size),1,size)
  )

spdf <- SpatialPointsDataFrame(coords = cbind( dmt$longitude,dmt$latitude), data = dmt)

tm_shape(World, bbox = st_bbox(spdf)+cbind(-1,-1,1,1)) +
  tm_polygons() +
  tm_shape(spdf) +
  tm_symbols(size="size") 

Counts vs date

Cumulative distribution of the counts

(dm %>% 
   filter(!is.na(count)) %>% 
   arrange(count) %>% 
   ggplot( aes(count)) + 
   stat_ecdf(geom = "step") +
   scale_x_log10(name="Count") +
   theme_light() +
   scale_y_continuous(name="Cumulative PDF")
) %>% ggplotly()

Counts per period and coast

dm %>% 
  filter(count<3000) %>%  # remove the count to avoid baising the mean
  mutate(coast = ifelse(coast!="inland","coast","insland")) %>% 
  group_by(coast) %>% 
  summarize(
    nb_sightings = n(), 
    mean_count = mean(count, na.rm = T),
    max_count = max(count, na.rm = T)
  ) %>% 
  kableExtra::kable()
coast nb_sightings mean_count max_count
coast 37 29.78378 200
insland 64 4.65625 40

Counts along the year

dm %>% 
  ggplot(aes( dayOfYear, fill = as.factor(countClass))) +
  geom_histogram(binwidth = 14,position = "stack") +
  scale_x_continuous(breaks=as.numeric(format(ISOdate(2000,1:12,1),"%j")),
                     labels=format(ISOdate(2000,1:12,1),"%b"),
                     minor_breaks = c(),
                     expand = c(0,0),
                     name="Date"
                     #limits = c(100,365)
  ) + 
  scale_y_continuous(expand = c(0,0), name="Number of sightings" ) +
  theme_light() + 
  # scale_fill_viridis(option="magma", discrete = TRUE, direction=-1)
  scale_fill_brewer(palette ='YlOrBr')

dm %>% 
  mutate(coast = ifelse(coast!="inland",'coast',coast)) %>% 
  ggplot(aes( dayOfYear, fill = as.factor(coast))) +
  geom_histogram(binwidth = 14,position = "stack") +
  scale_x_continuous(breaks=as.numeric(format(ISOdate(2000,1:12,1),"%j")),
                     labels=format(ISOdate(2000,1:12,1),"%b"),
                     minor_breaks = c(),
                     expand = c(0,0),
                     name="Date"
                     #limits = c(100,365)
  ) + 
  scale_y_continuous(expand = c(0,0), name="Number of sightings" ) +
  theme_light()

Habitat on the coast

dmc <- dm %>% 
  mutate(peak = ifelse(dayOfYear>170 & dayOfYear<349, "sept-dec","jan-may")) %>% 
  #filter(coast!="inland") %>% 
  mutate(coast = ifelse(coast!="inland",'coast',coast)) %>% 
  group_by(coast,peak) %>% 
  filter(count<3000) %>% 
  summarize(
    nb_sightings = n(),
    mean_count = mean(count),
    min_count = min(count),
    max_count = max(count),
    .groups="drop"
  ) 

dmc %>% 
  kableExtra::kable()
coast peak nb_sightings mean_count min_count max_count
coast jan-may 31 32.774194 1 200
coast sept-dec 6 14.333333 2 50
inland jan-may 30 5.933333 1 39
inland sept-dec 34 3.529412 1 40
dmc %>% 
  filter(peak == "sept-dec") %>% 
  ggplot(aes(x="", y=nb_sightings, fill=coast)) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  theme_light()

dmc %>% 
  filter(peak == "jan-may") %>% 
  ggplot(aes(x="", y=nb_sightings, fill=coast)) +
  geom_bar(stat="identity", width=1) +
  coord_polar("y", start=0) +
  theme_light()

dm %>% 
  filter(count<3000) %>%  # remove the count to avoid baising the mean
  mutate(peak = ifelse(dayOfYear>170 & dayOfYear<349, "sept-dec","jan-may")) %>% 
  group_by(coast) %>% 
  summarize(
    nb_sightings = n(),
    mean_count = mean(count, na.rm = T),
    max_count = max(count, na.rm = T),
    min_count = min(count, na.rm = T)
  ) %>% 
  kableExtra::kable()
coast nb_sightings mean_count max_count min_count
coast 2 42.500000 75 10
inland 64 4.656250 40 1
offshore 20 47.100000 200 1
onshore 7 1.142857 2 1
saltwork 8 8.375000 28 1

Change along the year

dm %>% 
  mutate(coast = fct_rev(factor(ifelse(coast!="inland",'coast',coast),ordered = TRUE))) %>% 
  filter(!is.na(year)) %>% 
  ggplot(aes( year, fill = coast)) +
  geom_histogram(binwidth = 1,position = "stack") +
  scale_x_continuous(minor_breaks = c(),
                     expand = c(0,0),
                     name="Date",
                     breaks=seq(1960,2020,by=5),
                     limits = c(1960,2020)
  ) + 
  scale_y_continuous(expand = c(0,0), name="Number of sightings" ) +
  theme_light()

# Dipole Mode Index (DMI)

source: https://psl.noaa.gov/gcos_wgsp/Timeseries/DMI/

dmi <- read_excel("dipole_mode_index.xlsx",sheet="dmiwest", col_types = "numeric") %>% 
  pivot_longer(!year, names_to = "month", values_to = "dmi")  %>% 
  # filter(year<2021 & year>min(dm$year)-1) %>% 
  mutate(date=as.Date(parse_date_time(paste0(month(parse_date_time(month, "%m")),'/',year), "%m/%y")) )

dmi_s <- dmi %>% 
  filter(month %in% c('January','February','March','October','November','December')) %>% 
  mutate(year = ifelse(month %in% c('October','November','December'),year+1,year)) %>% 
  group_by(year) %>% 
  summarise(dmi=mean(dmi)) %>% 
  mutate(year=as.Date(parse_date_time(year,"%y"))) %>% 
  filter(year(year)<2021)


ggplot() +
  geom_line(data=dmi, aes(x=date,y=dmi),color="grey") + 
  geom_point(data=dmi_s , aes(x=year,y=dmi),color="black") + 
  scale_x_date(date_labels = "%m-%Y",minor_breaks = c(),
                     expand = c(0,0), name="Date",
                     limits = c(ymd("1960-01-01"),ymd("2021-01-01"))
  ) + 
  scale_y_continuous(expand = c(0,0), name="Dipole Mode Index", limits = c(-1,1) ) +
  theme_light()

Sightings vs DMI + year

dmycoast = dm %>% 
  filter(coast!="inland") %>% 
  mutate(year=ifelse(dayOfYear>365/2,year+1,year)) %>% 
  mutate(year=as.Date(parse_date_time(year,"%y"))) %>% 
  group_by(year) %>% 
  summarise(
    nb_sightings=n(),
    avg_count=mean(count,na.rm=T),
    max_count=max(count,na.rm=T),
    )

tmp <- dmi_s %>% merge(dmycoast,'year',all=TRUE) %>% 
  replace(is.na(.),0) %>% 
  filter(year(year)>1960) %>% 
  mutate(year=year(year))

Check Poisson distribution fit

fit <- fitdistr(tmp$nb_sightings,"Poisson")
hist(tmp$nb_sightings,prob=TRUE)
curve(dpois(x, fit$estimate[1]), from=0, to=5,n=6,col="red", add=T,type="p")

Fit GLM

glmfit <- tmp %>% glm(formula="nb_sightings ~ dmi + year",family = 'poisson')
glmfit %>% summary()
## 
## Call:
## glm(formula = "nb_sightings ~ dmi + year", family = "poisson", 
##     data = .)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5004  -1.1668  -0.9140   0.4127   2.4905  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -22.65631   26.72310  -0.848    0.397
## dmi           0.46780    0.63138   0.741    0.459
## year          0.01117    0.01339   0.834    0.404
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 88.761  on 59  degrees of freedom
## Residual deviance: 84.334  on 57  degrees of freedom
## AIC: 144.85
## 
## Number of Fisher Scoring iterations: 6

Check overdispersion

dispersiontest(glmfit)
## 
##  Overdispersion test
## 
## data:  glmfit
## z = 2.3944, p-value = 0.008323
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   1.515923

Partial Plot

effect_plot(glmfit, pred = dmi, interval = TRUE, plot.points = TRUE)

effect_plot(glmfit, pred = year, interval = TRUE, plot.points = TRUE)

Plot sightings vs dmi

tmp %>% # filter(year<2020) %>% 
  ggplot(aes(x=dmi,y=nb_sightings,size=log(max_count+1),color=year)) +
  geom_point() +
  #geom_smooth(method = 'glm', method.args = list(family = 'poisson')) +
  theme_light() + 
  scale_color_viridis_c() 

NPP

time series

NPP_ts <- read_csv('NPP_east_africa_timeserie.csv',show_col_types = FALSE)
NPP_ts$Date <- as.Date(NPP_ts$`system:time_start`,format="%b %d, %Y")

NPP_tsy <- NPP_ts %>% filter(month(Date)>9|month(Date)<4) %>%
  mutate(year=ifelse(month(Date)>6,year(Date)+1,year(Date))) %>% 
  group_by(year) %>% 
  summarise(NPP = mean(NPP)) %>% 
  mutate(year=as.Date(parse_date_time(year,"%y")))

ggplot() +
  geom_line(data=NPP_ts,aes(x=Date, y=NPP) ) +
  geom_point(data=NPP_tsy,aes(x=year, y=NPP) ) +
  theme_light()

plot1 <- dmi_s %>% filter(year(year)>1997 & year(year)<2021) %>% ggplot(aes(x=year,y=dmi)) + geom_point() +
  theme_light()
plot2 <- NPP_tsy %>% ggplot(aes(x=year,y=NPP)) + geom_point() +
  theme_light() 
plot3 <- dmycoast %>% filter(year(year)>1997 & year(year)<2021) %>% ggplot(aes(x=year,y=nb_sightings)) + geom_point() +
  theme_light()
plot4 <- dmycoast %>% filter(year(year)>1997 & year(year)<2021) %>% ggplot(aes(x=year,y=max_count)) + geom_point() +
  theme_light()
grid.arrange(plot1, plot2, plot3, plot4, ncol=1)

Combine all dataset

da <- data.frame(year=seq(1997,2021)) %>% 
  merge(NPP_tsy %>% mutate(year=year(year)), 'year',all=TRUE) %>% 
  merge(dmycoast%>% mutate(year=year(year)),'year',all=TRUE) %>% 
  merge(dmi_s%>% mutate(year=year(year)),'year',all=TRUE) %>% 
  filter(year<2021 & year>1997) %>% 
  replace(is.na(.), 0) 

Plot

da %>% 
  ggplot( aes(NPP,dmi,label=year)) + 
  geom_point(aes(size=nb_sightings)) +  geom_text() +
  geom_smooth(method='lm') +
  theme_light()
## `geom_smooth()` using formula 'y ~ x'

Fit linear model and compute correlation

cor(da$NPP,da$dmi)
## [1] -0.7046759
da %>% lm(formula='dmi ~ NPP') %>% summary()
## 
## Call:
## lm(formula = "dmi ~ NPP", data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30053 -0.11515  0.00347  0.15018  0.40231 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.2825058  0.2427229   5.284 3.08e-05 ***
## NPP         -0.0025880  0.0005686  -4.551 0.000174 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2027 on 21 degrees of freedom
## Multiple R-squared:  0.4966, Adjusted R-squared:  0.4726 
## F-statistic: 20.71 on 1 and 21 DF,  p-value: 0.000174
da %>% lm(formula='nb_sightings ~ NPP') %>% summary()
## 
## Call:
## lm(formula = "nb_sightings ~ NPP", data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7272 -0.7507 -0.4683  0.6863  3.4563 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  2.982787   1.441477   2.069   0.0511 .
## NPP         -0.005234   0.003377  -1.550   0.1361  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.204 on 21 degrees of freedom
## Multiple R-squared:  0.1027, Adjusted R-squared:  0.05993 
## F-statistic: 2.403 on 1 and 21 DF,  p-value: 0.1361

Map

v <- c(0,5500)
col <- RColorBrewer::brewer.pal(9,"Greens")
pal <- colorNumeric(c(col, rep(col[length(col)],1,60)), v, na.color = "transparent")

leaflet() %>% 
  addTiles(urlTemplate = 'https://api.mapbox.com/styles/v1/rafnuss/cklnbuqev2ubk17npj2u8uqee/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoicmFmbnVzcyIsImEiOiIzMVE1dnc0In0.3FNMKIlQ_afYktqki-6m0g') %>% 
  addRasterImage(raster("npp_1998.tif"), colors = pal, opacity = 0.8, group = "1998") %>% 
  addRasterImage(raster("npp_2020.tif"), colors = pal, opacity = 0.8, group = "2020") %>% 
  addRasterImage(raster("npp_meanAll.tif"), colors = pal, opacity = 0.8, group = "All") %>% 
  addLayersControl(
    overlayGroups = c("1998", "2020", "All"),
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  addLegend("bottomright", pal = pal, values = v, title = "Net Primary Productivity")
file = c('1998','2020','meanAll')
max_npp <- 700
p <- list()

for (i in 1:length(file)){
  NPP_df <- as.data.frame(raster(paste0("npp_", file[i],".tif")), xy = TRUE) %>% 
    rename(npp=starts_with('npp')) %>% 
    mutate(
      npp = ifelse(npp>max_npp,max_npp,npp)
    )
  
  world_map <- map_data("world")
  
  p[[i]] <-ggplot() +
    geom_raster(data = NPP_df , aes(x = x, y = y, fill = npp), na.rm = TRUE) +
    scale_fill_viridis_c(limits = c(0,max_npp)) +
    geom_polygon(data=world_map, aes(x = long, y = lat, group = group), fill="lightgray", colour = "white")+
    # theme_nothing() +
    xlim(range(NPP_df$x)+c(-12,10)) +
    ylim(range(NPP_df$y)+c(-5,5)) +
    coord_quickmap()
}

p <- do.call(grid.arrange,p)

# ggsave('map.eps',p,device="eps")