Load and display all data
<- read.csv('sightings.csv') %>%
dm mutate(
count = ifelse(count=="x",countClass,count),
count = as.numeric(count),
countClass = factor(countClass),
date= as.Date(date, format="%d/%m/%Y"),
dayOfYear = as.numeric(strftime(date,format="%j")),
year = as.numeric(strftime(date,format="%Y") )
)
%>%
dm ::select(-c(countClass,validity,latitude,longitude,dayOfYear,year)) %>%
dplyrmutate(ebird = paste0('<a href="',ebird,'#renpha">',str_replace(ebird,'https://ebird.org/checklist/',''),'</a>')) %>%
datatable(filter = 'top', rownames = FALSE, escape = FALSE)
Maps of the sightings
<- colorNumeric(hsv(1 - ((1:365) + (365/4))%%365/365, s = 0.8, v = 0.8), domain=c(1,365))
pal <- colorFactor(palette="Set1" ,domain=dm$cost)
pal2
# create popup
%>%
dm mutate(popup = iconv(paste('<b>Number</b>: ', count ,
'<br><b>Location</b>: ', location,
'<br><b>Date</b>: ',date,
'<br><b>Observer</b>: ', observer,
'<br><b>Description</b>: ', description,
'<br><b>Source</b>: ', source,
ifelse(source=='eBird',paste0(' - <a href="',ebird,'#renpha">',str_replace(ebird,'https://ebird.org/checklist/',''),'</a>'),''),
ifelse(picture,'<br><b>Photo</b>','')
)"UTF-8", "UTF-8", sub='') %>%
), filter(!is.na(latitude)) %>%
#filter(coast !="inland") %>%
arrange( desc(count) ) %>%
leaflet(width = "100%") %>%
#addTiles() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addCircleMarkers(lng = ~longitude, lat = ~latitude, popup = ~popup,
radius = ~ifelse(is.na(count),7,7+log(count)*3),
fillColor = ~pal(dayOfYear),
stroke = 0,
opacity= 0.4,
weight = 2,
color = ~pal2(coast),
fillOpacity = .2,
# clusterOptions = markerClusterOptions()#maxClusterRadius = 1)
%>%
) addLegend("bottomright", pal = pal,
title="Date",
values = ~seq(1,365, length.out = 12),
bins = seq(1,365, length.out = 12),
labFormat = function(type = "numeric", cuts){
format(ISOdate(2004,1,1)+days(round(cuts)),"%B")
},opacity = 1
)
<- dm %>%
dmt filter(!is.na(latitude)) %>%
arrange( desc(count) ) %>%
mutate(
# size = ifelse(is.na(count), 1, log(count))
size = 1+3*log(as.numeric(as.character(countClass))),
size = ifelse(is.na(size),1,size)
)
<- SpatialPointsDataFrame(coords = cbind( dmt$longitude,dmt$latitude), data = dmt)
spdf
tm_shape(World, bbox = st_bbox(spdf)+cbind(-1,-1,1,1)) +
tm_polygons() +
tm_shape(spdf) +
tm_symbols(size="size")
Counts vs date
Cumulative distribution of the counts
%>%
(dm filter(!is.na(count)) %>%
arrange(count) %>%
ggplot( aes(count)) +
stat_ecdf(geom = "step") +
scale_x_log10(name="Count") +
theme_light() +
scale_y_continuous(name="Cumulative PDF")
%>% ggplotly() )
Counts per period and coast
%>%
dm filter(count<3000) %>% # remove the count to avoid baising the mean
mutate(coast = ifelse(coast!="inland","coast","insland")) %>%
group_by(coast) %>%
summarize(
nb_sightings = n(),
mean_count = mean(count, na.rm = T),
max_count = max(count, na.rm = T)
%>%
) ::kable() kableExtra
coast | nb_sightings | mean_count | max_count |
---|---|---|---|
coast | 37 | 29.78378 | 200 |
insland | 64 | 4.65625 | 40 |
Counts along the year
%>%
dm ggplot(aes( dayOfYear, fill = as.factor(countClass))) +
geom_histogram(binwidth = 14,position = "stack") +
scale_x_continuous(breaks=as.numeric(format(ISOdate(2000,1:12,1),"%j")),
labels=format(ISOdate(2000,1:12,1),"%b"),
minor_breaks = c(),
expand = c(0,0),
name="Date"
#limits = c(100,365)
+
) scale_y_continuous(expand = c(0,0), name="Number of sightings" ) +
theme_light() +
# scale_fill_viridis(option="magma", discrete = TRUE, direction=-1)
scale_fill_brewer(palette ='YlOrBr')
%>%
dm mutate(coast = ifelse(coast!="inland",'coast',coast)) %>%
ggplot(aes( dayOfYear, fill = as.factor(coast))) +
geom_histogram(binwidth = 14,position = "stack") +
scale_x_continuous(breaks=as.numeric(format(ISOdate(2000,1:12,1),"%j")),
labels=format(ISOdate(2000,1:12,1),"%b"),
minor_breaks = c(),
expand = c(0,0),
name="Date"
#limits = c(100,365)
+
) scale_y_continuous(expand = c(0,0), name="Number of sightings" ) +
theme_light()
Habitat on the coast
<- dm %>%
dmc mutate(peak = ifelse(dayOfYear>170 & dayOfYear<349, "sept-dec","jan-may")) %>%
#filter(coast!="inland") %>%
mutate(coast = ifelse(coast!="inland",'coast',coast)) %>%
group_by(coast,peak) %>%
filter(count<3000) %>%
summarize(
nb_sightings = n(),
mean_count = mean(count),
min_count = min(count),
max_count = max(count),
.groups="drop"
)
%>%
dmc ::kable() kableExtra
coast | peak | nb_sightings | mean_count | min_count | max_count |
---|---|---|---|---|---|
coast | jan-may | 31 | 32.774194 | 1 | 200 |
coast | sept-dec | 6 | 14.333333 | 2 | 50 |
inland | jan-may | 30 | 5.933333 | 1 | 39 |
inland | sept-dec | 34 | 3.529412 | 1 | 40 |
%>%
dmc filter(peak == "sept-dec") %>%
ggplot(aes(x="", y=nb_sightings, fill=coast)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +
theme_light()
%>%
dmc filter(peak == "jan-may") %>%
ggplot(aes(x="", y=nb_sightings, fill=coast)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +
theme_light()
%>%
dm filter(count<3000) %>% # remove the count to avoid baising the mean
mutate(peak = ifelse(dayOfYear>170 & dayOfYear<349, "sept-dec","jan-may")) %>%
group_by(coast) %>%
summarize(
nb_sightings = n(),
mean_count = mean(count, na.rm = T),
max_count = max(count, na.rm = T),
min_count = min(count, na.rm = T)
%>%
) ::kable() kableExtra
coast | nb_sightings | mean_count | max_count | min_count |
---|---|---|---|---|
coast | 2 | 42.500000 | 75 | 10 |
inland | 64 | 4.656250 | 40 | 1 |
offshore | 20 | 47.100000 | 200 | 1 |
onshore | 7 | 1.142857 | 2 | 1 |
saltwork | 8 | 8.375000 | 28 | 1 |
Change along the year
%>%
dm mutate(coast = fct_rev(factor(ifelse(coast!="inland",'coast',coast),ordered = TRUE))) %>%
filter(!is.na(year)) %>%
ggplot(aes( year, fill = coast)) +
geom_histogram(binwidth = 1,position = "stack") +
scale_x_continuous(minor_breaks = c(),
expand = c(0,0),
name="Date",
breaks=seq(1960,2020,by=5),
limits = c(1960,2020)
+
) scale_y_continuous(expand = c(0,0), name="Number of sightings" ) +
theme_light()
# Dipole Mode Index (DMI)
source: https://psl.noaa.gov/gcos_wgsp/Timeseries/DMI/
<- read_excel("dipole_mode_index.xlsx",sheet="dmiwest", col_types = "numeric") %>%
dmi pivot_longer(!year, names_to = "month", values_to = "dmi") %>%
# filter(year<2021 & year>min(dm$year)-1) %>%
mutate(date=as.Date(parse_date_time(paste0(month(parse_date_time(month, "%m")),'/',year), "%m/%y")) )
<- dmi %>%
dmi_s filter(month %in% c('January','February','March','October','November','December')) %>%
mutate(year = ifelse(month %in% c('October','November','December'),year+1,year)) %>%
group_by(year) %>%
summarise(dmi=mean(dmi)) %>%
mutate(year=as.Date(parse_date_time(year,"%y"))) %>%
filter(year(year)<2021)
ggplot() +
geom_line(data=dmi, aes(x=date,y=dmi),color="grey") +
geom_point(data=dmi_s , aes(x=year,y=dmi),color="black") +
scale_x_date(date_labels = "%m-%Y",minor_breaks = c(),
expand = c(0,0), name="Date",
limits = c(ymd("1960-01-01"),ymd("2021-01-01"))
+
) scale_y_continuous(expand = c(0,0), name="Dipole Mode Index", limits = c(-1,1) ) +
theme_light()
Sightings vs DMI + year
= dm %>%
dmycoast filter(coast!="inland") %>%
mutate(year=ifelse(dayOfYear>365/2,year+1,year)) %>%
mutate(year=as.Date(parse_date_time(year,"%y"))) %>%
group_by(year) %>%
summarise(
nb_sightings=n(),
avg_count=mean(count,na.rm=T),
max_count=max(count,na.rm=T),
)
<- dmi_s %>% merge(dmycoast,'year',all=TRUE) %>%
tmp replace(is.na(.),0) %>%
filter(year(year)>1960) %>%
mutate(year=year(year))
Check Poisson distribution fit
<- fitdistr(tmp$nb_sightings,"Poisson")
fit hist(tmp$nb_sightings,prob=TRUE)
curve(dpois(x, fit$estimate[1]), from=0, to=5,n=6,col="red", add=T,type="p")
Fit GLM
<- tmp %>% glm(formula="nb_sightings ~ dmi + year",family = 'poisson')
glmfit %>% summary() glmfit
##
## Call:
## glm(formula = "nb_sightings ~ dmi + year", family = "poisson",
## data = .)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5004 -1.1668 -0.9140 0.4127 2.4905
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -22.65631 26.72310 -0.848 0.397
## dmi 0.46780 0.63138 0.741 0.459
## year 0.01117 0.01339 0.834 0.404
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 88.761 on 59 degrees of freedom
## Residual deviance: 84.334 on 57 degrees of freedom
## AIC: 144.85
##
## Number of Fisher Scoring iterations: 6
Check overdispersion
dispersiontest(glmfit)
##
## Overdispersion test
##
## data: glmfit
## z = 2.3944, p-value = 0.008323
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 1.515923
Partial Plot
effect_plot(glmfit, pred = dmi, interval = TRUE, plot.points = TRUE)
effect_plot(glmfit, pred = year, interval = TRUE, plot.points = TRUE)
Plot sightings vs dmi
%>% # filter(year<2020) %>%
tmp ggplot(aes(x=dmi,y=nb_sightings,size=log(max_count+1),color=year)) +
geom_point() +
#geom_smooth(method = 'glm', method.args = list(family = 'poisson')) +
theme_light() +
scale_color_viridis_c()
NPP
time series
<- read_csv('NPP_east_africa_timeserie.csv',show_col_types = FALSE)
NPP_ts $Date <- as.Date(NPP_ts$`system:time_start`,format="%b %d, %Y")
NPP_ts
<- NPP_ts %>% filter(month(Date)>9|month(Date)<4) %>%
NPP_tsy mutate(year=ifelse(month(Date)>6,year(Date)+1,year(Date))) %>%
group_by(year) %>%
summarise(NPP = mean(NPP)) %>%
mutate(year=as.Date(parse_date_time(year,"%y")))
ggplot() +
geom_line(data=NPP_ts,aes(x=Date, y=NPP) ) +
geom_point(data=NPP_tsy,aes(x=year, y=NPP) ) +
theme_light()
<- dmi_s %>% filter(year(year)>1997 & year(year)<2021) %>% ggplot(aes(x=year,y=dmi)) + geom_point() +
plot1 theme_light()
<- NPP_tsy %>% ggplot(aes(x=year,y=NPP)) + geom_point() +
plot2 theme_light()
<- dmycoast %>% filter(year(year)>1997 & year(year)<2021) %>% ggplot(aes(x=year,y=nb_sightings)) + geom_point() +
plot3 theme_light()
<- dmycoast %>% filter(year(year)>1997 & year(year)<2021) %>% ggplot(aes(x=year,y=max_count)) + geom_point() +
plot4 theme_light()
grid.arrange(plot1, plot2, plot3, plot4, ncol=1)
Combine all dataset
<- data.frame(year=seq(1997,2021)) %>%
da merge(NPP_tsy %>% mutate(year=year(year)), 'year',all=TRUE) %>%
merge(dmycoast%>% mutate(year=year(year)),'year',all=TRUE) %>%
merge(dmi_s%>% mutate(year=year(year)),'year',all=TRUE) %>%
filter(year<2021 & year>1997) %>%
replace(is.na(.), 0)
Plot
%>%
da ggplot( aes(NPP,dmi,label=year)) +
geom_point(aes(size=nb_sightings)) + geom_text() +
geom_smooth(method='lm') +
theme_light()
## `geom_smooth()` using formula 'y ~ x'
Fit linear model and compute correlation
cor(da$NPP,da$dmi)
## [1] -0.7046759
%>% lm(formula='dmi ~ NPP') %>% summary() da
##
## Call:
## lm(formula = "dmi ~ NPP", data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30053 -0.11515 0.00347 0.15018 0.40231
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2825058 0.2427229 5.284 3.08e-05 ***
## NPP -0.0025880 0.0005686 -4.551 0.000174 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2027 on 21 degrees of freedom
## Multiple R-squared: 0.4966, Adjusted R-squared: 0.4726
## F-statistic: 20.71 on 1 and 21 DF, p-value: 0.000174
%>% lm(formula='nb_sightings ~ NPP') %>% summary() da
##
## Call:
## lm(formula = "nb_sightings ~ NPP", data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7272 -0.7507 -0.4683 0.6863 3.4563
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.982787 1.441477 2.069 0.0511 .
## NPP -0.005234 0.003377 -1.550 0.1361
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.204 on 21 degrees of freedom
## Multiple R-squared: 0.1027, Adjusted R-squared: 0.05993
## F-statistic: 2.403 on 1 and 21 DF, p-value: 0.1361
Map
<- c(0,5500)
v <- RColorBrewer::brewer.pal(9,"Greens")
col <- colorNumeric(c(col, rep(col[length(col)],1,60)), v, na.color = "transparent")
pal
leaflet() %>%
addTiles(urlTemplate = 'https://api.mapbox.com/styles/v1/rafnuss/cklnbuqev2ubk17npj2u8uqee/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoicmFmbnVzcyIsImEiOiIzMVE1dnc0In0.3FNMKIlQ_afYktqki-6m0g') %>%
addRasterImage(raster("npp_1998.tif"), colors = pal, opacity = 0.8, group = "1998") %>%
addRasterImage(raster("npp_2020.tif"), colors = pal, opacity = 0.8, group = "2020") %>%
addRasterImage(raster("npp_meanAll.tif"), colors = pal, opacity = 0.8, group = "All") %>%
addLayersControl(
overlayGroups = c("1998", "2020", "All"),
options = layersControlOptions(collapsed = FALSE)
%>%
) addLegend("bottomright", pal = pal, values = v, title = "Net Primary Productivity")
= c('1998','2020','meanAll')
file <- 700
max_npp <- list()
p
for (i in 1:length(file)){
<- as.data.frame(raster(paste0("npp_", file[i],".tif")), xy = TRUE) %>%
NPP_df rename(npp=starts_with('npp')) %>%
mutate(
npp = ifelse(npp>max_npp,max_npp,npp)
)
<- map_data("world")
world_map
<-ggplot() +
p[[i]] geom_raster(data = NPP_df , aes(x = x, y = y, fill = npp), na.rm = TRUE) +
scale_fill_viridis_c(limits = c(0,max_npp)) +
geom_polygon(data=world_map, aes(x = long, y = lat, group = group), fill="lightgray", colour = "white")+
# theme_nothing() +
xlim(range(NPP_df$x)+c(-12,10)) +
ylim(range(NPP_df$y)+c(-5,5)) +
coord_quickmap()
}
<- do.call(grid.arrange,p) p
# ggsave('map.eps',p,device="eps")