This is an introduction to graph theoretic and gravity modeling approaches for quantifying geneflow in spatial systems.
p <- c("raster", "rgdal", "igraph", "sp", "GeNetIt", "spatialEco",
"sf", "terra", "sfnetworks", "spdep", "dplyr", "tmap", "devtools")
if(any(!unlist(lapply(p, requireNamespace, quietly=TRUE)))) {
m = which(!unlist(lapply(p, requireNamespace, quietly=TRUE)))
suppressMessages(invisible(lapply(p[-m], require,
character.only=TRUE)))
stop("Missing library, please install ", paste(p[m], collapse = " "))
} else {
if(packageVersion("GeNetIt") < "0.1-5") {
remotes::install_github("jeffreyevans/GeNetIt")
}
suppressMessages(invisible(lapply(p, require, character.only=TRUE)))
}
# set your working directory here
setwd(wd <- "C:/evans/GITS/Gravity")
# EPSG 32611; UTM zone 11N, WGS84 datum
prj = 32611
# Some needed functions
# back.transform: back-transform log-scale values, adding half the
# variance to correct for lognormal retransformation bias
back.transform <- function(y) exp(y + 0.5 * stats::var(y))
# rmse: root mean squared error of predicted (p) vs observed (o)
rmse = function(p, o){ sqrt(mean((p - o)^2)) }
# Download data
d = "https://spatialr.s3.us-west-2.amazonaws.com/Gravity/data.zip"
download.file(d, destfile="data.zip", mode="wb")
unzip("data.zip")
file.remove("data.zip")
## [1] TRUE
In this section, we will read in all wetland locations in the study area and calculate a few graph-based metrics to assign to the wetland sites where data were collected. This puts our samples into the context of the larger wetland system, accounting for proximity and juxtaposition.
Read file “Wetlands.csv” and make a spatial object.
Hints: read.csv, coordinates, st_as_sf
wetlands <- read.csv(file.path(wd, "data", "Wetlands.csv"),
header = TRUE)
head(wetlands)
## ID X Y RALU SiteName
## 1 1 688835 5002939 y AirplaneLake
## 2 2 687460 4994400 n AlpineInletCreek
## 3 3 687507 4994314 n AlpineInletMeadow
## 4 4 687637 4994117 n AlpineLake
## 5 5 688850 4997750 n AxeHandleMeadow
## 6 6 688500 4998900 y BachelorMeadow
wetlands <- st_as_sf(wetlands, coords = c("X", "Y"),
crs = 32611, agr = "constant")
Create a Gabriel graph from the wetlands to represent a "realization" of connectivity and spatial arrangement.
What other types of graphs could you build (a sketch of alternatives follows the Gabriel graph code)? Which wetlands are connected to each other based on the graph?
# Derive Gabriel graph
gg <- graph2nb(gabrielneigh(st_coordinates(wetlands)),sym=TRUE)
plot(gg, coords=st_coordinates(wetlands))
# Coerce to sf line object (will be used to create igraph object)
gg <- nb2lines(gg, coords = sf::st_coordinates(wetlands),
proj4string = prj, as_sf=TRUE)
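For comparison, here is a minimal sketch of a few alternative graph realizations using spdep (the k = 4 for the k-nearest neighbor graph is an arbitrary choice); each nb object can be plotted or coerced with nb2lines() exactly as above.
xy <- st_coordinates(wetlands)
# Delaunay triangulation (requires the deldir package)
del <- tri2nb(xy)
# relative neighbor graph, a subgraph of the Gabriel graph
rel <- graph2nb(relativeneigh(xy), sym=TRUE)
# symmetric k-nearest neighbor graph
k4 <- knn2nb(knearneigh(xy, k=4), sym=TRUE)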
Using the sf line graph we just made, coerce it to an sfnetwork, then calculate the graph metrics betweenness and closeness (weighted) and degree.
Do you think this graph is ecologically meaningful? What information might the graph structure itself contain?
degree - the number of edges connected to a node
betweenness - the number of shortest paths going through a node
closeness - the inverse of the sum of shortest-path distances from a node to all other nodes
# Create sfnetwork, igraph object
wg <- as_sfnetwork(gg, edges=gg, nodes=wetlands, directed = FALSE,
node_key = "SiteName", length_as_weight = TRUE,
edges_as_lines = TRUE)
# Calculate weights and graph metrics
w <- wg %>% activate("edges") %>% pull(weight) %>% as.numeric()
w[w <= 0] <- 1
w = w / sum(w)
# calculate and add betweenness, degree and closeness
wetlands$betweenness <- igraph::betweenness(wg, directed=FALSE, weights=w)
wetlands$degree <- igraph::degree(wg)
wetlands$closeness <- igraph::closeness(wg, weights=w)
Plot the results using the graph edges and the wetland points with the attribute "betweenness".
plot(st_geometry(gg), col="grey")
plot(wetlands["betweenness"], pch=19,
cex=0.75, add=TRUE)
box()
title("Wetlands Gabriel graph betweenness")
In this section we will read the field data and add the node metrics we just calculated.
Using names for matching is risky, as small changes in spelling result in non-matches. In this case the ID fields are not consistent (the data were originally collected at different times for different purposes). However, site names are standardized in a drop-down list of a database, so they are a usable matching field. My preference is to do this type of operation on a numeric field.
sites <- read.csv(file.path(wd, "data", "RALU_Site.csv"),
header = TRUE)
sites$SiteID <- as.character(sites$SiteID)
nodestats <- st_drop_geometry(wetlands[,c(3,5:7)])
nodestats <- nodestats[which(nodestats$SiteName %in% sites$SiteName),]
sites <- merge(nodestats, sites, by="SiteName")
sites <- st_as_sf(sites, coords = c("X", "Y"),
crs = prj, agr = "constant")
To assess connectivity using a gravity model, we need to build a graph from the occupied frog sites. This could be any type of graph, but I generally use a saturated graph or one pruned by some maximum distance.
dist.graph <- knn.graph(sites, row.names = sites$SiteName)
dist.graph <- merge(dist.graph, st_drop_geometry(sites),
by.y="SiteName", by.x="from_ID")
dist.graph <- dist.graph[,-c(11:19)] # drop extra columns
## Can create a distance-constrained graph with max.dist arg (not run)
# dist.graph <- knn.graph(sites, row.names = sites$SiteName, max.dist=5000)
This involves: reading in the RALU_Dps.csv genetic distances, converting distance to flow (1 - distance), unfolding the matrix into a dataframe and then merging the graph with the genetic distances.
Note: if you get stuck on this step, read in gdist.csv (see the fallback sketch after the merge below).
gdist <- read.csv(file.path(wd, "data", "RALU_Dps.csv"), header=TRUE)
rownames(gdist) <- t(names(gdist))
# Make a matrix, gdist, unfold data
gdist <- dmatrix.df(as.matrix(gdist))
names(gdist) <- c("FROM", "TO", "GDIST") #unfold the file
gdist <- gdist[!gdist$FROM == gdist$TO ,]
gdist[,1] <-sub("X", "", gdist[,1])
gdist[,2] <-sub("X", "", gdist[,2])
gdist <- cbind(from.to=paste(gdist[,1], gdist[,2], sep="."),
gdist)
# Transform genetic distance to genetic flow
gdist$GDIST <- flow(gdist$GDIST)
# Merge data
dist.graph$from.to <- paste(dist.graph$i, dist.graph$j, sep=".")
dist.graph <- merge(dist.graph, gdist, by = "from.to")
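If the matrix unfolding gave you trouble, the fallback mentioned above is to read the pre-computed table; a minimal sketch, assuming gdist.csv ships with the workshop data already unfolded into the same FROM/TO/GDIST layout:
# fallback: read the already-unfolded genetic distance table (column
# layout assumed), then rebuild the from.to key and merge as above
gdist <- read.csv(file.path(wd, "data", "gdist.csv"), header=TRUE)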
Note: list.files interprets the pattern argument as a regular expression, so "tif$" matches filenames ending in "tif"; a glob wildcard such as "*.tif" will not work (see the glob2rx aside below).
list.files(file.path(wd,"data"), "tif$")
## [1] "cti.tif" "dd5.tif" "ffp.tif" "gsp.tif" "hli.tif"
## [6] "nlcd.tif" "pratio.tif" "rough27.tif" "srr.tif"
xvars <- rast(list.files(file.path(wd,"data"), "tif$",
full.names = TRUE))
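As an aside, if you prefer wildcard (glob) syntax, utils::glob2rx translates it into the regular expression that list.files expects:
# "*.tif" becomes the regular expression "^.*\\.tif$"
list.files(file.path(wd, "data"), pattern = utils::glob2rx("*.tif"))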
Reclassify NLCD, in xvars, into a single binary wetland class.
Hints: matrix, classify
# NLCD classes 11, 12, 90 and 95 (open water, ice/snow, wetland) -> 1, all else -> 0
m <- c(0, 10.8, 0,
       10.9, 12.1, 1,
       12.9, 89.5, 0,
       89.1, 95.1, 1,
       95.9, 100, 0)
reclass <- matrix(m, ncol=3, byrow=TRUE)
wetlnd <- classify(xvars[["nlcd"]], reclass)
names(wetlnd) <- "wetlnd"
xvars <- c(xvars, wetlnd)
Assign the proportion of land cover that is wetland to sites as pwetland.
You want to know if areas of dense wetlands produce more frogs. What buffer distance will you use (see the sensitivity sketch after this code)?
## method 1 (can result in Inf if all zero)
# prop.land <- function(x) {
# length(x[x==1]) / length(x)
# }
## method 2 (no divide by zero error)
prop.land <- function(x) {
prop.table(table(factor(x, levels=c(0,1))))[2]
}
b <- st_buffer(sites, 300)
pwetland <- extract(wetlnd, vect(b))
pwetland <- tapply(pwetland[,2], pwetland[,1], prop.land)
# Add the % wetland back to the dataframe
sites$pwetland <- as.numeric(pwetland)
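If you are unsure of the buffer distance, here is a quick sensitivity sketch reusing the sites, wetlnd and prop.land objects above (the candidate distances are arbitrary):
# compare mean % wetland across candidate buffer distances (meters)
for(d in c(150, 300, 500, 1000)) {
  e <- extract(wetlnd, vect(st_buffer(sites, d)))
  pw <- tapply(e[,2], e[,1], prop.land)
  cat("buffer", d, "m: mean pwetland =",
      round(mean(as.numeric(pw), na.rm=TRUE), 3), "\n")
}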
This adds potential "at site" variables; keep sites as an sf POINT class object.
stats <- extract(xvars[[-6]], vect(sites))
sites <- st_sf(data.frame(as.data.frame(sites), stats),
geometry=sites$geometry)
Remove nlcd and wetlnd rasters before calculating statistics
idx <- which(names(xvars) %in% c("nlcd","wetlnd"))
suppressWarnings(
stats <- graph.statistics(dist.graph, r = xvars[[-idx]],
buffer= NULL, stats = c("min",
"mean","max", "var", "median")) )
dist.graph <- st_sf(data.frame(as.data.frame(dist.graph),
stats), geometry=dist.graph$geometry)
Because NLCD is categorical, moments (mean, variance, etc.) are nonsensical. Create a function that returns the % wetland between sites, then use it to calculate an additional edge statistic and add the result to the graph.
Are there other categorical variables that you think may be ecologically important?
wet.pct <- function(x) {
x <- ifelse(x == 11 | x == 12 | x == 90 | x == 95, 1, 0)
prop.table(table(factor(x, levels=c(0,1))))[2]
}
suppressWarnings(
wetstats <- graph.statistics(dist.graph, r=xvars[["nlcd"]],
buffer= NULL, stats = c("wet.pct")) )
dist.graph$wet.pct.nlcd <- as.numeric(wetstats[,1])
Evaluate node and edge correlations
We need to evaluate correlations in the data so that we do not introduce multicollinearity into our models. Note that we are not actually going to remove the correlated variables here, just walk through a few methods of evaluating them; the code to remove collinear variables is commented out for reference. We do have to log-transform the data so that we evaluate the actual model structure.
Hint: cor, collinear
node.var <- c("degree", "betweenness", "Elev", "Length", "Area", "Perim",
"Depth", "pH","Dforest","Drock", "Dshrub", "pwetland", "cti",
"dd5", "ffp","gsp","pratio","hli","rough27","srr")
p = 0.8
s <- st_drop_geometry(sites)[,node.var]
for(i in 1:ncol(s)) {
s[,i] <- ifelse(s[,i] <= 0, 0.00001, log(s[,i]))
}
#### site correlations
site.cor <- cor(s, y = NULL,
use = "complete.obs",
method = "pearson")
diag(site.cor) <- 0
cor.idx <- which(site.cor > p | site.cor < -p, arr.ind = TRUE)
cor.names <- vector()
cor.p <- vector()
for(i in 1:nrow(cor.idx)) {
cor.p[i] <- site.cor[cor.idx[i,][1], cor.idx[i,][2]]
cor.names [i] <- paste(rownames(site.cor)[cor.idx[i,][1]],
colnames(site.cor)[cor.idx[i,][2]], sep="_")
}
data.frame(parm=cor.names, p=cor.p)
## parm p
## 1 dd5_Elev -0.8924399
## 2 ffp_Elev -0.8873475
## 3 gsp_Elev 0.8728832
## 4 pratio_Elev -0.8659980
## 5 Area_Length 0.8715535
## 6 Perim_Length 0.8392532
## 7 Length_Area 0.8715535
## 8 Perim_Area 0.9912114
## 9 Length_Perim 0.8392532
## 10 Area_Perim 0.9912114
## 11 Elev_dd5 -0.8924399
## 12 ffp_dd5 0.9787231
## 13 gsp_dd5 -0.9843433
## 14 pratio_dd5 0.8965794
## 15 Elev_ffp -0.8873475
## 16 dd5_ffp 0.9787231
## 17 gsp_ffp -0.9705124
## 18 pratio_ffp 0.8553544
## 19 Elev_gsp 0.8728832
## 20 dd5_gsp -0.9843433
## 21 ffp_gsp -0.9705124
## 22 pratio_gsp -0.8820946
## 23 Elev_pratio -0.8659980
## 24 dd5_pratio 0.8965794
## 25 ffp_pratio 0.8553544
## 26 gsp_pratio -0.8820946
( node.cor <- collinear(s, p=p) )
## Collinearity between Area and Perim correlation = 0.9912
## Correlation means: 0.407 vs 0.272
## recommend dropping Area
## Collinearity between Area and Length correlation = 0.8716
## Correlation means: 0.407 vs 0.274
## recommend dropping Area
## Collinearity between Perim and Length correlation = 0.8393
## Correlation means: 0.402 vs 0.274
## recommend dropping Perim
## Collinearity between Elev and dd5 correlation = 0.8924
## Correlation means: 0.384 vs 0.273
## recommend dropping Elev
## Collinearity between Elev and ffp correlation = 0.8873
## Correlation means: 0.384 vs 0.274
## recommend dropping Elev
## Collinearity between Elev and gsp correlation = 0.8729
## Correlation means: 0.384 vs 0.274
## recommend dropping Elev
## Collinearity between Elev and pratio correlation = 0.866
## Correlation means: 0.384 vs 0.275
## recommend dropping Elev
## Collinearity between dd5 and ffp correlation = 0.9787
## Correlation means: 0.38 vs 0.274
## recommend dropping dd5
## Collinearity between dd5 and gsp correlation = 0.9843
## Correlation means: 0.38 vs 0.274
## recommend dropping dd5
## Collinearity between dd5 and pratio correlation = 0.8966
## Correlation means: 0.38 vs 0.275
## recommend dropping dd5
## Collinearity between ffp and gsp correlation = 0.9705
## Correlation means: 0.363 vs 0.274
## recommend dropping ffp
## Collinearity between ffp and pratio correlation = 0.8554
## Correlation means: 0.363 vs 0.275
## recommend dropping ffp
## Collinearity between gsp and pratio correlation = 0.8821
## Correlation means: 0.357 vs 0.275
## recommend dropping gsp
## [1] "Elev" "Area" "Perim" "dd5" "ffp" "gsp"
# node.var <- node.var[-which(node.var %in% node.cor)]
Add node data
Build and add node (at site) level data to the graph, then merge the edge (distance graph) and node (site) data.
Hint: build.node.data, merge
node.var <- c("degree", "betweenness", "Elev", "Length", "Area", "Perim",
"Depth", "pH","Dforest","Drock", "Dshrub", "pwetland", "cti",
"dd5", "ffp","gsp","pratio","hli","rough27","srr")
node <- build.node.data(st_drop_geometry(sites), group.ids = "SiteID",
from.parms = node.var)
gdata <- merge(st_drop_geometry(dist.graph)[c(1,2,5,11,14,7)], node,
by.x="SiteID", by.y="SiteID")
gdata <- merge(gdata, st_drop_geometry(dist.graph)[c(11, 8:10, 15:55)],
by.x="SiteID", by.y="SiteID")
# log transform matrix
for(i in 5:ncol(gdata)) {
gdata[,i] <- ifelse(gdata[,i] <= 0, 0.00001, log(gdata[,i]))
}
Think about the types of hypotheses you want to test and what types of constraints are needed. Write out model statements that group parameters into hypotheses and run the models. Remember to run a NULL model that is just distance.
# null model (under Maximum Likelihood)
( null <- gravity(y = "GDIST", x = c("length"), d = "length", group = "from_ID",
data = gdata, fit.method = "ML", ln = FALSE) )
## [1] "Running singly-constrained gravity model"
## Gravity model
##
## Linear mixed-effects model fit by maximum likelihood
## Data: gdata
## AIC BIC logLik
## -13067.38 -13036.13 6537.689
##
## Random effects:
## Formula: GDIST ~ 1 | from_ID
## (Intercept) Residual
## StdDev: 0.08385395 0.168483
##
## Fixed effects: stats::as.formula(paste(paste(y, "~", sep = ""), paste(x, collapse = "+")))
## Value Std.Error DF t-value p-value
## (Intercept) -0.4372945 0.020854014 18224 -20.969322 0.0000
## length -0.0002220 0.001624861 18224 -0.136607 0.8913
## Correlation:
## (Intr)
## length -0.63
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.07692374 -0.58927383 0.02094334 0.65955114 3.21867810
##
## Number of Observations: 18252
## Number of Groups: 27
# Fish hypothesis (under Maximum Likelihood)
( depth <- gravity(y = "GDIST", x = c("length","from.Depth"), d = "length",
group = "from_ID", data = gdata, fit.method = "ML", ln = FALSE) )
## [1] "Running singly-constrained gravity model"
## Gravity model
##
## Linear mixed-effects model fit by maximum likelihood
## Data: gdata
## AIC BIC logLik
## -13067.22 -13028.16 6538.61
##
## Random effects:
## Formula: GDIST ~ 1 | from_ID
## (Intercept) Residual
## StdDev: 0.08102556 0.168483
##
## Fixed effects: stats::as.formula(paste(paste(y, "~", sep = ""), paste(x, collapse = "+")))
## Value Std.Error DF t-value p-value
## (Intercept) -0.4101360 0.028357541 18224 -14.463032 0.0000
## length -0.0002202 0.001624886 18224 -0.135530 0.8922
## from.Depth -0.0200540 0.014527936 25 -1.380377 0.1797
## Correlation:
## (Intr) length
## length -0.462
## from.Depth -0.693 -0.002
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.07683320 -0.58959052 0.02162005 0.65913788 3.21875035
##
## Number of Observations: 18252
## Number of Groups: 27
# Productivity hypothesis (under Maximum Likelihood)
( product <- gravity(y = "GDIST", x = c("length", "from.ffp", "from.hli"),
d = "length", group = "from_ID", data = gdata,
fit.method = "ML", ln = FALSE) )
## [1] "Running singly-constrained gravity model"
## Gravity model
##
## Linear mixed-effects model fit by maximum likelihood
## Data: gdata
## AIC BIC logLik
## -13069.56 -13022.69 6540.779
##
## Random effects:
## Formula: GDIST ~ 1 | from_ID
## (Intercept) Residual
## StdDev: 0.07472813 0.168483
##
## Fixed effects: stats::as.formula(paste(paste(y, "~", sep = ""), paste(x, collapse = "+")))
## Value Std.Error DF t-value p-value
## (Intercept) 7.844732 3.217229 18224 2.4383506 0.0148
## length -0.000216 0.001625 18224 -0.1327679 0.8944
## from.ffp -0.182845 0.092655 24 -1.9734052 0.0601
## from.hli 9.858960 3.806810 24 2.5898215 0.0161
## Correlation:
## (Intr) length frm.ff
## length -0.001
## from.ffp -0.873 -0.002
## from.hli 1.000 0.003 -0.858
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.07745485 -0.59059645 0.02038188 0.65656315 3.21927307
##
## Number of Observations: 18252
## Number of Groups: 27
# Climate hypothesis (under Maximum Likelihood)
( climate <- gravity(y = "GDIST", x = c("length", "from.ffp", "from.pratio"),
d = "length", group = "from_ID", data = gdata,
fit.method = "ML", ln = FALSE) )
## [1] "Running singly-constrained gravity model"
## Gravity model
##
## Linear mixed-effects model fit by maximum likelihood
## Data: gdata
## AIC BIC logLik
## -13063.57 -13016.7 6537.784
##
## Random effects:
## Formula: GDIST ~ 1 | from_ID
## (Intercept) Residual
## StdDev: 0.08355748 0.168483
##
## Fixed effects: stats::as.formula(paste(paste(y, "~", sep = ""), paste(x, collapse = "+")))
## Value Std.Error DF t-value p-value
## (Intercept) -0.4970409 0.3704759 18224 -1.3416281 0.1797
## length -0.0002206 0.0016250 18224 -0.1357570 0.8920
## from.ffp 0.0229046 0.0534088 24 0.4288546 0.6719
## from.pratio 0.0018406 0.0484518 24 0.0379885 0.9700
## Correlation:
## (Intr) length frm.ff
## length -0.038
## from.ffp -0.187 0.002
## from.pratio -0.956 0.002 -0.104
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.0769193 -0.5889330 0.0210659 0.6592652 3.2188611
##
## Number of Observations: 18252
## Number of Groups: 27
# Wetlands hypothesis (under Maximum Likelihood)
( wetlands <- gravity(y = "GDIST", x = c("length", "from.degree", "from.betweenness", "from.pwetland"),
d = "length", group = "from_ID", data = gdata, fit.method = "ML",
ln = FALSE) )
## [1] "Running singly-constrained gravity model"
## Gravity model
##
## Linear mixed-effects model fit by maximum likelihood
## Data: gdata
## AIC BIC logLik
## -13061.71 -13007.03 6537.856
##
## Random effects:
## Formula: GDIST ~ 1 | from_ID
## (Intercept) Residual
## StdDev: 0.08333405 0.168483
##
## Fixed effects: stats::as.formula(paste(paste(y, "~", sep = ""), paste(x, collapse = "+")))
## Value Std.Error DF t-value p-value
## (Intercept) -0.3992700 0.10461517 18224 -3.816559 0.0001
## length -0.0002201 0.00162516 18224 -0.135444 0.8923
## from.degree -0.0016877 0.01219259 23 -0.138422 0.8911
## from.betweenness -0.0211050 0.05709693 23 -0.369635 0.7150
## from.pwetland -0.0042020 0.00998733 23 -0.420733 0.6779
## Correlation:
## (Intr) length frm.dg frm.bt
## length -0.129
## from.degree -0.273 0.013
## from.betweenness -0.770 -0.006 -0.335
## from.pwetland 0.015 -0.002 0.348 -0.015
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.07678625 -0.58904996 0.02101676 0.65966584 3.21863968
##
## Number of Observations: 18252
## Number of Groups: 27
# Topography hypothesis (under Maximum Likelihood)
( topo <- gravity(y = "GDIST", x = c("length", "median.srr", "median.rough27"), d = "length",
group = "from_ID", data = gdata, fit.method = "ML",
ln = FALSE) )
## [1] "Running singly-constrained gravity model"
## Gravity model
##
## Linear mixed-effects model fit by maximum likelihood
## Data: gdata
## AIC BIC logLik
## -13063.38 -13016.51 6537.691
##
## Random effects:
## Formula: GDIST ~ 1 | from_ID
## (Intercept) Residual
## StdDev: 0.08384168 0.168483
##
## Fixed effects: stats::as.formula(paste(paste(y, "~", sep = ""), paste(x, collapse = "+")))
## Value Std.Error DF t-value p-value
## (Intercept) -0.4362752 0.028684761 18222 -15.209304 0.0000
## length -0.0002219 0.001624950 18222 -0.136584 0.8914
## median.srr 0.0000915 0.012808227 18222 0.007141 0.9943
## median.rough27 -0.0001394 0.002224808 18222 -0.062664 0.9500
## Correlation:
## (Intr) length mdn.sr
## length -0.458
## median.srr 0.449 0.000
## median.rough27 -0.574 0.000 -0.132
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.0777083 -0.5892049 0.0209408 0.6596691 3.2190372
##
## Number of Observations: 18252
## Number of Groups: 27
# Habitat hypothesis (under Maximum Likelihood)
( habitat <- gravity(y = "GDIST", x = c("length", "wet.pct.nlcd", "median.gsp"),
                     d = "length", group = "from_ID", data = gdata, fit.method = "ML",
                     ln = FALSE) )
## [1] "Running singly-constrained gravity model"
## Gravity model
##
## Linear mixed-effects model fit by maximum likelihood
## Data: gdata
## AIC BIC logLik
## -13063.38 -13016.51 6537.689
##
## Random effects:
## Formula: GDIST ~ 1 | from_ID
## (Intercept) Residual
## StdDev: 0.08385112 0.168483
##
## Fixed effects: stats::as.formula(paste(paste(y, "~", sep = ""), paste(x, collapse = "+")))
## Value Std.Error DF t-value p-value
## (Intercept) -0.4347231 0.3358747 18222 -1.2943015 0.1956
## length -0.0002220 0.0016250 18222 -0.1365927 0.8914
## wet.pct.nlcd 0.0000233 0.0008368 18222 0.0278113 0.9778
## median.gsp -0.0004444 0.0592399 18222 -0.0075024 0.9940
## Correlation:
## (Intr) length wt.pc.
## length -0.039
## wet.pct.nlcd -0.131 0.000
## median.gsp -0.998 0.000 0.137
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.07735336 -0.58922468 0.02089058 0.65952539 3.21896772
##
## Number of Observations: 18252
## Number of Groups: 27
# Global model (under Maximum Likelihood)
( global <- gravity(y = "GDIST", x = c("length", "wet.pct.nlcd", "median.gsp",
"from.Depth", "from.ffp", "from.hli", "from.pratio", "from.degree",
"from.betweenness", "from.pwetland", "median.srr", "median.rough27"),
d = "length", group = "from_ID", data = gdata, fit.method = "ML",
ln = FALSE) )
## [1] "Running singly-constrained gravity model"
## Gravity model
##
## Linear mixed-effects model fit by maximum likelihood
## Data: gdata
## AIC BIC logLik
## -13054.63 -12937.45 6542.316
##
## Random effects:
## Formula: GDIST ~ 1 | from_ID
## (Intercept) Residual
## StdDev: 0.07055036 0.168483
##
## Fixed effects: stats::as.formula(paste(paste(y, "~", sep = ""), paste(x, collapse = "+")))
## Value Std.Error DF t-value p-value
## (Intercept) 7.594838 3.142709 18220 2.4166536 0.0157
## length -0.000222 0.001626 18220 -0.1363422 0.8916
## wet.pct.nlcd 0.000015 0.000848 18220 0.0182482 0.9854
## median.gsp 0.006423 0.074369 18220 0.0863647 0.9312
## from.Depth -0.028416 0.016951 19 -1.6763414 0.1100
## from.ffp -0.160716 0.092478 19 -1.7378814 0.0984
## from.hli 9.870505 3.708767 19 2.6613980 0.0154
## from.pratio 0.021924 0.044485 19 0.4928338 0.6278
## from.degree -0.002106 0.010941 19 -0.1924986 0.8494
## from.betweenness 0.041878 0.059756 19 0.7008170 0.4919
## from.pwetland 0.004308 0.009342 19 0.4611556 0.6499
## median.srr 0.000243 0.012888 18220 0.0188312 0.9850
## median.rough27 -0.000201 0.002850 18220 -0.0705163 0.9438
## Correlation:
## (Intr) length wt.pc. mdn.gs frm.Dp frm.ff frm.hl frm.pr frm.dg
## length -0.004
## wet.pct.nlcd -0.006 0.000
## median.gsp -0.130 0.000 0.018
## from.Depth 0.062 -0.001 -0.002 -0.001
## from.ffp -0.861 0.001 0.006 0.017 -0.179
## from.hli 0.985 0.001 -0.004 0.002 0.035 -0.858
## from.pratio 0.015 0.007 -0.009 0.015 -0.076 -0.132 0.120
## from.degree -0.114 0.016 -0.005 -0.004 -0.074 0.084 -0.075 0.300
## from.betweenness 0.054 -0.007 0.003 -0.012 -0.533 -0.009 0.062 -0.159 -0.292
## from.pwetland 0.085 0.000 0.005 0.000 -0.339 -0.070 0.117 0.254 0.379
## median.srr 0.001 0.000 -0.075 0.077 0.005 -0.009 0.007 0.001 0.010
## median.rough27 0.085 0.000 0.150 -0.603 -0.004 -0.010 0.009 -0.015 0.009
## frm.bt frm.pw mdn.sr
## length
## wet.pct.nlcd
## median.gsp
## from.Depth
## from.ffp
## from.hli
## from.pratio
## from.degree
## from.betweenness
## from.pwetland 0.137
## median.srr 0.002 -0.006
## median.rough27 0.003 0.004 -0.161
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.07862043 -0.59088464 0.02199601 0.65522607 3.22091896
##
## Number of Observations: 18252
## Number of Groups: 27
Should you use ML or REML? Create diagnostic plots.
Can you directly compare ML and REML? Why not? Because the REML likelihood is conditioned on the fixed-effects design, likelihood-based comparisons (AIC, BIC) across models with different fixed effects are only valid under ML; select the model under ML, then refit it under REML for unbiased variance estimates.
compare.models(null, depth, product, climate, wetlands,
topo, habitat, global)
## model AIC BIC log.likelihood RMSE nparms deltaAIC deltaBIC
## 1 null -13067.38 -13036.13 6537.689 0.1786 0 2.180525 0.000000
## 2 depth -13067.22 -13028.16 6538.610 0.1779 2 2.339018 7.970522
## 3 product -13069.56 -13022.69 6540.779 0.1765 3 0.000000 13.443535
## 4 climate -13063.57 -13016.70 6537.784 0.1785 3 5.990375 19.433909
## 5 wetlands -13061.71 -13007.03 6537.856 0.1784 4 7.846638 29.102202
## 6 topo -13063.38 -13016.51 6537.691 0.1786 3 6.176597 19.620132
## 7 habitat -13063.38 -13016.51 6537.689 0.1786 3 6.179621 19.623156
## 8 global -13054.63 -12937.45 6542.316 0.1757 12 14.926651 98.678455
compare.models(depth, product, climate, wetlands, topo,
habitat, global)
## model AIC BIC log.likelihood RMSE nparms deltaAIC deltaBIC
## 1 depth -13067.22 -13028.16 6538.610 0.1779 2 2.339018 0.000000
## 2 product -13069.56 -13022.69 6540.779 0.1765 3 0.000000 5.473012
## 3 climate -13063.57 -13016.70 6537.784 0.1785 3 5.990375 11.463387
## 4 wetlands -13061.71 -13007.03 6537.856 0.1784 4 7.846638 21.131680
## 5 topo -13063.38 -13016.51 6537.691 0.1786 3 6.176597 11.649610
## 6 habitat -13063.38 -13016.51 6537.689 0.1786 3 6.179621 11.652634
## 7 global -13054.63 -12937.45 6542.316 0.1757 12 14.926651 90.707933
par(mfrow=c(2,3))
for (i in 1:6) { plot(null, type=i) }
Then refit the candidate models under REML, compare them, and calculate effect sizes.
# Habitat fit (under REML)
h <- c("length", "wet.pct.nlcd", "median.gsp")
habitat_fit <- gravity(y = "GDIST", x = h, d = "length", group = "from_ID",
data = gdata, ln=FALSE)
## [1] "Running singly-constrained gravity model"
# global fit (under REML)
g <- c("length", "wet.pct.nlcd", "median.gsp", "from.Depth", "from.ffp",
"from.hli", "from.pratio", "from.degree", "from.betweenness",
"from.pwetland", "median.srr", "median.rough27")
global_fit <- gravity(y = "GDIST", x = g, d = "length",
group = "from_ID", data = gdata, ln=FALSE)
## [1] "Running singly-constrained gravity model"
gravity.es(habitat_fit)
## t.value df cohen.d p.value low.ci up.ci
## length -0.135786502 18222 -0.0020118175 0.891992 -0.02254667 0.01852303
## wet.pct.nlcd 0.026782551 18222 0.0003968112 0.978633 -0.02013803 0.02093166
## median.gsp -0.007225329 18222 -0.0001070507 0.994235 -0.02064189 0.02042779
gravity.es(global_fit)
## t.value df cohen.d p.value low.ci
## length -0.12995103 18220 -0.0019254646 0.896607 -0.02246144
## wet.pct.nlcd 0.01285573 18220 0.0001904814 0.989743 -0.02034549
## median.gsp 0.06080170 18220 0.0009008895 0.951518 -0.01963508
## from.Depth -1.40651839 19 -0.6453548981 0.175716 -1.33971267
## from.ffp -1.45856008 19 -0.6692332641 0.161020 -1.36495273
## from.hli 2.23310806 19 1.0246202508 0.037767 0.30328454
## from.pratio 0.41324402 19 0.1896093623 0.684054 -0.48802350
## from.degree -0.16130151 19 -0.0740102093 0.873559 -0.75028579
## from.betweenness 0.58817704 19 0.2698741332 0.563340 -0.40939769
## from.pwetland 0.38694297 19 0.1775416115 0.703098 -0.49989408
## median.srr 0.01325052 18220 0.0001963309 0.989428 -0.02033964
## median.rough27 -0.04964361 18220 -0.0007355618 0.960407 -0.02127153
## up.ci
## length 0.01861051
## wet.pct.nlcd 0.02072645
## median.gsp 0.02143686
## from.Depth 0.04900287
## from.ffp 0.02648620
## from.hli 1.74595596
## from.pratio 0.86724222
## from.degree 0.60226537
## from.betweenness 0.94914596
## from.pwetland 0.85497730
## median.srr 0.02073230
## median.rough27 0.01980041
par(mfrow=c(2,3))
for (i in 1:6) { plot(global_fit, type=i) }
dev.new()
par(mfrow=c(2,3))
for (i in 1:6) { plot(habitat_fit, type=i) }
gd <- back.transform(gdata$GDIST)
# Make individual-level (group) predictions (per slope) and
# show RMSE
global.p <- predict(global_fit, y = "GDIST", x = g,
newdata=gdata, groups = gdata$from_ID,
back.transform = "simple")
## Making individual-level (per-slope group)
## constrained predictions
## Back-transforming exp(y-hat)*0.5*variance(y-hat),
## assumes normally distributed errors
habitat.p <- predict(habitat_fit, y = "GDIST", x = h,
newdata=gdata, groups = gdata$from_ID,
back.transform = "simple")
## Making individual-level (per-slope group)
## constrained predictions
## Back-transforming exp(y-hat)*0.5*variance(y-hat),
## assumes normally distributed errors
cat("RMSE of global", rmse(global.p, gd), "\n")
## RMSE of global 0.1104084
cat("RMSE of habitat", rmse(habitat.p, gd), "\n")
## RMSE of habitat 0.1104121
We can aggregate the estimates back to the edges and nodes. An interactive map can be created using the tmap package.
# Aggregate estimates to graph and node
global.p <- data.frame(EID = gdata$from.to,
NID = gdata$from_ID,
p=global.p)
edge.p <- tapply(global.p$p, global.p$EID, mean)
dist.graph$global.flow <- edge.p
node.p <- tapply(global.p$p, global.p$NID, mean)
node.var <- tapply(global.p$p, global.p$NID, var)
idx <- which(sites$SiteName %in% names(node.p))
sites$global.flow[idx] <- node.p
sites$global.var[idx] <- node.var
# Plot results using tmap
pal <- colorRampPalette(rev(c("red","orange","blue")), bias=0.15)
tmap_mode(c("plot", "view")[2])
## tmap mode set to interactive viewing
tm_shape(dist.graph) +
tm_lines("global.flow", palette=pal(10)) +
tm_shape(sites) +
tm_symbols(col = "global.flow",
size = "global.var",
shape = 20, scale = 0.75, palette=pal(10))
## Legend for symbol sizes not available in view mode.
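As the message notes, tmap does not draw the symbol-size legend in view mode; if you want that legend, re-render the same layers in static plot mode:
# static mode renders the size legend for global.var
tmap_mode("plot")
tm_shape(dist.graph) +
  tm_lines("global.flow", palette=pal(10)) +
  tm_shape(sites) +
    tm_symbols(col = "global.flow", size = "global.var",
               shape = 20, scale = 0.75, palette=pal(10))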