第15章 空间插值

Jianghao Wang

2017-11-27

引言

基于《地理信息系统导论(第7版)》中第15章的内容(pp. 376 - 381),设计用R语言实现对应的空间插值练习.

包括5个练习:

  • 练习1:用趋势面模型作插值
  • 练习2:利用核密度估算法
  • 练习3:用IDW作插值
  • 练习4:用普通克里金作插值
  • 练习5:用泛克里金作插值

练习1:用趋势面模型作插值

所需数据:

  • stations.shp: 爱荷华州及其周围175个气象站点数据的shapefile;
  • idoutlgd: 爱荷华州轮廓的栅格

step 1: 加载数据和显示数据

stations数据探索

## 加载stations数据
stations <- read_sf("stations.shp")
## 显示stations的属性表格
kable(head(stations), caption = "Attibute of Stations")
Attibute of Stations
No COOPID STATION_NA Lat Long ELEV ANN_PREC geometry
1 100010 ABERDEEN EXPERIMNT STN 42.95000 -112.8333 4405 9.24 595173.3, 206145.1
2 100227 AMERICAN FALLS 3 NW 42.78333 -112.9167 4405 12.43 588613.2, 187546.5
3 100282 ANDERSON DAM 43.36667 -115.4500 3882 20.36 382514.0, 252776.6
4 100347 ARBON 2 NW 42.50000 -112.5833 5210 16.34 616406.3, 156488.5
5 100375 ARCO 43.63333 -113.3000 5325 10.25 556468.0, 281608.9
6 100448 ARROWROCK DAM 43.61667 -115.9167 3275 19.50 345341.2, 281304.7
str(stations)
## Classes 'sf', 'tbl_df', 'tbl' and 'data.frame':  175 obs. of  8 variables:
##  $ No        : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ COOPID    : num  1e+05 1e+05 1e+05 1e+05 1e+05 ...
##  $ STATION_NA: chr  "ABERDEEN EXPERIMNT STN" "AMERICAN FALLS 3 NW" "ANDERSON DAM" "ARBON 2 NW" ...
##  $ Lat       : num  43 42.8 43.4 42.5 43.6 ...
##  $ Long      : num  -113 -113 -115 -113 -113 ...
##  $ ELEV      : num  4405 4405 3882 5210 5325 ...
##  $ ANN_PREC  : num  9.24 12.43 20.36 16.34 10.25 ...
##  $ geometry  :sfc_POINT of length 175; first list element: Classes 'XY', 'POINT', 'sfg'  num [1:2] 595173 206145
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA
##   ..- attr(*, "names")= chr  "No" "COOPID" "STATION_NA" "Lat" ...

可视化stations数据,包括显示降水的时空变化情况。分别采用ggplot2

stations %>% ggplot() + geom_sf(aes(size = ANN_PREC, colour = ANN_PREC))

plot(stations)

idoutlgd轮廓的栅格数据

bound <- raster("idoutlgd/w001001.adf")
sum(bound)
## class       : RasterLayer 
## dimensions  : 392, 252, 98784  (nrow, ncol, ncell)
## resolution  : 2000, 2000  (x, y)
## extent      : 241697.3, 745697.3, 98724.95, 882724.9  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=tmerc +lat_0=42 +lon_0=-114 +k=0.9996 +x_0=500000 +y_0=100000 +datum=NAD27 +units=m +no_defs +ellps=clrk66 +nadgrids=@conus,@alaska,@ntv2_0.gsb,@ntv1_can.dat 
## data source : C:\Users\wangjh\Documents\GitHub\ucasgis\script\chap15\idoutlgd\w001001.adf 
## names       : w001001 
## values      : 1, 1  (min, max)
plot(bound)

step 2: 探索性数据分析

降水数据的统计分布

ggplot(stations, aes(ANN_PREC)) +
  geom_histogram() +
  geom_density()

研究降水的空间趋势变化

  1. 降水沿着经度的变化
ggplot(stations, aes(x = Long, y = ANN_PREC)) +
  geom_point(aes(colour = ANN_PREC)) +
  geom_smooth()

  1. 降水沿着纬度的变化
ggplot(stations, aes(x = Lat, y = ANN_PREC)) +
  geom_point(aes(size = ANN_PREC)) +
  geom_smooth()

step 3: 整体多项式插值

1st order polynomial fit

# Define the 1st order polynomial equation
f.1 <- as.formula(ANN_PREC ~ X + Y)
# Add X and Y to station and bound
coord <- st_coordinates(stations)
stations$X <- as.vector(coord[,1])
stations$Y <- as.vector(coord[,2])
# Run the regression model
lm.1 <- lm( f.1, data = stations)
# Use the regression model output to interpolate the surface
grd <- as(bound, 'SpatialGridDataFrame')
coord <- coordinates(grd)
grd$X <- coord[,1]
grd$Y <- coord[,2]
dat.1st <- SpatialGridDataFrame(grd, data.frame(var1.pred = predict(lm.1, newdata=grd))) 
# Clip the interpolated raster to bound
r <- raster(dat.1st)
r.m <- mask(r, bound)
# Plot the map
tm_shape(r.m) + 
  tm_raster(n=10, palette="RdBu", auto.palette.mapping=FALSE, 
            title="Predicted precipitation") +
  tm_shape(stations) + tm_dots(size=0.2) +
  tm_legend(legend.outside=TRUE)

2st order polynomial fit

# Define the 2nd order polynomial equation
f.2 <- as.formula(ANN_PREC ~ X + Y + I(X*X)+I(Y*Y) + I(X*Y))
# Add X and Y to station and bound
coord <- st_coordinates(stations)
stations$X <- as.vector(coord[,1])
stations$Y <- as.vector(coord[,2])
# Run the regression model
lm.2 <- lm( f.2, data= stations)
# Use the regression model output to interpolate the surface
dat.2nd <- SpatialGridDataFrame(grd, data.frame(var1.pred = predict(lm.2, newdata=grd))) 
# Clip the interpolated raster to bound
r   <- raster(dat.2nd)
r.m <- mask(r, bound)
# Plot the map
tm_shape(r.m) + 
  tm_raster(n=10, palette="RdBu", auto.palette.mapping=FALSE, 
            title="Predicted precipitation") +
  tm_shape(stations) + tm_dots(size=0.2) +
  tm_legend(legend.outside=TRUE)

更高阶多项式插值

方法同上。

练习2:利用核密度估计法

所需数据:

  • deer.shp: 显示鹿的位置的点shapefile。

习作二利用核密度估计法计算每公顷范围内鹿的平均数目。鹿的位置数据的最小分辨率为50m。

step 1:加载鹿的数据集

deer <- read_sf("deer.shp")
deer %>% ggplot() + geom_sf(aes(size = SIGHTINGS, colour = SIGHTINGS))

plot(deer)

deer <- sf::st_transform(deer, "+init=epsg:4326")
coord <- st_coordinates(deer)
deer$X <- as.vector(coord[,1])
deer$Y <- as.vector(coord[,2])
map <- get_map(location = c(lon = -133.205, lat = 55.912), zoom = 15, 
               maptype = "terrain", source = "google", language = "zh-CN")
ggmap(map) +
  geom_density2d(data = deer, aes(x = X, y = Y), alpha = 0.5, colour = "blue", bins = 8) +
  stat_density2d(data = deer, aes(x = X, y = Y, fill = ..level.., alpha = ..level..),
                   bins = 30, geom = "polygon", show.legend = T, na.rm = T) +
  scale_fill_gradient(low = "green", high = "red") +
  scale_alpha(range = c(0.00, 0.3), guide = FALSE) +
  geom_point(data = deer, aes(x = X, y = Y, size = SIGHTINGS, colour = SIGHTINGS)) +
  labs(x="Longitude", y="Latitude") +
  theme(legend.position = "none")

练习3:利用IDW做插值

stations.sp <- as(stations, 'Spatial')
# Interpolate the grid cells using a power value of 2 (idp=2.0)
P.idw <- gstat::idw(ANN_PREC ~ 1, stations.sp, newdata=grd, idp=2.0, nmax = 10)
## [inverse distance weighted interpolation]
# Convert to raster object then clip to Texas
r       <- raster(P.idw)
r.m     <- mask(r, bound)
# Plot
tm_shape(r.m) + 
  tm_raster(n=10,palette = "RdBu", auto.palette.mapping = FALSE,
            title="Predicted precipitation") + 
  tm_shape(stations.sp) + tm_dots(size=0.2) +
  tm_legend(legend.outside=TRUE)

练习4:用普通克里金作插值

所需数据同练习1. 主要利用到R中的gstat包中的krige函数, 用法如下:

krige.spatial(formula, locations, newdata, model, ..., beta, nmax
= Inf, nmin = 0, omax = 0, maxdist = Inf, block, nsim = 0, indicators = FALSE,
na.action = na.pass, debug.level = 1)
# calculate point variogram
v <- variogram(ANN_PREC ~ 1, stations.sp, width = 82000, cutoff = 82000 * 12, cloud = TRUE)
plot(v)

v <- variogram(ANN_PREC ~ 1, stations.sp, width = 82000, cutoff = 82000 * 12, cloud = FALSE)
plot(v)

v <- variogram(ANN_PREC ~ 1, stations.sp, width = 10000, cutoff = 10000 * 20, cloud = FALSE)
plot(v)

v
##     np       dist     gamma dir.hor dir.ver   id
## 1   12   6364.006  4.896962       0       0 var1
## 2   39  15937.429  5.741090       0       0 var1
## 3   84  25886.166  6.885303       0       0 var1
## 4  109  35001.288 15.556365       0       0 var1
## 5  165  45257.275 19.140268       0       0 var1
## 6  175  55293.822 21.017925       0       0 var1
## 7  196  64905.289 25.041095       0       0 var1
## 8  204  75090.839 32.039652       0       0 var1
## 9  227  84848.605 25.311715       0       0 var1
## 10 250  95258.406 35.023428       0       0 var1
## 11 255 105279.148 35.500864       0       0 var1
## 12 252 115009.221 35.359237       0       0 var1
## 13 278 124967.455 40.520411       0       0 var1
## 14 281 135216.481 43.365183       0       0 var1
## 15 283 145311.521 34.323459       0       0 var1
## 16 276 155062.186 34.500968       0       0 var1
## 17 346 164874.382 43.555624       0       0 var1
## 18 321 175020.046 46.734423       0       0 var1
## 19 267 185135.665 35.246195       0       0 var1
## 20 320 194879.755 39.995502       0       0 var1
# fit variogram
v.fit <- fit.variogram(v, vgm(psill = 40, model = "Sph", range = 100000, nugget = 0))
v.fit
##   model     psill  range
## 1   Nug  1.034207      0
## 2   Sph 39.194364 153090
plot(v, v.fit)

# kriging
ok <- krige(ANN_PREC ~ 1, stations.sp, grd, v.fit)
## [using ordinary kriging]
# Convert kriged surface to a raster object for clipping
r <- raster(ok["var1.pred"])
r.m <- mask(r, bound)
rv <- raster(ok["var1.var"])
rv.m <- mask(rv, bound)
# Plot the prediction
tm_shape(r.m) + 
  tm_raster(n=10, palette="RdBu", auto.palette.mapping=FALSE, 
            title="Predicted precipitation") +
  tm_shape(stations) + tm_dots(size=0.2) +
  tm_legend(legend.outside=TRUE)

# Plot the prediction error variance
tm_shape(rv.m) + 
  tm_raster(n=10, palette="RdBu", auto.palette.mapping=FALSE, 
            title="Predicted precipitation\n error variance") +
  tm_shape(stations) + tm_dots(size=0.2) +
  tm_legend(legend.outside=TRUE)

练习5:用泛克里金作插值

所需数据同练习1.

# calculate point variogram
v <- variogram(ANN_PREC ~ X + Y, stations.sp, width = 10000, cutoff = 10000 * 20, cloud = FALSE)
plot(v)

# fit variogram
v.fit <- fit.variogram(v, vgm(psill = 40, model = "Sph", range = 100000, nugget = 0))
v.fit
##   model     psill    range
## 1   Nug  1.287904      0.0
## 2   Sph 35.609932 151016.1
plot(v, v.fit)

# kriging
uk <- krige(ANN_PREC ~ X + Y, stations.sp, grd, v.fit)
## [using universal kriging]
# Convert kriged surface to a raster object for clipping
r <- raster(uk["var1.pred"])
r.m <- mask(r, bound)
rv <- raster(uk["var1.var"])
rv.m <- mask(rv, bound)
# Plot the map
tm_shape(r.m) + 
  tm_raster(n=10, palette="RdBu", auto.palette.mapping=FALSE, 
            title="Predicted precipitation") +
  tm_shape(stations) + tm_dots(size=0.2) +
  tm_legend(legend.outside=TRUE)

# Plot the map
tm_shape(rv.m) + 
  tm_raster(n=10, palette="RdBu", auto.palette.mapping=FALSE, 
            title="Predicted precipitation\n error variance") +
  tm_shape(stations) + tm_dots(size=0.2) +
  tm_legend(legend.outside=TRUE)