geogThin <- function(pointsFrame, minDist, distFunct=NULL, longLatVars=c('longitude', 'latitude'), verbose=FALSE) {
# geogThin  Thin a set of geographic points so that no two retained points
# are closer than minDist to one another. Useful for reducing spatial
# autocorrelation in geographic data.
#
# Points are removed greedily, one per iteration. The point removed is the
# one with the greatest number of neighbors closer than minDist. Ties are
# broken first by the *minimum* distance to any other point (smallest first),
# then by distance to the centroid of all remaining points (smallest first).
# Iteration stops when no pair of remaining points is closer than minDist.
#
# ARGUMENTS
# pointsFrame  data frame (or matrix) with one row per point
# minDist      minimum allowed distance between any two retained points, in
#              the units returned by distFunct (meters for the built-in
#              fallback)
# distFunct    function(p1, p2) returning distance(s) between coordinates.
#              It is called with (multi-row coords, one-row coords), with
#              (numeric lon/lat vector, one-row coords) and with
#              (numeric lon/lat vector, multi-row coords). If NULL, a
#              great-circle (haversine) distance in meters is used, which
#              expects unprojected decimal degrees.
# longLatVars  names of the longitude and latitude columns, in that order
# verbose      if TRUE, report the number of too-close pairs each iteration
#
# VALUE
# pointsFrame with the offending rows removed; the surviving rows are
# returned in their original input order.

  # fall back to a base-R great-circle distance when none is supplied
  if (is.null(distFunct)) {
    distFunct <- function(p1, p2, r = 6378137) {
      toRadians <- function(p) {
        if (is.data.frame(p)) p <- as.matrix(p)
        if (is.null(dim(p))) p <- matrix(p, ncol = 2)
        p[, 1:2, drop = FALSE] * pi / 180
      }
      p1 <- toRadians(p1)
      p2 <- toRadians(p2)
      dLon <- p2[, 1] - p1[, 1]
      dLat <- p2[, 2] - p1[, 2]
      a <- sin(dLat / 2)^2 + cos(p1[, 2]) * cos(p2[, 2]) * sin(dLon / 2)^2
      # pmin() guards asin() against rounding slightly above 1
      unname(2 * r * asin(pmin(1, sqrt(a))))
    }
  }

  # nothing to thin with fewer than two points
  if (nrow(pointsFrame) < 2) return(pointsFrame)

  # number of unordered pairs of points closer than minDist
  countClosePairs <- function(frame) {
    coords <- frame[, longLatVars, drop = FALSE]
    dists <- apply(X = coords, MARGIN = 1, FUN = distFunct, coords)
    sum((dists < minDist) * upper.tri(dists), na.rm = TRUE)
  }

  # positions (in the input) of the rows still retained; kept in "working"
  # order so that ties in order() resolve exactly as if the frame itself
  # had been re-sorted on every iteration
  keep <- seq_len(nrow(pointsFrame))

  numClosePairs <- countClosePairs(pointsFrame)

  # loop while there is at least one point with another point too close
  while (numClosePairs >= 1 && length(keep) > 1) {

    # display progress
    if (verbose) { cat(numClosePairs, 'pair(s) with neighbors too close...\n' ); flush.console() }

    coords <- pointsFrame[keep, longLatVars, drop = FALSE]
    n <- nrow(coords)

    # for each point: number of neighbors too close, and distance to the
    # nearest neighbor (Inf when no neighbor is too close)
    numPointsTooClose <- numeric(n)
    minDistToNeighbor <- numeric(n)
    for (i in seq_len(n)) {
      theseDists <- distFunct(
        coords[-i, , drop = FALSE],
        coords[i, , drop = FALSE]
      )
      tooClose <- sum(theseDists < minDist)
      numPointsTooClose[i] <- tooClose
      # isTRUE() lets an NA count fall through to min(), which is then NA too
      minDistToNeighbor[i] <- if (isTRUE(tooClose < 1)) Inf else min(theseDists)
    }

    # center of all remaining points (mean of coordinates; treats them as planar)
    center <- colMeans(coords)

    # distance from each point to that center
    distToCenter <- vapply(
      seq_len(n),
      function(i) as.numeric(distFunct(center, coords[i, , drop = FALSE]))[1],
      numeric(1)
    )

    # rank by number of too-close neighbors (most first), then nearest-neighbor
    # distance, then distance to center; drop the top-ranked (most offensive) point
    ranking <- order(-numPointsTooClose, minDistToNeighbor, distToCenter)
    keep <- keep[ranking][-1]

    numClosePairs <- countClosePairs(pointsFrame[keep, , drop = FALSE])

  } # loop while there is at least one point with another point too close

  # return survivors in their original input order
  pointsFrame[sort(keep), , drop = FALSE]

}