::p_load(tidyverse, sf, httr, tmap)
pacmantmap_mode("plot")
tmap_style("natural")
In-class Ex 4: Geocoding
Setup Environment
httr
- work with HTML pages
Geocoding
Use https://www.onemap.gov.sg/apidocs/
X, Y in SVY21, longitude latitude in WGS84
<- "https://www.onemap.gov.sg/api/common/elastic/search"
url
<- read_csv("data/aspatial/GeneralInformationofschools.csv")
csv <- csv$"postal_code"
postcodes
<- data.frame()
found <- data.frame()
not_found
for(postcode in postcodes){
<- list("searchVal"=postcode,"returnGeom"="Y", "getAddrDetails"="Y","pageNum"="1")
query <- GET(url, query=query)
res
if((content(res)$found) !=0){
<- rbind(found, data.frame(content(res))[4:13])
found else {
} = data.frame(postcode)
not_found
}
}
write_rds(found, "data/rds/found.rds")
write_rds(not_found, "data/rds/not_found.rds")
<- read_csv("data/aspatial/GeneralInformationofschools.csv")
csv <- read_rds("data/rds/found.rds")
found <- read_rds("data/rds/not_found.rds")
not_found = merge(csv, found, by.x = "postal_code", by.y = "results.POSTAL", all = TRUE)
merged write.csv(merged, file = "data/aspatial/schools.csv")
write.csv(not_found, file = "data/aspatial/not_found.csv")
Important
Use Google to look for the school without geospatial data.
Replace latitude with 1.3887, longitude with 103.7652.
Do this before proceeding to the next step. Or else st_as_sf()
will complain about missing longitude and latitude
Preparation
Importing geocoded schools
Import
schools.csv
Rename column names
Retain only relevant columns
<- read_csv("data/aspatial/schools.csv") %>%
schools rename(latitude = results.LATITUDE,
longitude = results.LONGITUDE) %>%
select(postal_code, school_name, latitude, longitude)
<- st_as_sf(schools,
schools_sf coords = c("longitude", "latitude"),
crs=4326) %>%
st_transform(crs = 3414)
Confirming projection
Note
I used the projected mpsz from previous exercises.
<- read_rds("data/rds/mpsz.rds") mpsz
tm_shape(mpsz) +
tm_polygons(col = "lightgreen") +
tm_shape(schools_sf) +
tm_dots(col = "red", size = 0.025) #+
#tm_view(set.zoom.limits = c(11, 14))
Finding how many schools per planning subzone using point in polygon count
$SCHOOL_COUNT <- lengths(st_intersects(mpsz, schools_sf)) mpsz
Inspect the summary of statistics
summary(mpsz$SCHOOL_COUNT)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 0.000 0.000 1.054 2.000 12.000
Importing business geospatial data
<- st_read(dsn = "data/geospatial",
businesses layer = "Business")
Reading layer `Business' from data source
`/Users/kjcpaas/Documents/Grad School/ISSS624/Project/ISSS624/In-class_Ex4/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 6550 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3669.148 ymin: 25408.41 xmax: 47034.83 ymax: 50148.54
Projected CRS: SVY21 / Singapore TM
tmap_options(check.and.fix = TRUE)
tm_shape(mpsz) +
tm_polygons(col = "lightgreen") +
tm_shape(businesses) +
tm_dots(col = "red", size = 0.01)
Data integration and wrangling
<- read_rds("data/rds/flow_data_tidy.rds") flow_data
# Check how to make this work
<- flow_data %>% left_join(mpsz, by= c("DESTIN_SZ", "SUBZONE_C")) flow_data