Distanza da una città all'altra tramite un ID comune
Non capisco affatto i dati spaziali. Ho studiato, ma mi sfugge qualcosa.
Cosa ho: data.frame enterprises
con le colonne: id, parent_subsidiary, city_cod.
Cosa mi serve: la distanza media e massima dalla città madre alle città sussidiarie.
Ex:
id | mean_dist | max_dist
1111 | 25km | 50km
232 | 110km | 180km
333 | 0km | 0km
Cosa ho fatto :
library("tidyverse")
library("sf")
# library("brazilmaps") not working anymore
library("geobr")
# Split the enterprises table by its parent_subsidiary code (1 vs 2).
parent <- filter(enterprises, parent_subsidiary == 1)
subsidiary <- filter(enterprises, parent_subsidiary == 2)
# Cities - polygons (all municipalities, 2019 edition, read through geobr)
m_city_br <- read_municipality(code_muni = "all", year = 2019)
# or shp_city<- st_read("/BR_Municipios_2019.shp")
# Attach the municipality record to every enterprise row by city code;
# each result is a data.frame with the column geom.
map_parent <- parent %>%
  left_join(m_city_br, by = c("city_cod" = "code_muni"))
map_subsidiary <- subsidiary %>%
  left_join(m_city_br, by = c("city_cod" = "code_muni"))
# Distance between the first parent geometry and the second subsidiary
# geometry, converted to kilometres and printed.
units::set_units(
  st_distance(map_parent$geom[1], map_subsidiary$geom[2]),
  km
)
# it took a long time and the result is different from google.maps
# is it ok?!
# To do it by ID -- I also got stuck here
# Per-parent summary of the distances (in km) between the parent's city and
# the cities of its own subsidiaries. The result, distance_p_s, has one row per
# parent id with the columns: id, subsidiary (number of subsidiaries),
# mean_dist and max_dist.
#
# Fixes over the previous loop:
#   * removed the stray closing brace that made the script unparsable;
#   * the output id is the parent id itself (the old `id[i]` was undefined);
#   * a parent without subsidiaries no longer runs a `1:0` loop -- it gets
#     subsidiary = 0 and NA distances, so it cannot be confused with a real
#     0 km distance (subsidiary located in the parent's own city);
#   * no local variable named `max` shadowing base::max();
#   * rows are collected in a list and bound once instead of rbind() per pass.
id_v <- as.vector(parent$id)

distance_rows <- lapply(id_v, function(current_id) {
  test_p <- map_parent %>% filter(id == current_id)
  test_s <- map_subsidiary %>% filter(id == current_id)
  n_sub <- nrow(test_s)

  if (n_sub == 0L) {
    return(data.frame(
      id = current_id,
      subsidiary = 0L,
      mean_dist = NA_real_,
      max_dist = NA_real_
    ))
  }

  # One call gives the distances from the parent geometry to every subsidiary
  # geometry at once; each value is rounded to 2 decimals as before.
  dist_km <- st_distance(test_p$geom[1], test_s$geom) %>%
    units::set_units(km) %>%
    as.numeric() %>%
    round(2)

  data.frame(
    id = current_id,
    subsidiary = n_sub,
    mean_dist = round(mean(dist_km), 2),
    max_dist = max(dist_km)
  )
})

distance_p_s <- bind_rows(distance_rows)
È giusto? Posso calcolare il baricentro delle città e poi calcolare la distanza?
Esempio di dati : structure(list(id = c("1111", "1111", "1111", "1111", "232", "232", "232", "232", "3123", "3123", "4455", "4455", "686", "333", "333", "14112", "14112", "14112", "3633", "3633","77172","77172"), parent_subsidiary = c("1","2", "2", "2", "1", "2", "2", "2", "1", "2", "1", "2", "1", "2", "1", "1", "2", "2", "1", "2","1","2"), city_cod = c(4305801L,4202404L, 4314803L, 4314902L, 4318705L, 1303403L, 4304507L, 4314100L, 2408102L, 3144409L, 5208707L, 4205407L, 5210000L, 3203908L, 3518800L, 3118601L, 4217303L, 3118601L, 5003702L, 5205109L,4111407L,4110102L)), row.names = c(NA, 22L), class = "data.frame")
PS: queste sono le città brasiliane: https://github.com/ipeaGIT/geobr/tree/master/r-package
Risposte
Dopo aver convertito map_parent e map_subsidiary in oggetti sf (con la funzione st_as_sf()), la tua chiamata a st_distance restituirà una matrice di distanze; quindi devi solo ottenere max e mean per ogni riga.
Per quanto riguarda la differenza rispetto alla distanza di Google Maps: st_distance restituisce la distanza "in linea d'aria" (in questo caso la distanza ortodromica, cioè lungo il cerchio massimo), mentre Google ti dà la distanza lungo un percorso.
library("tidyverse")
library("sf")
# library("brazilmaps") not working anymore
library("geobr")
library(janitor)
# Mean and maximum distance from each parent's city to the cities of its own
# subsidiaries, in the units returned by st_distance (the output printed for
# the earlier version is in metres).
# `enterprises` and `m_city_br` are not created here; they must already exist
# (m_city_br is the municipality layer read with read_municipality() in the
# question's code).
#
# NOTE: the printed output shown after this code was produced by the earlier
# version and will not match. That version wrote `max(blumenau:catalao)` inside
# rowwise() %>% mutate(), where `:` builds a numeric sequence between the first
# and last column values instead of selecting the columns, and it summarised
# over every subsidiary rather than the parent's own.
parent <- enterprises %>% filter(parent_subsidiary == 1)
subsidiary <- enterprises %>% filter(parent_subsidiary == 2)
map_parent <- left_join(parent, m_city_br, by = c("city_cod" = "code_muni")) %>%
  st_as_sf()
map_subsidiary <- left_join(
  subsidiary, m_city_br,
  by = c("city_cod" = "code_muni")
) %>%
  st_as_sf()

# Full distance matrix: one row per parent, one column per subsidiary.
dist_matrix <- st_distance(map_parent, map_subsidiary)
# Plain numeric copy of the matrix (as.numeric() drops the units class and the
# dimensions, so the shape is restored explicitly).
dist_values <- matrix(
  as.numeric(dist_matrix),
  nrow = nrow(dist_matrix),
  ncol = ncol(dist_matrix)
)
# TRUE where the subsidiary in a column belongs to the parent in that row.
own_pair <- outer(map_parent$id, map_subsidiary$id, "==")

# Apply `fun` to each parent's own distances; NA when the parent has no
# subsidiary (or no usable geometry) instead of NaN / -Inf with a warning.
summarise_rows <- function(fun) {
  vapply(
    seq_len(nrow(dist_values)),
    function(i) {
      own <- dist_values[i, own_pair[i, ]]
      own <- own[!is.na(own)]
      if (length(own) == 0L) NA_real_ else fun(own)
    },
    numeric(1)
  )
}

tibble(
  id = map_parent$id,
  mean_d = summarise_rows(mean),
  max_d = summarise_rows(max)
)
Source: local data frame [8 x 3]
Groups: <by row>
# A tibble: 8 x 3
id mean_d max_d
<chr> <dbl> <dbl>
1 1111 751800. 1128379.
2 232 812441. 1279803.
3 3123 2283090. 2726233.
4 4455 625060. 1083423.
5 686 671740. 1127195.
6 333 491425. 553784.
7 14112 640124. 897345.
8 3633 735219. 756007.