#install.packages(rgdal) #install.packages(dplyr) #install.packages(sf) #install.packages(data.table) #install.packages(lwgeom) #install.packages(ggplot2) #install.packages(ggpubr) require(rgdal) require(dplyr) library(sf) library(data.table) library(lwgeom) require(ggplot2) library(ggpubr) #---------------------------------------------------------------------------------------------# # # # PRE-PROCESSING # #---------------------------------------------------------------------------------------------# #BRITTANY M KRZYZANOWSKI #UMN DEPT OF GEOGRAPHY, ENVIRONMENT & SOCIETY #read in the shapefile of the regions regions<- readOGR("C:\\FilePath", "shapefile_name") #assign projection proj4string(regions) <- CRS("+proj=utm +zone=15 +ellps=GRS80 +datum=NAD83") writeOGR(regions,"C:\\FilePath", "regionsA",driver="ESRI Shapefile",overwrite_layer=TRUE) regions<- st_read("C:\\FilePath", "regionsA") #read in base units bsu<- readOGR("C:\\FilePath", "metro_bg_full") #create deciles of median household income bsu$decile <- ntile(bsu$MEDHINC_CY,10) #save it writeOGR(bsu,"C:\\FilePath", "deciles",driver="ESRI Shapefile",overwrite_layer=TRUE) deciles<- st_read("C:\\FilePath", "deciles") #join the baseunits to the regions on the region ID bsu_regions_join <- st_join(deciles, left = FALSE, regions["region_ID"]) #save the joined shapefile st_write(bsu_regions_join, "C:\\FilePath",update=TRUE) br10<- readOGR("C:\\FilePath", "bsu_regions") #create the deciles br10$d1 = ifelse(br10$decile=="1", br10$MEDHINC_CY, "0") br10$d2 = ifelse(br10$decile=="2", br10$MEDHINC_CY, "0") br10$d3 = ifelse(br10$decile=="3", br10$MEDHINC_CY, "0") br10$d4 = ifelse(br10$decile=="4", br10$MEDHINC_CY, "0") br10$d5 = ifelse(br10$decile=="5", br10$MEDHINC_CY, "0") br10$d6 = ifelse(br10$decile=="6", br10$MEDHINC_CY, "0") br10$d7 = ifelse(br10$decile=="7", br10$MEDHINC_CY, "0") br10$d8 = ifelse(br10$decile=="8", br10$MEDHINC_CY, "0") br10$d9 = ifelse(br10$decile=="9", br10$MEDHINC_CY, "0") br10$d10 = ifelse(br10$decile=="10", br10$MEDHINC_CY, "0") #save it as a shapefile writeOGR(br10,"C:\\FilePath", "bsu_regions_deciles",driver="ESRI Shapefile",overwrite_layer=TRUE) bsu_re_dec<- readOGR("C:\\FilePath", "bsu_regions_deciles") #write csv write.csv(bsu_re_dec, "C:\\FilePath\\bsu_re_dec.csv") HI_csv<- read.csv("C:\\FilePath\\bsu_re_dec.csv") #group by region ID and sum by decile HI_regions <- HI_csv %>% group_by(region_ID) %>% summarise(sum(d1), sum(d2), sum(d3),sum(d4), sum(d5), sum(d6),sum(d7), sum(d8), sum(d9), sum(d10)) write.csv(HI_regions, "C:\\FilePath\\HI_regions.csv",row.names=FALSE) #format the table so that it can run through Pinzari's homogeneity index script smoothly tmp <- read.csv("C:\\FilePath\\HI_regions.csv") tmp3 <- tmp <- cbind(tmp,"id","SA3_code","SA3_name","state_code") formatted_tmp = tmp3 %>% select("\"id\"","\"SA3_code\"","\"SA3_name\"","\"state_code\"","region_ID","sum.d1.","sum.d2.","sum.d3.","sum.d4.","sum.d5.","sum.d6.","sum.d7.","sum.d8.","sum.d9.","sum.d10.") names(formatted_tmp)[names(formatted_tmp) == "\"id\""] <- "id" names(formatted_tmp)[names(formatted_tmp) == "\"SA3_code\""] <- "SA3_code" names(formatted_tmp)[names(formatted_tmp) == "\"SA3_name\""] <- "SA3_name" names(formatted_tmp)[names(formatted_tmp) == "\"state_code\""] <- "state_code" names(formatted_tmp)[names(formatted_tmp) == "sum.d1."] <- "d1" names(formatted_tmp)[names(formatted_tmp) == "sum.d2."] <- "d2" names(formatted_tmp)[names(formatted_tmp) == "sum.d3."] <- "d3" names(formatted_tmp)[names(formatted_tmp) == "sum.d4."] <- "d4" names(formatted_tmp)[names(formatted_tmp) == "sum.d5."] <- "d5" names(formatted_tmp)[names(formatted_tmp) == "sum.d6."] <- "d6" names(formatted_tmp)[names(formatted_tmp) == "sum.d7."] <- "d7" names(formatted_tmp)[names(formatted_tmp) == "sum.d8."] <- "d8" names(formatted_tmp)[names(formatted_tmp) == "sum.d9."] <- "d9" names(formatted_tmp)[names(formatted_tmp) == "sum.d10."] <- "d10" write.csv(formatted_tmp, "C:\\FilePath\\HI_LP_formatted.csv",row.names=FALSE) ################################################################################################ # Title : hi_li.R # Author : Ludovico Pinzari ################################################################################################ uni.concI <- function(p){ d <- length(p) pop <- sum(p) i <- 1 while(i <= d) { p[i] <- p[i]/pop i <- i+1 } p<-sort(p) lc <- rep(0,d+1) i <- 1 while(i <= d) { if(i==1) lc[2]<-lc[1]+p[1] else lc[i+1]<-lc[i]+p[i] i<- i+1 } b <- 1/d i <- 1 imp_area <- 1-b la <- rep(0,d) while( i <= d) { la[i] <- b*(lc[i]+lc[i+1])/(2*imp_area) i <- i+1 } pu <- rep(1/d,d) lcu <- rep(0,d+1) i <- 1 while(i <= d) { if(i==1) lcu[2] <- lcu[1]+pu[1] else lcu[i+1] <- lcu[i]+pu[i] i<- i+1 } lau <- rep(0,d) i<-1 while( i <= d) { lau[i] <- b*(lcu[i]+lcu[i+1])/(2*imp_area) i <- i+1 } Area <- 0 i<- 1 while( i <= d) { Area <- Area + lau[i]-la[i] i <- i+1 } Area <-round(Area,4) return(2*Area) } uni.conv <- function(x,y){ m <- length(x) n <- length(y) z <- numeric(m+n-1) for(j in 1:m){ for(k in 1:n){ z[j+k-1] = z[j+k-1]+x[j]*y[k] } } return(z) } uni.corr <- function(x){ R <- uni.conv(x,rev(x)) return(R) } uni.div <- function(x){ d <- length(x) comb <- rep(1,d) bcdf <- uni.conv(x,comb) sbcdf <- uni.corr(bcdf) imp <- rep(0,d) imp[1] <- 1 bcdfi <- uni.conv(imp,comb) sbcdfi <- uni.corr(bcdfi) E <- sum(sbcdfi) sbcdf <- sbcdf/E sbcdfi <- sbcdfi/E l <- length(sbcdf) lgs1 <- rep(0,l) lgsi <- rep(0,l) i <- 1 while (i <= l){ if(sbcdf[i] > 0) lgs1[i] <- log(sbcdf[i],2) if(sbcdfi[i] > 0) lgsi[i] <- log(sbcdfi[i],2) i <- i+1 } M <- rep(0,l) i <- 1 while (i <= l){ M[i] <- (sbcdf[i]+sbcdfi[i])/2 if (M[i] > 0) M[i] <- -log(M[i],2) i <- i+1 } i <- 1 div <- 0 while (i <= l){ im <- sbcdfi[i]*(lgsi[i]+M[i]) is <- sbcdf[i]*(lgs1[i]+M[i]) div <- div + im + is i <- i+1 } return(div) } uni.divConst <- function(n){ p <- rep(0,n) # pdf p[1] <- 0.5 p[n] <- 0.5 const <- uni.div(p) return(const) } uni.hom <- function(p){ conc <- uni.concI(p) d <- length(p) pop <- sum(p) i <- 1 while(i <= d) { p[i] <- p[i]/pop i <- i+1 } div <- uni.div(p) uniform <- rep(1/d,d) divUnif <- uni.div(uniform) H <- (conc + divUnif - div)/(1 + divUnif) return(H) } uni.hom_mn <- function(m,n) { p <- rep(0,m) for(j in 1:n){ p[j] <- 1/n } h <- uni.hom(p) return (h) } uni.hom_class <- function(a,b,c,p) { h <- uni.hom(p) if (h >= a) { class <- 'A' } else if (h >= b) { class <- 'B' } else if (h >= c) { class <- 'C' } else { class <- 'D' } return (class) } uni.locVec <- function(x){ n <- length(x) i <- 1 ## iterator for the bins s <- 0 ## current interval score vs <- rep(0,n) ## cumulative score k <- rep(0,n) ## bins scores while(i <= n){ j <- 0 ## iterator for the nested intervals (width) s <- 0 vs <- rep(0,n) while(j < n){ if(j == 0){ s <- x[i] ## interval width zero (initial point) }else{ fw <- i+j ## interval border right maxp <- i-j ## interval border left if (maxp >= 1 && fw <= n){ s <- s+x[maxp]+x[fw] }else if (maxp >= 1){ s <- s+x[maxp] }else if (fw <= n){ s <- s+x[fw] } } vs[j+1] <- s j <- j+1 } k[i] <- sum(vs) i<- i+1 } ## Normalized score (singleton is n) k <- (1/n)*k return(k) } uni.loc <- function(x){ n <- length(x) loc1 <- 0 ## minimum position of the bin with maximum score loc2 <- 0 ## maximum position of the bin with maximum score ## compute the Location Index vector and the maximum score v <- uni.locVec(x) m <- max(v) for (j in 1:n){ if (v[j] == m){ if (loc1 > 0) loc2 <- j else{ loc1 <- j loc2 <- j } } } cl <- c(loc1,loc2) return (cl) } uni.homTable <- function(x){ df <- data.frame(id = integer(), SA3_code = character(), SA3_name = character(), State_code = character(), region_ID = character(), d1 = integer(), d2 = integer(), d3 = integer(), d4 = integer(), d5 = integer(), d6 = integer(), d7 = integer(), d8 = integer(), d9 = integer(), d10 = integer(), Hom = double(), CI = double(), DI = double(), LI = integer(), CL = character(), stringsAsFactors = FALSE ) cl_a <- uni.hom_mn(10,4) cl_b <- uni.hom_mn(10,5) cl_c <- uni.hom_mn(10,6) rows <- nrow(x) for(i in 1:rows){ pop <- as.numeric(x[i,6:15]) tot <- sum(pop) pdf <- pop/tot df[i,1] <- as.numeric(x[i,1]) ## Sa3 id df[i,2] <- as.character(x[i,2]) ## Sa3 code df[i,3] <- as.character(x[i,3]) ## SA3 name df[i,4] <- as.character(x[i,4]) ## State code df[i,5] <- as.character(x[i,5]) ## region_ID df[i,6] <- as.numeric(x[i,6]) ## decile 1 df[i,7] <- as.numeric(x[i,7]) ## decile 2 df[i,8] <- as.numeric(x[i,8]) ## decile 3 df[i,9] <- as.numeric(x[i,9]) ## decile 4 df[i,10] <- as.numeric(x[i,10]) ## decile 5 df[i,11] <- as.numeric(x[i,11]) ## decile 6 df[i,12] <- as.numeric(x[i,12]) ## decile 7 df[i,13] <- as.numeric(x[i,13]) ## decile 8 df[i,14] <- as.numeric(x[i,14]) ## decile 9 df[i,15] <- as.numeric(x[i,15]) ## decile 10 df[i,16] <- uni.hom(pop) ## compute the Homogenetiy index df[i,17] <- uni.concI(pop) ## compute the Concentration Index div <- uni.div(pdf) ## compute the Divergence Index const <- uni.divConst(10) ## ratio constant df[i,18] <- div/const loc <- uni.loc(pdf) ## compute the location Index df[i,19] <- loc[1] ## Location Index df[i,20] <- uni.hom_class(cl_a,cl_b,cl_c,pdf) ## Homogeneity class } return (df) } uni.homTable <- function(x){ # data frame initialization df <- data.frame(id = integer(), SA3_code = character(), SA3_name = character(), State_code = character(), region_ID = character(), d1 = integer(), d2 = integer(), d3 = integer(), d4 = integer(), d5 = integer(), d6 = integer(), d7 = integer(), d8 = integer(), d9 = integer(), d10 = integer(), Hom = double(), CI = double(), DI = double(), LI = integer(), CL = character(), stringsAsFactors = FALSE ) # definition of Homogeneity class lower bound cl_a <- uni.hom_mn(10,4) cl_b <- uni.hom_mn(10,5) cl_c <- uni.hom_mn(10,6) # compute the number of rows rows <- nrow(x) for(i in 1:rows){ pop <- as.numeric(x[i,6:15]) ## compute the pdf tot <- sum(pop) pdf <- pop/tot ## copy the first fifteen columns df[i,1] <- as.numeric(x[i,1]) ## Sa3 id df[i,2] <- as.character(x[i,2]) ## Sa3 code df[i,3] <- as.character(x[i,3]) ## SA3 name df[i,4] <- as.character(x[i,4]) ## State code df[i,5] <- as.character(x[i,5]) ## region_ID df[i,6] <- as.numeric(x[i,6]) ## decile 1 df[i,7] <- as.numeric(x[i,7]) ## decile 2 df[i,8] <- as.numeric(x[i,8]) ## decile 3 df[i,9] <- as.numeric(x[i,9]) ## decile 4 df[i,10] <- as.numeric(x[i,10]) ## decile 5 df[i,11] <- as.numeric(x[i,11]) ## decile 6 df[i,12] <- as.numeric(x[i,12]) ## decile 7 df[i,13] <- as.numeric(x[i,13]) ## decile 8 df[i,14] <- as.numeric(x[i,14]) ## decile 9 df[i,15] <- as.numeric(x[i,15]) ## decile 10 ## compute statistical indices df[i,16] <- uni.hom(pop) ## compute the Homogenetiy index df[i,17] <- uni.concI(pop) ## compute the Concentration Index div <- uni.div(pdf) ## compute the Divergence Index const <- uni.divConst(10) ## ratio constant df[i,18] <- div/const loc <- uni.loc(pdf) ## compute the location Index ## loc is a vector 1 x 2 ## since we do not have bimodal pdf the two values are equal df[i,19] <- loc[1] ## Location Index ## Homogeneity classifcation df[i,20] <- uni.hom_class(cl_a,cl_b,cl_c,pdf) ## Homogeneity class } return (df) } ##################################################################################### ##################################################################################### setwd(""C:\\FilePath\\WhereYouWantOutput\\") input <- read.csv("HI_LP_formatted.csv") x <- data.frame(lapply(input,as.character),stringsAsFactors=FALSE) t <- uni.homTable(x) write.csv(t, file = "LP_HI_CL")