#install.packages(rgdal)
#install.packages(dplyr)
#install.packages(sf)
#install.packages(data.table)
#install.packages(lwgeom)
#install.packages(ggplot2)
#install.packages(ggpubr)


require(rgdal)
require(dplyr)
library(sf)
library(data.table)
library(lwgeom)
require(ggplot2)
library(ggpubr)


#---------------------------------------------------------------------------------------------#
#                                                                                             #
#                                       PRE-PROCESSING                                        #
#---------------------------------------------------------------------------------------------#

 
#BRITTANY M KRZYZANOWSKI
#UMN DEPT OF GEOGRAPHY, ENVIRONMENT & SOCIETY

#read in the shapefile of the regions
regions<- readOGR("C:\\FilePath", "shapefile_name") 
#assign projection
proj4string(regions) <- CRS("+proj=utm +zone=15 +ellps=GRS80 +datum=NAD83")
writeOGR(regions,"C:\\FilePath", "regionsA",driver="ESRI Shapefile",overwrite_layer=TRUE)  
regions<- st_read("C:\\FilePath", "regionsA") 

#read in base units 
bsu<- readOGR("C:\\FilePath", "metro_bg_full")  
#create deciles of median household income
bsu$decile <- ntile(bsu$MEDHINC_CY,10)
#save it
writeOGR(bsu,"C:\\FilePath", "deciles",driver="ESRI Shapefile",overwrite_layer=TRUE)  
deciles<- st_read("C:\\FilePath", "deciles")

#join the baseunits to the regions on the region ID 
bsu_regions_join <- st_join(deciles, left = FALSE, regions["region_ID"])

#save the joined shapefile
st_write(bsu_regions_join, "C:\\FilePath",update=TRUE)
br10<- readOGR("C:\\FilePath", "bsu_regions")  

#create the deciles
br10$d1 = ifelse(br10$decile=="1", br10$MEDHINC_CY, "0")
br10$d2 = ifelse(br10$decile=="2", br10$MEDHINC_CY, "0")
br10$d3 = ifelse(br10$decile=="3", br10$MEDHINC_CY, "0")
br10$d4 = ifelse(br10$decile=="4", br10$MEDHINC_CY, "0")
br10$d5 = ifelse(br10$decile=="5", br10$MEDHINC_CY, "0")
br10$d6 = ifelse(br10$decile=="6", br10$MEDHINC_CY, "0")
br10$d7 = ifelse(br10$decile=="7", br10$MEDHINC_CY, "0")
br10$d8 = ifelse(br10$decile=="8", br10$MEDHINC_CY, "0")
br10$d9 = ifelse(br10$decile=="9", br10$MEDHINC_CY, "0")
br10$d10 = ifelse(br10$decile=="10", br10$MEDHINC_CY, "0")

#save it as a shapefile
writeOGR(br10,"C:\\FilePath", "bsu_regions_deciles",driver="ESRI Shapefile",overwrite_layer=TRUE)  

bsu_re_dec<- readOGR("C:\\FilePath", "bsu_regions_deciles")  
#write csv
write.csv(bsu_re_dec, "C:\\FilePath\\bsu_re_dec.csv")

HI_csv<- read.csv("C:\\FilePath\\bsu_re_dec.csv") 

#group by region ID and sum by decile
HI_regions <- HI_csv %>% group_by(region_ID) %>% summarise(sum(d1), sum(d2), sum(d3),sum(d4), sum(d5), sum(d6),sum(d7), sum(d8), sum(d9), sum(d10))
write.csv(HI_regions, "C:\\FilePath\\HI_regions.csv",row.names=FALSE)

#format the table so that it can run through Pinzari's homogeneity index script smoothly
tmp <- read.csv("C:\\FilePath\\HI_regions.csv")
tmp3 <- tmp <- cbind(tmp,"id","SA3_code","SA3_name","state_code")
formatted_tmp = tmp3 %>% select("\"id\"","\"SA3_code\"","\"SA3_name\"","\"state_code\"","region_ID","sum.d1.","sum.d2.","sum.d3.","sum.d4.","sum.d5.","sum.d6.","sum.d7.","sum.d8.","sum.d9.","sum.d10.")

names(formatted_tmp)[names(formatted_tmp) == "\"id\""] <- "id"
names(formatted_tmp)[names(formatted_tmp) == "\"SA3_code\""] <- "SA3_code"
names(formatted_tmp)[names(formatted_tmp) == "\"SA3_name\""] <- "SA3_name"
names(formatted_tmp)[names(formatted_tmp) == "\"state_code\""] <- "state_code"
names(formatted_tmp)[names(formatted_tmp) == "sum.d1."] <- "d1"
names(formatted_tmp)[names(formatted_tmp) == "sum.d2."] <- "d2"
names(formatted_tmp)[names(formatted_tmp) == "sum.d3."] <- "d3"
names(formatted_tmp)[names(formatted_tmp) == "sum.d4."] <- "d4"
names(formatted_tmp)[names(formatted_tmp) == "sum.d5."] <- "d5"
names(formatted_tmp)[names(formatted_tmp) == "sum.d6."] <- "d6"
names(formatted_tmp)[names(formatted_tmp) == "sum.d7."] <- "d7"
names(formatted_tmp)[names(formatted_tmp) == "sum.d8."] <- "d8"
names(formatted_tmp)[names(formatted_tmp) == "sum.d9."] <- "d9"
names(formatted_tmp)[names(formatted_tmp) == "sum.d10."] <- "d10"

write.csv(formatted_tmp, "C:\\FilePath\\HI_LP_formatted.csv",row.names=FALSE)

################################################################################################
# Title       : hi_li.R
# Author      : Ludovico Pinzari
################################################################################################
uni.concI <- function(p){
  
  d <- length(p)
  pop <- sum(p)
  i <- 1
  
  while(i <= d)
  {
    p[i] <- p[i]/pop
    i <- i+1
  }
  
  p<-sort(p) 
  
  
  lc <- rep(0,d+1)
  i <- 1
  
  while(i <= d)
  {
    if(i==1)
      lc[2]<-lc[1]+p[1]
    else
      lc[i+1]<-lc[i]+p[i]
    i<- i+1
  }
  
  
  b <- 1/d 
  i <- 1
  imp_area <- 1-b 
  la <- rep(0,d)
  
  while( i <= d)
  {
    la[i] <- b*(lc[i]+lc[i+1])/(2*imp_area)
    i <- i+1
  }
  
 
  pu <- rep(1/d,d)
  lcu <- rep(0,d+1)
  i <- 1
  
  while(i <= d)
  {
    if(i==1)
      lcu[2] <- lcu[1]+pu[1]
    else
      lcu[i+1] <- lcu[i]+pu[i]
    i<- i+1
  }
     
  lau <- rep(0,d)
  
  i<-1
  
  while( i <= d)
  {
    lau[i] <- b*(lcu[i]+lcu[i+1])/(2*imp_area)
    i <- i+1
  }
   
  Area <- 0
  i<- 1
  
  while( i <= d)
  {
    Area <- Area + lau[i]-la[i]
    i <- i+1
  }
  
  Area <-round(Area,4) 
  
  return(2*Area)
}

uni.conv <- function(x,y){
  
  m <- length(x)
  n <- length(y)
  z <- numeric(m+n-1)
  
  for(j in 1:m){
    for(k in 1:n){
      z[j+k-1] = z[j+k-1]+x[j]*y[k]
    }
  }
  return(z)
}

uni.corr <- function(x){
  
  R <- uni.conv(x,rev(x))
  return(R)
  
}

uni.div <- function(x){
  
  d <- length(x)
  comb <- rep(1,d)
  
  bcdf <- uni.conv(x,comb)
  sbcdf <- uni.corr(bcdf)
  
  imp <- rep(0,d)
  imp[1] <- 1
  
  bcdfi <- uni.conv(imp,comb)
  sbcdfi <- uni.corr(bcdfi)
    
  E <- sum(sbcdfi)
  
  sbcdf <- sbcdf/E
  sbcdfi <- sbcdfi/E
    
  l <- length(sbcdf)
  lgs1 <- rep(0,l)
  lgsi <- rep(0,l)
  
  i <- 1
  
  while (i <= l){
    
    if(sbcdf[i] > 0)
      lgs1[i] <- log(sbcdf[i],2)
    
    if(sbcdfi[i] > 0)
      lgsi[i] <- log(sbcdfi[i],2)
    
    i <- i+1
    
  }
     
  M <- rep(0,l)
  i <- 1
  
  while (i <= l){
    
    M[i] <- (sbcdf[i]+sbcdfi[i])/2
    if (M[i] > 0)
      M[i] <- -log(M[i],2)
    
    i <- i+1
    
  }
    
  i <- 1
  div <- 0
  
  while (i <= l){
    
    im <- sbcdfi[i]*(lgsi[i]+M[i])
    is <- sbcdf[i]*(lgs1[i]+M[i])
    div <- div + im + is
    
    i <- i+1
  }
  
  return(div)
  
}

uni.divConst <- function(n){
    
  p <- rep(0,n) # pdf
  p[1] <- 0.5
  p[n] <- 0.5
  
  
  const <- uni.div(p)
  
  return(const)
  
}


uni.hom <- function(p){
    
  conc <- uni.concI(p)
   
  d <- length(p)
  pop <- sum(p)
  i <- 1
    
  while(i <= d)
  {
    p[i] <- p[i]/pop
    i <- i+1
  }
    
  div <- uni.div(p)
  
  
  uniform <- rep(1/d,d)
  divUnif <- uni.div(uniform)
   
  H <- (conc + divUnif - div)/(1 + divUnif)
  
  return(H)
  
}

uni.hom_mn <- function(m,n) {
  
  
  p <- rep(0,m)
  for(j in 1:n){
    p[j] <- 1/n
  }
  
  h <- uni.hom(p)
  
  return (h)
  
}

uni.hom_class <- function(a,b,c,p) {
  
  h <- uni.hom(p)
  
  if (h >= a) {
    class <- 'A'
  } else if (h >= b) {
    class <- 'B'
  } else if (h >= c) {
    class <- 'C'
  } else {
    class <- 'D'
  }
  
  return (class)
}

uni.locVec <- function(x){
  
  n <- length(x)
  i <- 1           ## iterator for the bins
  s <- 0           ## current interval score
  vs <- rep(0,n)   ## cumulative score
  k <- rep(0,n)    ## bins scores
  
  while(i <= n){
    
    j <- 0         ## iterator for the nested intervals (width)
    s <- 0
    vs <- rep(0,n)
    
    while(j < n){
      
      
      if(j == 0){
        
        s <- x[i]  ## interval width zero (initial point)
        
      }else{
        
        fw <- i+j  ## interval border right
        maxp <- i-j  ## interval border left
        
        
        if (maxp >= 1 && fw <= n){
          s <- s+x[maxp]+x[fw]
        }else if (maxp >= 1){
          s <- s+x[maxp]
        }else if (fw <= n){
          s <- s+x[fw]
        }
        
      }
      vs[j+1] <- s
      j <- j+1
      
    }
    
    k[i] <- sum(vs)
    i<- i+1
    
  }
  
  ## Normalized score (singleton is n)
  
  k <- (1/n)*k
  
  
  return(k)
  
}

uni.loc <- function(x){
  
  n <- length(x)
  loc1 <- 0        ## minimum position of the bin with maximum score
  loc2 <- 0        ## maximum position of the bin with maximum score
  
  ## compute the Location Index vector and the maximum score
  
  v <- uni.locVec(x)
  m <- max(v)
  
  for (j in 1:n){
    if (v[j] == m){

      if (loc1 > 0)
          loc2 <- j
      else{
          loc1 <- j
          loc2 <- j
      }
    }
  }
  
  cl <- c(loc1,loc2)
  
  return (cl)
  
}

uni.homTable <- function(x){
  
  
  df <- data.frame(id = integer(),
                   SA3_code = character(),
                   SA3_name = character(),
                   State_code = character(),
                   region_ID = character(),
                   d1 = integer(),
                   d2 = integer(),
                   d3 = integer(),
                   d4 = integer(),
                   d5 = integer(),
                   d6 = integer(),
                   d7 = integer(),
                   d8 = integer(),
                   d9 = integer(),
                   d10 = integer(),
                   Hom = double(),
                   CI = double(),
                   DI = double(),
                   LI = integer(),
                   CL = character(),
                   stringsAsFactors = FALSE
  )
  
    
  cl_a <- uni.hom_mn(10,4)
  cl_b <- uni.hom_mn(10,5)
  cl_c <- uni.hom_mn(10,6)
   
  
  rows <- nrow(x)
  
  
  for(i in 1:rows){
    pop <- as.numeric(x[i,6:15])
  
    
    
    tot <- sum(pop)
    pdf <- pop/tot
    
       
    df[i,1] <- as.numeric(x[i,1])   ## Sa3 id
    df[i,2] <- as.character(x[i,2]) ## Sa3 code
    df[i,3] <- as.character(x[i,3]) ## SA3 name
    df[i,4] <- as.character(x[i,4]) ## State code
    df[i,5] <- as.character(x[i,5]) ## region_ID
    df[i,6] <- as.numeric(x[i,6])   ## decile 1
    df[i,7] <- as.numeric(x[i,7])   ## decile 2
    df[i,8] <- as.numeric(x[i,8])   ## decile 3
    df[i,9] <- as.numeric(x[i,9])   ## decile 4
    df[i,10] <- as.numeric(x[i,10]) ## decile 5
    df[i,11] <- as.numeric(x[i,11]) ## decile 6
    df[i,12] <- as.numeric(x[i,12]) ## decile 7
    df[i,13] <- as.numeric(x[i,13]) ## decile 8
    df[i,14] <- as.numeric(x[i,14]) ## decile 9
    df[i,15] <- as.numeric(x[i,15]) ## decile 10
        
    
    df[i,16] <- uni.hom(pop)        ## compute the Homogenetiy index
    df[i,17] <- uni.concI(pop)      ## compute the Concentration Index
    div <- uni.div(pdf)             ## compute the Divergence Index
    const <- uni.divConst(10)       ## ratio constant
    df[i,18] <- div/const
    
    loc <- uni.loc(pdf)             ## compute the location Index
       
    
    df[i,19] <- loc[1]              ## Location Index
       
    
    df[i,20] <- uni.hom_class(cl_a,cl_b,cl_c,pdf) ## Homogeneity class
    
    
  }
  
  return (df)
  
}


uni.homTable <- function(x){
  
  # data frame initialization
  
  df <- data.frame(id = integer(),
                   SA3_code = character(),
                   SA3_name = character(),
                   State_code = character(),
                   region_ID = character(),
                   d1 = integer(),
                   d2 = integer(),
                   d3 = integer(),
                   d4 = integer(),
                   d5 = integer(),
                   d6 = integer(),
                   d7 = integer(),
                   d8 = integer(),
                   d9 = integer(),
                   d10 = integer(),
                   Hom = double(),
                   CI = double(),
                   DI = double(),
                   LI = integer(),
                   CL = character(),
                   stringsAsFactors = FALSE
  )
  
  # definition of Homogeneity class lower bound
  
  cl_a <- uni.hom_mn(10,4)
  cl_b <- uni.hom_mn(10,5)
  cl_c <- uni.hom_mn(10,6)
  
  # compute the number of rows
  
  rows <- nrow(x)
  
  
  for(i in 1:rows){
    pop <- as.numeric(x[i,6:15])
    
    ## compute the pdf
    
    tot <- sum(pop)
    pdf <- pop/tot
    
    ## copy the first fifteen columns
    
    df[i,1] <- as.numeric(x[i,1])   ## Sa3 id
    df[i,2] <- as.character(x[i,2]) ## Sa3 code
    df[i,3] <- as.character(x[i,3]) ## SA3 name
    df[i,4] <- as.character(x[i,4]) ## State code
    df[i,5] <- as.character(x[i,5]) ## region_ID
    df[i,6] <- as.numeric(x[i,6])   ## decile 1
    df[i,7] <- as.numeric(x[i,7])   ## decile 2
    df[i,8] <- as.numeric(x[i,8])   ## decile 3
    df[i,9] <- as.numeric(x[i,9])   ## decile 4
    df[i,10] <- as.numeric(x[i,10]) ## decile 5
    df[i,11] <- as.numeric(x[i,11]) ## decile 6
    df[i,12] <- as.numeric(x[i,12]) ## decile 7
    df[i,13] <- as.numeric(x[i,13]) ## decile 8
    df[i,14] <- as.numeric(x[i,14]) ## decile 9
    df[i,15] <- as.numeric(x[i,15]) ## decile 10
    
    ## compute statistical indices
    
    df[i,16] <- uni.hom(pop)        ## compute the Homogenetiy index
    df[i,17] <- uni.concI(pop)      ## compute the Concentration Index
    div <- uni.div(pdf)             ## compute the Divergence Index
    const <- uni.divConst(10)       ## ratio constant
    df[i,18] <- div/const
    
    loc <- uni.loc(pdf)             ## compute the location Index
    
    ## loc is a vector 1 x 2
    ## since we do not have bimodal pdf the two values are equal
    
    df[i,19] <- loc[1]              ## Location Index
    
    ## Homogeneity classifcation
    
    df[i,20] <- uni.hom_class(cl_a,cl_b,cl_c,pdf) ## Homogeneity class
    
    
  }
  
  return (df)
  
}

#####################################################################################
#####################################################################################

setwd(""C:\\FilePath\\WhereYouWantOutput\\")
input <- read.csv("HI_LP_formatted.csv")
x <- data.frame(lapply(input,as.character),stringsAsFactors=FALSE)
t <- uni.homTable(x)
write.csv(t, file = "LP_HI_CL")