library(readr) # For reading CSV files
library(dplyr) # For data manipulation
library(tidyverse) # For data manipulation
library(sf) # For handling spatial data
library(caret) # For train/test split and preprocessing
library(glmnet) # For Ridge regression

# Directory that holds the input files. The trailing slash matters because
# file names are appended to it with paste0().
path <- "own_path/dataset/"

# Regional environment table; the CSV is decoded as CP932.
df_environment <- paste0(path, "RegionalEnvironment.csv") %>%
  read_csv(locale = locale(encoding = "CP932"))

# Spatial layers: the housing records and the polygon layer used for lookups.
gdf_hausing <- paste0(path, "Hausing.geojson") %>%
  st_read()
gdf_polygon <- paste0(path, "Polygons.geojson") %>%
  st_read()

# Look up, for every point, the KEY_CODE of the mesh polygon that contains it.
#
# Arguments:
#   point_list  Geometries to look up: either an sfc geometry column or an sf
#               object (its geometry column is extracted with st_geometry()).
#   mesh_data   sf object with a KEY_CODE column; must be in the same CRS as
#               point_list.
#
# Returns a vector with one element per input geometry: the KEY_CODE of the
# first polygon (in row order of mesh_data) that contains the point, or NA
# when no polygon contains it. Empty input yields a zero-length vector.
get_key_code <- function(point_list, mesh_data) {
  points <- st_geometry(point_list)

  # A single vectorised query replaces the per-point loop: element i holds the
  # row indices of mesh_data whose polygon contains point i (a point is within
  # a polygon exactly when that polygon contains the point).
  containing <- st_within(points, mesh_data)

  # Keep only the first match per point; NA marks points outside every polygon.
  first_hit <- vapply(
    containing,
    function(idx) if (length(idx) > 0L) idx[[1L]] else NA_integer_,
    integer(1)
  )

  mesh_data$KEY_CODE[first_hit]
}

# Reproject both layers to EPSG:3100 so the containment lookup below compares
# geometries in one common CRS.
gdf_hausing <- gdf_hausing %>%
  st_transform(crs = st_crs(3100))
gdf_polygon <- gdf_polygon %>%
  st_transform(crs = st_crs(3100))

# Tag every housing record with the KEY_CODE returned by get_key_code().
gdf_hausing$KEY_CODE <- get_key_code(gdf_hausing[["geometry"]], gdf_polygon)

# Build the modelling table: attach the regional-environment columns to each
# housing record by KEY_CODE, keep only the rows that pass the filter, drop
# rows containing missing values, and strip the geometry so a plain data
# frame remains.
# NOTE(review): the column name and the literal in filter() look mis-encoded
# (presumably non-ASCII text decoded with the wrong encoding) — restore them
# from the original script; TODO confirm.
df <- gdf_hausing %>%
  left_join(df_environment, by = "KEY_CODE") %>%
  filter(p == "Z") %>%
  drop_na() %>%
  st_drop_geometry()

# Append the columns produced by model.matrix(); "- 1" drops the intercept
# column from the generated matrix.
# NOTE(review): the variable name in the formula appears corrupted and does
# not parse as written — presumably a categorical column that is expanded
# into indicator columns; TODO confirm and restore.
df <- cbind(df, model.matrix(~\ - 1, data=df))

# Names of the columns used as explanatory variables.
# NOTE(review): these names also look mis-encoded (some contain stray
# backslashes, one is empty) — verify each against the real column names.
features <- c('n', 'Ԍ䗦', '', 'eϗ', 'KXL', 'L', '\LS', '\RC', '\S', '\W', 'OʓH̕', 'w܂ł̋',
              'ЊQ댯', 'ÔgZz', 'wZ܂ł̋', 'wZ܂ł̋', 'x@܂ł̋', 'oX܂ł̋', 'l')

# X: explanatory variables; Y: response column divided by 10000.
# NOTE(review): the response column name is likely a mis-encoded identifier,
# and the reason for the 10000 scaling is not visible here — TODO confirm.
X <- df[, features]
Y <- df$i / 10000

# Reproducible 90/10 train/test partition of the rows, drawn on the response.
set.seed(0)
split <- createDataPartition(y = Y, p = 0.9, list = FALSE)

train_X <- X[split, ]
train_Y <- Y[split]
test_X <- X[-split, ]
test_Y <- Y[-split]

# Centering and scaling parameters are estimated on the training rows only and
# then applied to both sets, so no test-set information enters the scaling.
preProcValues <- preProcess(x = train_X, method = c("center", "scale"))
train_X <- predict(object = preProcValues, newdata = train_X)
test_X <- predict(object = preProcValues, newdata = test_X)

# Fit a ridge regression (alpha = 0) with a fixed penalty lambda = 0.5 on the
# training data and report the fitted intercept and coefficients.
regr <- glmnet(as.matrix(train_X), train_Y, alpha = 0, lambda = 0.5)
print(paste("Intercept :", regr$a0))
print(regr$beta)

# Predict the held-out rows once and reuse the result for both metrics and the
# plot. With a single lambda, predict() is expected to return a one-column
# matrix, which the arithmetic below treats element-wise.
pred_Y <- predict(regr, newx = as.matrix(test_X))
sq_err <- (test_Y - pred_Y)^2

# Mean squared error, and R2 = 1 - residual sum of squares / total sum of
# squares, both on the test set.
mse <- mean(sq_err)
r2 <- 1 - sum(sq_err) / sum((test_Y - mean(test_Y))^2)

print(paste("MSE :", mse))
print(paste("R2", r2))

# Observed versus predicted values with the y = x reference line.
plot(test_Y, pred_Y)
abline(coef = c(0, 1))
