My Website

小徐同学参考了谢益辉大神的blog,也尝试记录自己的生活

factoextra+ggpubr+ggplot2-层次聚类图添加标签及箱线图

Xwyturbo / 2022-11-16


1.数据的预处理

  library(readxl)
  library(readxl)
  library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
  library(bruceR)
## 
## bruceR (version 0.8.9)
## BRoadly Useful Convenient and Efficient R functions
## 
## Packages also loaded:
## √ dplyr     	√ emmeans     	√ ggplot2
## √ tidyr     	√ effectsize  	√ ggtext
## √ stringr   	√ performance 	√ cowplot
## √ forcats   	√ lmerTest    	√ see
## √ data.table
## 
## Main functions of `bruceR`:
## cc()          	Describe() 	TTEST()
## add()         	Freq()     	MANOVA()
## .mean()       	Corr()     	EMMEANS()
## set.wd()      	Alpha()    	PROCESS()
## import()      	EFA()      	model_summary()
## print_table() 	CFA()      	lavaan_summary()
## 
## https://psychbruce.github.io/bruceR/
## 
## These R packages are dependencies of `bruceR` but not installed:
## vars, phia, BayesFactor, GPArotation
## ***** Please Install All Dependencies *****
## install.packages("bruceR", dep=TRUE)
  library(writexl)
  library(ggpubr)
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:cowplot':
## 
##     get_legend
  library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:bruceR':
## 
##     export
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
  sui_de_initial <- read_xlsx("D:/wenyu/Rrojects/sui_de/sui_de_1107/sui_de_initial.xlsx")
  sd_data <- read_xlsx("D:/wenyu/Rrojects/sui_de/sui_de_1107/14种土地利用亚类面积.xlsx")
  
  #  计算每个村庄每种地类的占村庄全部用地的比重。
  sd1 <- sd_data/rowSums(sd_data)
  #  计算县域范围内每种地类占全部用地的比重
  sd2 <- colSums(sd_data)/sum(sd_data)
  
  #  按照14个亚类计算土地利用强度。
  sd_data  <- as_tibble(sd_data)
  sd_dataset <- sd_data %>% 
    mutate(lq1 = sd1$科教文卫用地/sd2[1],
           lq2 = sd1$交通运输用地/sd2[2],
           lq3 = sd1$林地/sd2[3],
           lq4 = sd1$园地/sd2[4],
           lq5 = sd1$水域及水利设施/sd2[5],
           lq6 = sd1$旱地/sd2[6],
           lq7 = sd1$草地/sd2[7],
           lq8 = sd1$水浇地/sd2[8],
           lq9 = sd1$设施农用地/sd2[9],
           lq10 = sd1$矿业用地/sd2[10],
           lq11 = sd1$公共服务用地/sd2[11],
           lq12 = sd1$商服用地 /sd2[12],
           lq13 = sd1$工业用地/sd2[13],
           lq14 = sd1$物流仓储用地/sd2[14])
  
  # library(writexl)
  # write_xlsx(sd_dataset,"sd_dataset.xlsx")
  library(readxl)
  library(readxl)
  library(tidyverse)
  library(bruceR)
  library(writexl)
  library(ggpubr)
  library(plotly)

  sd_mianji <- sd_dataset[,1:14]
  sd_mianji <- sd_mianji %>% 
    mutate(村庄 = sui_de_initial$QSDWMC )
  sd_mjlong = sd_mianji %>%
    pivot_longer(-村庄, names_to = "地类", values_to = "面积/公亩")
  str(sd_mjlong)
## tibble [9,310 × 3] (S3: tbl_df/tbl/data.frame)
##  $ 村庄     : chr [1:9310] "踊跃" "踊跃" "踊跃" "踊跃" ...
##  $ 地类     : chr [1:9310] "科教文卫用地" "交通运输用地" "林地" "园地" ...
##  $ 面积/公亩: num [1:9310] 0 0 11010 21856.3 94.9 ...
  p1 <- ggboxplot(sd_mjlong, x="地类", y="面积/公亩",fill ="地类",
                  palette = "aaas",#杂志Science的配色
                  sorting = "descending",                   
                  rotate = TRUE,                    
                  ggtheme = theme_bw())+
    theme(legend.position = "none")+
    ylim(0,20001)
  p1
## Warning: Removed 94 rows containing non-finite values (stat_boxplot).
## Warning: This manual palette can handle a maximum of 10 values. You have
## supplied 14.
  # ggexport(p1,filename = "pics/14种地类规模.png",
  #          width = 4000,height = 2000,
  #          res = 600)

  
  sd_quweishang <- sd_dataset[,15:28]
  names(sd_quweishang) <- c("科教文卫用地","交通运输用地","林地",
                            "园地","水域及水利设施","旱地","草地",
                            "水浇地","设施农用地","矿业用地",
                            "公共服务用地","商服用地",
                            "工业用地","物流仓储用地")
  sd_quweishang <- sd_quweishang %>% 
    mutate(村庄 = sui_de_initial$QSDWMC )
  sd_longer = sd_quweishang %>%
    pivot_longer(-村庄, names_to = "地类", values_to = "区位商")
  
  p2 <- ggboxplot(sd_longer, x="地类", y="区位商",fill ="地类",
                 palette = "aaas",
                 sorting = "descending",                   
                 rotate = TRUE,                    
                 ggtheme = theme_bw())+
    theme(legend.position = "none")+
    geom_hline(yintercept = 1,linetype = 2,
               color = "red")+
    scale_y_continuous(breaks = 1)+
    ylim(0,10)
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
  p2
## Warning: Removed 160 rows containing non-finite values (stat_boxplot).
## This manual palette can handle a maximum of 10 values. You have supplied 14.
  # ggexport(p,filename = "pics/绥德县14种地类区位商.png",
  #          width = 4000,height = 2000,
  #          res = 600)

2.层次聚类图美化

  library(ggdendro)
## Warning: package 'ggdendro' was built under R version 4.2.2
  library(ggplot2)
  library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
  library(ggpubr)
  library(ggsci)
## 
## Attaching package: 'ggsci'
## The following objects are masked from 'package:see':
## 
##     scale_color_material, scale_colour_material, scale_fill_material
  library(readxl)
  library(tidyverse)
  #  层次聚类,先生成距离矩阵。
  d1 <- dist(sd_dataset[,15:28],"canberra")
  hc7 <- hclust(d1,"ward.D2")
  tree = as.dendrogram(hc7)
  
  c2 <- cutree(hc7,2)
  table(c2)
## c2
##   1   2 
## 244 421
  plot(cut(tree, h=25)$upper, horiz=FALSE)
  rect.hclust(hc7,2)
  c3 <- cutree(hc7,3)
  table(c3)
## c3
##   1   2   3 
## 244 149 272
  plot(cut(tree, h=25)$upper, horiz=FALSE)
  rect.hclust(hc7,3)
  c4 <- cutree(hc7,4)
  table(c4)
## c4
##   1   2   3   4 
## 244 149 193  79
  plot(cut(tree, h=25)$upper, horiz=FALSE)
  rect.hclust(hc7,4)
  c5 <- cutree(hc7,5)
  table(c5)
## c5
##   1   2   3   4   5 
## 108 149 136 193  79
  plot(cut(tree, h=25)$upper, horiz=FALSE)
  rect.hclust(hc7,5)
  #vignette("ggsci")
  pal_aaas(palette = c("default"), alpha = 1)(5)
## [1] "#3B4992FF" "#EE0000FF" "#008B45FF" "#631879FF" "#008280FF"
  pal_npg(palette = c("nrc"), alpha = 1)(8)
## [1] "#E64B35FF" "#4DBBD5FF" "#00A087FF" "#3C5488FF" "#F39B7FFF" "#8491B4FF"
## [7] "#91D1C2FF" "#DC0000FF"
  p52 <- fviz_dend(hc7,k = 2,
                   xlab = "村庄",
                   ylab = "Height",
                   main = "",
                   rect = TRUE,
                   k_colors = c("#3B4992FF","#EE0000FF"),
                   ggtheme = theme_bw())+
    annotate("text", x = 100, y = 32,label = "村庄数:244",colour= "#3B4992FF",size=3)+
    annotate("text", x = 400, y = 48,label = "村庄数:421",colour= "#EE0000FF",size=3) 
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
  p52
  # ggexport(p52,filename = "pics/聚类谱系图2.png",
  #          width = 3400,height = 3000,
  #          res = 600)
  
  
  p53 <- fviz_dend(hc7,k = 3,
                   xlab = "村庄",
                   ylab = "Height",
                   main = "",
                   rect = TRUE,
                   k_colors = c("#3B4992FF","#EE0000FF","#008B45FF"),
                   ggtheme = theme_bw())+
    annotate("text", x = 110, y = 32,label = "村庄数:244",colour= "#3B4992FF",
             size = 3)+
    annotate("text", x = 300, y = 32,label = "村庄数:149",colour= "#EE0000FF",
             size = 3)+
    annotate("text", x = 500, y = 32,label = "村庄数:272",colour= "#008B45FF",
             size = 3)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
  p53
  # ggexport(p53,filename = "pics/聚类谱系图3.png",
  #          width = 3400,height = 3000,
  #          res = 600)
  
  p54 <- fviz_dend(hc7,k = 4,
                   xlab = "村庄",
                   ylab = "Height",
                   main = "",
                   rect = TRUE,
                   k_colors = c("#3B4992FF","#EE0000FF",
                                "#008B45FF","#631879FF"),
                   ggtheme = theme_bw())+
    annotate("text", x = 110, y = 32,label = "村庄数:244",colour= "#3B4992FF",
             size = 3)+
    annotate("text", x = 310, y = 32,label = "村庄数:149",colour= "#EE0000FF",
             size = 3)+
    annotate("text", x = 430, y = 32,label = "村庄数:79",colour= "#008B45FF",
             size = 3)+
    annotate("text", x = 550, y = 28,label = "村庄数:193",colour = "#631879FF",
             size = 3)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
  table(c3)
## c3
##   1   2   3 
## 244 149 272
  table(c5)  
## c5
##   1   2   3   4   5 
## 108 149 136 193  79
  p54
  # ggexport(p54,filename = "pics/聚类谱系图4.png",
  #          width = 3400,height = 3000,
  #          res = 600)
  
  p55 <- fviz_dend(hc7,k = 5,
                   xlab = "村庄",
                   ylab = "Height",
                   main = "",
                   rect = TRUE,
                   k_colors = c("#3B4992FF","#F39B7FFF",
                                "#EE0000FF",
                                "#008B45FF","#631879FF"),
                   ggtheme = theme_bw())+
    annotate("text", x = 60, y = 32,label = "村庄数:136",colour= "#3B4992FF",
             size = 3)+
    annotate("text", x = 190, y = 28,label = "村庄数:108",colour= "#F39B7FFF",
             size = 3)+
    annotate("text", x = 310, y = 32,label = "村庄数:149",colour= "#EE0000FF",
             size = 3)+
    annotate("text", x = 430, y = 32,label = "村庄数:79",colour= "#008B45FF",
             size = 3)+
    annotate("text", x = 550, y = 28,label = "村庄数:193",colour = "#631879FF",
             size = 3)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
  p55
  # ggexport(p55,filename = "pics/聚类谱系图5.png",
  #          width = 3400,height = 3000,
  #          res = 600)

3.箱线图

  library(ggdendro)
  library(ggplot2)
  library(factoextra)
  library(ggpubr)
  library(ggsci)
  library(readxl)
  library(tidyverse)
  sd_dataset_hclust <- read_xlsx("D:/wenyu/Rrojects/sui_de/sui_de_1107/sd_hclust.xlsx")
  table(sd_dataset_hclust$结果_5)
## 
##   1   2   3   4   5 
## 108 149 136 193  79
  sdhc1 <-  sd_dataset_hclust %>% 
    filter(结果_5 == 3) 
  sdhc1_mianji <- sdhc1[,c(1:14,35)]
  sdhc1_quweishang <- sdhc1[,c(15:28,35)]
  str(sdhc1_quweishang)
## tibble [136 × 15] (S3: tbl_df/tbl/data.frame)
##  $ lq1 : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ lq2 : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ lq3 : num [1:136] 0.868 1.162 1.039 1.06 1.481 ...
##  $ lq4 : num [1:136] 0.503 0.842 0.539 0.421 0.502 ...
##  $ lq5 : num [1:136] 0.3743 0.0823 1.2515 0 0.3604 ...
##  $ lq6 : num [1:136] 0.945 1.382 0.953 1.031 0.883 ...
##  $ lq7 : num [1:136] 1.399 0.906 1.284 1.338 1.205 ...
##  $ lq8 : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ lq9 : num [1:136] 0 0 0 0 0 ...
##  $ lq10: num [1:136] 0 0 0 0 0 ...
##  $ lq11: num [1:136] 0 0 0 0 0.543 ...
##  $ lq12: num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ lq13: num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ lq14: num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 村庄: chr [1:136] "张家塔" "周家沟" "东沟" "红旗沟" ...
  names(sdhc1_quweishang) <- c("科教文卫用地","交通运输用地","林地",
                               "园地","水域及水利设施","旱地","草地",
                               "水浇地","设施农用地","矿业用地",
                               "公共服务用地","商服用地",
                               "工业用地","物流仓储用地","村庄")
  str(sdhc1_quweishang)
## tibble [136 × 15] (S3: tbl_df/tbl/data.frame)
##  $ 科教文卫用地  : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 交通运输用地  : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 林地          : num [1:136] 0.868 1.162 1.039 1.06 1.481 ...
##  $ 园地          : num [1:136] 0.503 0.842 0.539 0.421 0.502 ...
##  $ 水域及水利设施: num [1:136] 0.3743 0.0823 1.2515 0 0.3604 ...
##  $ 旱地          : num [1:136] 0.945 1.382 0.953 1.031 0.883 ...
##  $ 草地          : num [1:136] 1.399 0.906 1.284 1.338 1.205 ...
##  $ 水浇地        : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 设施农用地    : num [1:136] 0 0 0 0 0 ...
##  $ 矿业用地      : num [1:136] 0 0 0 0 0 ...
##  $ 公共服务用地  : num [1:136] 0 0 0 0 0.543 ...
##  $ 商服用地      : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 工业用地      : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 物流仓储用地  : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 村庄          : chr [1:136] "张家塔" "周家沟" "东沟" "红旗沟" ...
  str(sdhc1_mianji)
## tibble [136 × 15] (S3: tbl_df/tbl/data.frame)
##  $ 科教文卫用地  : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 交通运输用地  : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 林地          : num [1:136] 3836 3502 2934 2967 6654 ...
##  $ 园地          : num [1:136] 3043 3474 2087 1613 3093 ...
##  $ 水域及水利设施: num [1:136] 152 22.8 325 0 148.9 ...
##  $ 旱地          : num [1:136] 5815 5803 3750 4021 5529 ...
##  $ 草地          : num [1:136] 16783 7410 9850 10168 14706 ...
##  $ 水浇地        : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 设施农用地    : num [1:136] 0 0 0 0 0 ...
##  $ 矿业用地      : num [1:136] 0 0 0 0 0 ...
##  $ 公共服务用地  : num [1:136] 0 0 0 0 8.68 ...
##  $ 商服用地      : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 工业用地      : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 物流仓储用地  : num [1:136] 0 0 0 0 0 0 0 0 0 0 ...
##  $ 村庄          : chr [1:136] "张家塔" "周家沟" "东沟" "红旗沟" ...
  ## 先把各地类面积的宽表转成长表
  sdhc1_mjlong = sdhc1_mianji %>%
    pivot_longer(-村庄, names_to = "地类", values_to = "面积/公亩")
  str(sdhc1_mjlong)
## tibble [1,904 × 3] (S3: tbl_df/tbl/data.frame)
##  $ 村庄     : chr [1:1904] "张家塔" "张家塔" "张家塔" "张家塔" ...
##  $ 地类     : chr [1:1904] "科教文卫用地" "交通运输用地" "林地" "园地" ...
##  $ 面积/公亩: num [1:1904] 0 0 3836 3043 152 ...
  ## 基于各地类面积长表,绘制箱线图
  p1 <- ggboxplot(sdhc1_mjlong, x="地类", y="面积/公亩",fill ="地类",
                  palette = "aaas",#杂志Science的配色
                  sorting = "descending",                   
                  rotate = TRUE,                    
                  ggtheme = theme_bw())+
    theme(legend.position = "none")+
    ylim(0,20001)
  p1
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Warning: This manual palette can handle a maximum of 10 values. You have
## supplied 14.
  # ggexport(p1,filename = "pics/聚类5对应的136个村庄地类面积.png",
  #          width = 4000,height = 2000,
  #          res = 600)
  
  
  ## 先把各地类区位商的宽表转成长表
  sdhc1_qwsl= sdhc1_quweishang %>%
    pivot_longer(-村庄, names_to = "地类", values_to = "区位商")
  str(sdhc1_qwsl)
## tibble [1,904 × 3] (S3: tbl_df/tbl/data.frame)
##  $ 村庄  : chr [1:1904] "张家塔" "张家塔" "张家塔" "张家塔" ...
##  $ 地类  : chr [1:1904] "科教文卫用地" "交通运输用地" "林地" "园地" ...
##  $ 区位商: num [1:1904] 0 0 0.868 0.503 0.374 ...
  ## 基于各地类面积长表,绘制箱线图
  p1 <- ggboxplot(sdhc1_qwsl, x="地类", y="区位商",fill ="地类",
                  palette = "aaas",
                  sorting = "descending",                   
                  rotate = TRUE,                    
                  ggtheme = theme_bw())+
    theme(legend.position = "none")+
    geom_hline(yintercept = 1,linetype = 2,
               color = "red")+
    scale_y_continuous(breaks = 1)+
    ylim(0,5)
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
  p1
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## This manual palette can handle a maximum of 10 values. You have supplied 14.
  # ggexport(p1,filename = "pics/聚类5对应的136个村庄地类区位商.png",
  #          width = 4000,height = 2000,
  #          res = 600)