# ggplot2作图详解5：图层语法和图形组合

## 1 图层的几何和统计类型

### 1.1 几何/统计类型设置函数

library(ggplot2)
ls("package:ggplot2", pattern="^geom_.+")
##  [1] "geom_abline"     "geom_area"       "geom_bar"
##  [4] "geom_bin2d"      "geom_blank"      "geom_boxplot"
##  [7] "geom_contour"    "geom_crossbar"   "geom_density"
## [10] "geom_density2d"  "geom_dotplot"    "geom_errorbar"
## [13] "geom_errorbarh"  "geom_freqpoly"   "geom_hex"
## [16] "geom_histogram"  "geom_hline"      "geom_jitter"
## [19] "geom_line"       "geom_linerange"  "geom_map"
## [22] "geom_path"       "geom_point"      "geom_pointrange"
## [25] "geom_polygon"    "geom_quantile"   "geom_raster"
## [28] "geom_rect"       "geom_ribbon"     "geom_rug"
## [31] "geom_segment"    "geom_smooth"     "geom_step"
## [34] "geom_text"       "geom_tile"       "geom_violin"
## [37] "geom_vline"
ls("package:ggplot2", pattern="^stat_.+")
##  [1] "stat_abline"      "stat_bin"         "stat_bin2d"
##  [4] "stat_bindot"      "stat_binhex"      "stat_boxplot"
##  [7] "stat_contour"     "stat_density"     "stat_density2d"
## [10] "stat_ecdf"        "stat_function"    "stat_hline"
## [13] "stat_identity"    "stat_qq"          "stat_quantile"
## [16] "stat_smooth"      "stat_spoke"       "stat_sum"
## [19] "stat_summary"     "stat_summary2d"   "stat_summary_hex"
## [22] "stat_unique"      "stat_vline"       "stat_ydensity"

# 函数说明，非运行代码
geom_point(mapping = NULL, data = NULL, stat = "identity",
position = "identity", na.rm = FALSE, ...)
stat_identity(mapping = NULL, data = NULL, geom = "point",
position = "identity", width = NULL,
height = NULL, ...)

# 示例，非运行代码
ggplot(数据, 映射)
geom_xxx(映射, 数据)
stat_xxx(映射, 数据)

"点点点"（...）参数是R语言中非常特殊的一种数据类型，用在函数的参数列表中表示任意参数；在这里表示传递给图层的任意参数，如color、shape、alpha等。

# 取ggplot2的diamonds数据集的一部分数据：
set.seed(100)
d.sub <- diamonds[sample(nrow(diamonds), 500),]
p <- ggplot(d.sub, aes(x=carat, y=price))
theme_set(theme_bw())
p + stat_identity()
p + geom_point()

(p + stat_identity())$layers
## [[1]]
## geom_point:
## stat_identity: width = NULL, height = NULL
## position_identity: (width = NULL, height = NULL)

(p + geom_point())$layers
## [[1]]
## geom_point: na.rm = FALSE
## stat_identity:
## position_identity: (width = NULL, height = NULL)

(p + stat_identity(geom=NULL))$layers
## [[1]]
## geom_point:
## stat_identity: width = NULL, height = NULL
## position_identity: (width = NULL, height = NULL)

(p + geom_point(stat=NULL))$layers
## [[1]]
## geom_point: na.rm = FALSE
## stat_identity:
## position_identity: (width = NULL, height = NULL)

### 1.2 图层对象

geom_xxx和stat_xxx可以指定数据、映射、几何类型和统计类型，一般来说，有这些东西我们就可以作图了。但实际情况是这些函数不可以直接出图，因为它们返回的不是完整的ggplot对象：

p <- geom_point(mapping=aes(x=carat, y=price), data=d.sub)
class(p)
## [1] "proto"       "environment"
p
## mapping: x = carat, y = price
## geom_point: na.rm = FALSE
## stat_identity:
## position_identity: (width = NULL, height = NULL)

ggplot() + p

p + ggplot()
## NULL
class(aes(x=carat, y=price) + ggplot(d.sub))
## [1] "NULL"
class(ggplot(d.sub) + aes(x=carat, y=price))
## [1] "gg"     "ggplot"

## 2 图层的位置调整参数

• dodge：“避让”方式，即往旁边闪，如柱形图的并排方式就是这种。
• fill：填充方式， 先把数据归一化，再填充到绘图区的顶部。
• identity：原地不动，不调整位置
• jitter：随机抖一抖，让本来重叠的露出点头来
• stack：叠罗汉
p <- ggplot(d.sub, aes(x=cut, y=price, fill=color))
p + geom_bar(stat="summary", fun.y="mean", position="stack")
p + geom_bar(stat="summary", fun.y="mean", position="fill")
p + geom_bar(stat="summary", fun.y="mean", position="dodge")
p + geom_bar(stat="summary", fun.y="mean", position="jitter")

p + geom_point(position="identity")
p + geom_point(position="jitter")

p <- ggplot(d.sub, aes(x=price, fill=cut, color=cut))
p + stat_density(position="stack")
p + stat_density(position="fill")
p + stat_density(position="identity")
p + stat_density(position="identity", fill="transparent")

## 3 图层组合

### 3.1 简单组合

datax <- data.frame(x=1:10, y=rnorm(10)+1:10)
p  <- ggplot(datax, aes(x=x, y=y))
p + geom_point() + geom_line()
p + geom_point() + geom_smooth(method="lm")

ggplot2的图层设置函数对映射的数据类型是有较严格要求的，比如geom_point和geom_line函数要求x映射的数据类型为数值向量，而geom_bar函数要使用因子型数据。如果数据类型不符合映射要求就得做类型转换，在组合图形时还得注意图层的先后顺序：

p  <- ggplot(datax, aes(x=factor(x), y=y)) + xlab("x")
p + geom_bar(stat="identity", fill="gray") +
geom_line(aes(group=1), size=2) + geom_point(color="red")
p + geom_bar(stat="identity", fill="gray") +
geom_smooth(aes(group=1), method="lm", se=FALSE, size=2)

### 3.2 不同映射的组合

p <- ggplot(d.sub, aes(x=carat)) + ylab("depth (blue) / table (red)")
p + geom_point(aes(y=depth), color="blue")  +
geom_point(aes(y=table), color="red")

library(reshape2)
datax <- melt(d.sub, id.vars="carat", measure.vars=c("depth", "table"))
ggplot(datax, aes(x=carat, y=value, color=variable)) + geom_point()

### 3.3 不同类型数据的组合

diamonds数据我们在前面已经了解过了，先看看R datasets包里面的mtcars数据：

data(mtcars, package="datasets")
str(mtcars)
## 'data.frame': 32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
head(mtcars, 4)
##                 mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4      21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag  21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710     22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1

好，开始玩点玄乎的：

p <- ggplot(data=d.sub, aes(x=carat, y=price, color=cut))
layer1 <- geom_point(aes(x=carb, y=mpg), mtcars, color="black")
(p1 <- p + layer1)

图中数据点是正确的，但坐标轴标题却对不上号。看看ggplot对象的数据、映射和图层：

head(p1$data, 4)
##       carat       cut color clarity depth table price    x    y    z
## 16601  1.01 Very Good     D     SI1  62.1    59  6630 6.37 6.41 3.97
## 13899  0.90     Ideal     D     SI1  62.4    55  5656 6.15 6.19 3.85
## 29792  0.30     Ideal     D     SI1  61.6    56   709 4.34 4.30 2.66
## 3042   0.30 Very Good     G     VS1  62.0    60   565 4.27 4.31 2.66
p1$mapping
## List of 3
##  $ x     : symbol carat
##  $ y     : symbol price
##  $ colour: symbol cut
p1$layers
## [[1]]
## mapping: x = carb, y = mpg
## geom_point: na.rm = FALSE, colour = black
## stat_identity:
## position_identity: (width = NULL, height = NULL)

数据和映射都还是ggplot原来设置的样子，layer1图层设置的都没有存储到ggplot图形列表对象的data和mapping元素中，而是放在了图层中，但图层中设定的数据不知道跑哪里。

如果再增加一个图层，把坐标轴标题标清楚：

layer2 <- geom_point(aes(y=depth))
(p1 <- p1 + layer2 + xlab("carb(black) / carat") + ylab("mpg(black) / depth"))

很有意思。layer2重新指定了y映射，但没碰原来ggplot对象设置的x和color映射，从获得的图形来看y数据改变了，x和color还是原ggplot对象的设置。查看一下映射和图层：

p1$mapping
## List of 3
##  $ x     : symbol carat
##  $ y     : symbol price
##  $ colour: symbol cut
p1$layers
## [[1]]
## mapping: x = carb, y = mpg
## geom_point: na.rm = FALSE, colour = black
## stat_identity:
## position_identity: (width = NULL, height = NULL)
##
## [[2]]
## mapping: y = depth
## geom_point: na.rm = FALSE
## stat_identity:
## position_identity: (width = NULL, height = NULL)

p + geom_point(aes(x=carb, y=mpg), mtcars)
## Error: arguments imply differing number of rows: 32, 0

p + geom_point(aes(x=carb, y=mpg, color=NULL), mtcars)
p + geom_point(aes(x=carb, y=mpg), mtcars, color="red")

## 4 SessionInfo

sessionInfo()
## R version 3.1.0 (2014-04-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
##
## locale:
##  [1] LC_CTYPE=zh_CN.UTF-8       LC_NUMERIC=C
##  [3] LC_TIME=zh_CN.UTF-8        LC_COLLATE=zh_CN.UTF-8
##  [5] LC_MONETARY=zh_CN.UTF-8    LC_MESSAGES=zh_CN.UTF-8
##  [7] LC_PAPER=zh_CN.UTF-8       LC_NAME=C
## [11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] tcltk     stats     graphics  grDevices utils     datasets  methods
## [8] base
##
## other attached packages:
## [1] reshape2_1.2.2  ggplot2_0.9.3.1 zblog_0.1.0     knitr_1.5
##
## loaded via a namespace (and not attached):
##  [1] colorspace_1.2-4 digest_0.6.4     evaluate_0.5.3   formatR_0.10
##  [5] grid_3.1.0       gtable_0.1.2     highr_0.3        labeling_0.2
##  [9] MASS_7.3-31      munsell_0.4.2    plyr_1.8.1       proto_0.3-10
## [13] Rcpp_0.11.1      scales_0.2.4     stringr_0.6.2    tools_3.1.0

• 本文由 整理发表
• 网站部分文章源自互联网，若未正确标注来源，请联系管理员更新。文章转载，请务必保留本文链接