# Demo table: four genes, the direction of their change, and a numeric score.
df1 <- data.frame(
  gene = sprintf("gene%d", 1:4),
  change = c("up", "up", "down", "down"),
  score = c(5, 3, -2, -4)
)
df1
代码语言:r复制## gene change score
## 1 gene1 up 5
## 2 gene2 up 3
## 3 gene3 down -2
## 4 gene4 down -4
代码语言:r复制df1[2,]
代码语言:r复制## gene change score
## 2 gene2 up 3
代码语言:r复制rownames(df1)
代码语言:r复制## [1] "1" "2" "3" "4"
代码语言:r复制class(df1[,2]);df1[,2]
代码语言:r复制## [1] "character"
代码语言:r复制## [1] "up" "up" "down" "down"
代码语言:r复制colnames(df1)
代码语言:r复制## [1] "gene" "change" "score"
代码语言:r复制class(df1[2,])
代码语言:r复制## [1] "data.frame"
代码语言:r复制class(df1[,2])
代码语言:r复制## [1] "character"
代码语言:r复制df1[c(1,3),1:2]
代码语言:r复制## gene change
## 1 gene1 up
## 3 gene3 down
代码语言:r复制df1[,"gene"]
代码语言:r复制## [1] "gene1" "gene2" "gene3" "gene4"
代码语言:r复制df1[,c("gene","change")]
代码语言:r复制## gene change
## 1 gene1 up
## 2 gene2 up
## 3 gene3 down
## 4 gene4 down
# Three small tables used to demonstrate merge().
# test1 and test2 share the key column "name"; test3 stores the same kind of
# key under a differently named column, "NAME".
test1 <- data.frame(
  name = c("jimmy", "nicker", "Damon", "Sophie"),
  blood_type = c("A", "B", "O", "AB")
)
test2 <- data.frame(
  name = c("Damon", "jimmy", "nicker", "tony"),
  group = rep(paste0("group", 1:2), each = 2),
  vision = c(4.2, 4.3, 4.9, 4.5)
)
test3 <- data.frame(
  NAME = c("Damon", "jimmy", "nicker", "tony"),
  weight = c(140, 145, 110, 138)
)
# Print all three tables.
test1
test2
test3
代码语言:r复制## name blood_type
## 1 jimmy A
## 2 nicker B
## 3 Damon O
## 4 Sophie AB
代码语言:r复制## name group vision
## 1 Damon group1 4.2
## 2 jimmy group1 4.3
## 3 nicker group2 4.9
## 4 tony group2 4.5
代码语言:r复制## NAME weight
## 1 Damon 140
## 2 jimmy 145
## 3 nicker 110
## 4 tony 138
# Inner join on the shared "name" column: merge() keeps only the names that
# occur in both tables by default.
test4 <- merge(test1, test2, by = "name")
test4
代码语言:r复制## name blood_type group vision
## 1 Damon O group1 4.2
## 2 jimmy A group1 4.3
## 3 nicker B group2 4.9
# Inner join when the key columns have different names in the two tables:
# by.x names the key in test1, by.y names the key in test3.
test5 <- merge(test1, test3, by.x = "name", by.y = "NAME")
test5
代码语言:r复制## name blood_type weight
## 1 Damon O 140
## 2 jimmy A 145
## 3 nicker B 110
# Full outer join: all = TRUE keeps the unmatched rows of both tables and
# fills the missing cells with NA. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
test6 <- merge(test1, test3, by.x = "name", by.y = "NAME", all = TRUE)
test6
代码语言:r复制## name blood_type weight
## 1 Damon O 140
## 2 jimmy A 145
## 3 nicker B 110
## 4 Sophie AB NA
## 5 tony <NA> 138
代码语言:r复制#练习3-1
#table(test$Strand)
#练习2-3
#
# 3 x 3 integer matrix holding 1..9 (matrix() fills column by column by default).
m <- matrix(seq_len(9), nrow = 3, ncol = 3)
m
代码语言:r复制## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
代码语言:r复制m[1:2,2:3]
代码语言:r复制## [,1] [,2]
## [1,] 4 7
## [2,] 5 8
# Give the matrix columns the names a, b, c, then print it.
colnames(m) <- c("a", "b", "c")
m
代码语言:r复制## a b c
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
代码语言:r复制t(m)
代码语言:r复制## [,1] [,2] [,3]
## a 1 2 3
## b 4 5 6
## c 7 8 9
# Convert the matrix into a data frame stored under a new name; m itself is
# not modified by as.data.frame().
a1 <- as.data.frame(m)
a1
代码语言:r复制## a b c
## 1 1 4 7
## 2 2 5 8
## 3 3 6 9
代码语言:r复制t(a1)
代码语言:r复制## [,1] [,2] [,3]
## a 1 2 3
## b 4 5 6
## c 7 8 9
# A named list holding two matrices of different shapes (3 x 3 and 2 x 4).
x <- list(
  m1 = matrix(1:9, nrow = 3),
  m2 = matrix(2:9, nrow = 2)
)
x
代码语言:r复制## $m1
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
##
## $m2
## [,1] [,2] [,3] [,4]
## [1,] 2 4 6 8
## [2,] 3 5 7 9
# Named numeric vector: each score is labelled with a person's name, so the
# names can later be selected with a logical condition on the scores.
scores <- c(100, 59, 73, 95, 45)
names(scores) <- c("jimmy", "nicker", "Damon", "Sophie", "tony")
scores
代码语言:r复制## jimmy nicker Damon Sophie tony
## 100 59 73 95 45
代码语言:r复制names(scores)[scores>60]
代码语言:r复制## [1] "jimmy" "Damon" "Sophie"
代码语言:r复制#数据框按照逻辑值取子集
# Logical mask with one element per row of df1: TRUE where the score is positive.
k <- df1$score > 0
k
代码语言:r复制## [1] TRUE TRUE FALSE FALSE
代码语言:r复制df1[k,]
代码语言:r复制## gene change score
## 1 gene1 up 5
## 2 gene2 up 3
代码语言:r复制df1[,k]
代码语言:r复制## gene change
## 1 gene1 up
## 2 gene2 up
## 3 gene3 down
## 4 gene4 down
代码语言:r复制df1$gene[df1$score>0]
代码语言:r复制## [1] "gene1" "gene2"
R Markdown
*数据框来源 (1) 用代码新建 (2) 用已有数据转换或处理得到 (3) 读取表格文件(存在于R语言之外的文件,只操作数据框而不修改表格) (4) R语言内置数据(heatmap(volcano);iris;letters;LETTERS),并非所有内置数据都是数据框
**代码新建数据框 df1 <- data.frame(gene = paste0("gene",1:4),
change = rep(c("up","down"),each = 2), score = c(5,3,-2,-4))
**读取文件 df2 <- read.csv("gene.csv")
**数据框属性 dim(df1) [1] 4 3 nrow(df1) [1] 4 ncol(df1) [1] 3 rownames(df1) [1] "1" "2" "3" "4" colnames(df1) [1] "gene" "change" "score" df1$score [1] 5 3 -2 -4 #删掉score,按tab键可自动显示列名 mean(df1$score) [1] 0.5
#统计表格中某一列每个取值出现的次数:table() table(test$strand)
**数据框取子集
按名字
df1[,"gene"] [1] "gene1" "gene2" "gene3" "gene4" df1[,c("gene","change")] (可一次取多列,列名需加"")
按坐标
df1[2,2] [1] "up" df1[2,];df1[,2] df1[c(1,3),1:2];
##按逻辑值(难点) k = df1$score>0;k [1] TRUE TRUE FALSE FALSE df1[k,] #结果保留前两行(TRUE) ###筛选score>0的基因 k = df1$score>0;k [1] TRUE TRUE FALSE FALSE df1[k,1] [1] "gene1" "gene2" df1$gene[k] [1] "gene1" "gene2" df1$gene[df1$score>0] [1] "gene1" "gene2"
#中括号中的逗号,代表维度的分割,因此对向量 x 使用 x[1,5] 会报错,因为向量不存在第二维度
**数据框修改 #改一个格 df1[3,3] <- 5
#改一整列 df1$score <- c(12,23,50,2)
#新增一列("$"后使用新列名) df1$p.value <- c(0.01,0.02,0.07,0.05)
#行(列)名的修改 等价于修改向量 rownames(df1) <- c("r1","r2","r3","r4") colnames(df1)[2] <- "CHANGE"
**两个数据框的连接(取交集) merge(test1,test2,by="name") merge(test1,test3,by.x = "name",by.y = "NAME") (当列名不同时) **两个数据框的连接(取并集) #左连接(all.x=TRUE),右连接(all.y=TRUE),取并集(all=TRUE),详见 ?merge merge(test1,test3,by.x="name",by.y="NAME",all=TRUE) / merge(test1,test3,by.x="name",by.y="NAME",all.x=TRUE,all.y=TRUE)
*矩阵新建和取子集 m <- matrix(1:9,nrow = 3) #取子集 m[2,];m[,1];m[2,3];m[1:2,2:3]
*矩阵的转置和转换 colnames(m) <- c("a","b","c") t(m) #转置 as.data.frame(m) #转换为数据框 #此时m的数据结构并没有发生改变,仍为矩阵;需要重新赋值才会改变(m = as.data.frame(m))
*列表新建和取子集 x <- list(m1 = matrix(1:9,nrow = 3), m2 = matrix(2:9,nrow = 2)) x[[1]] / x$m1 #取列表中第一个矩阵m1(注意:x[1] 返回的仍是只含一个元素的列表)
*补充:元素的“名字”-names() scores = c(100,59,73,95,45) names(scores) = c("jimmy","nicker","Damon","Sophie","tony") #scores仍为向量 scores["jimmy"] scores[c("jimmy","nicker")] names(scores)[scores>60] #y[x>0]成立的前提是x与y一一对应
*删除变量 rm(x) #删除一个 rm(df1,m) #删除多个 rm(list = ls()) #删除全部,清空环境 快捷键 Ctrl+L #清空控制台中的文字
引用自生信技能树