代码语言:R
#apply函数,沿着数组的某一维度处理数据
#例如将函数用于矩阵的行或列
#其效率与for/while循环相近,但只需一行代码即可完成
#apply(参数):apply(数组,维度,函数/函数名)
> x <- matrix(1:16,4,4)
> x
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
> apply(x,2,mean) #沿着x的第二维度求平均,x有两个维度,行和列,第二个维度就是沿着x的列求平均
[1] 2.5 6.5 10.5 14.5
> apply(x,2,sum) #沿着x的第二维度求和
[1] 10 26 42 58
> apply(x,1,sum)
[1] 28 32 36 40
> apply(x,1,mean)
[1] 7 8 9 10
> rowSums(x) #行的总和
[1] 28 32 36 40
> rowMeans(x) #行的平均值
[1] 7 8 9 10
> colSums(x) #列的总和
[1] 10 26 42 58
> colMeans(x) #列的平均值
[1] 2.5 6.5 10.5 14.5
> x <- matrix(rnorm(100),10,10) #随机从正态分布中取100个数据
> x
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] -0.6028508 1.4642242 0.04427663 0.2871729 0.04981660 -0.8558895 0.5130530
[2,] -1.9378240 0.2039535 -0.19909385 -0.4309858 0.85004373 0.4976094 -0.5580487
[3,] 1.2487024 0.3279828 -0.61134011 -0.1575374 -0.29225789 0.3887533 0.3905769
[4,] -2.5628573 0.4519969 -0.31849107 -1.4633238 0.46414326 0.3366307 -2.1061818
[5,] -0.2568173 -0.7387934 -0.65190045 -1.5211132 -0.68554516 0.3329140 -1.3744196
[6,] -0.3072326 -1.2575338 0.42412478 -1.3476506 -0.21221874 0.7673182 -0.4560506
[7,] 0.1561480 0.3020903 0.36489259 -0.2507313 1.35735729 -0.2610940 0.5355151
[8,] 0.6536334 0.3717443 -0.77679094 1.0801878 0.07262787 -0.5006976 -2.6058038
[9,] 1.4417755 -1.2989872 1.04908993 0.5010024 -0.41921218 2.2141514 0.3646026
[10,] -1.6978768 -0.9097784 0.01689380 0.6535433 1.55588778 0.4550700 2.5595517
[,8] [,9] [,10]
[1,] -0.57296509 0.1170718 -1.89788063
[2,] 0.06360181 1.3552013 0.83369280
[3,] -0.44550756 0.3857978 0.24664750
[4,] 0.51678695 0.2522804 -0.77862862
[5,] 0.35021885 -0.2767039 -0.37358325
[6,] -0.12660675 -1.4168734 0.86864076
[7,] 0.69927317 0.6202195 -2.31017158
[8,] 1.43228754 1.3257759 0.59362053
[9,] -1.63696656 0.3467712 0.72186091
[10,] -1.02416667 -1.7024939 0.03971799
#解释(针对下文的三维数组示例 x <- array(rnorm(2*3*4),c(2,3,4)) 及其apply调用):
#x赋值函数中的2*3*4分别对应行*列*组(相对应的维度编号依次为1、2、3)
#apply(x,c(1,2),mean)中1,2对应的维度为行*列,不需要考虑组,所以对每组相同位置的所有元素相加后求平均,因此输出的结果为2行3列的矩阵
#同理,apply(x,c(1,3),mean)中1,3对应的维度为行*组,所以分别对每组中的行求平均,因此输出的结果为2行4列的矩阵(x中有4个组,每组中有2行)
#同理,(2,3)就代表列*组了~
> apply(x,1,quantile,probs=c(0.25,0.75)) #quantile求数据的分位数,可通过probs=c()指定要计算的分位点(此处为25%和75%);结果的每一列对应x的一行
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
25% -0.5953794 -0.3730128 -0.2585778 -1.2921500 -0.7254813 -1.0571630 -0.1490114
75% 0.2446476 0.7496719 0.3880144 0.4231553 -0.2617889 0.2864419 0.5990434
[,8] [,9] [,10]
25% -0.3573663 -0.2277163 -0.9955696
75% 0.9735492 0.9672827 0.6039250
> x <- array(rnorm(2*3*4),c(2,3,4))#表示随机从正态分布中抽取出来的24个数据,按照三维(2行*3列*4组)排列出来。
> x
, , 1
[,1] [,2] [,3]
[1,] -0.6055074 0.1428984 -0.9020732
[2,] -0.6947868 1.3597884 0.8797562
, , 2
[,1] [,2] [,3]
[1,] -0.3114873 -2.3184400 0.4499677
[2,] 0.1497819 0.1295499 -1.6927436
, , 3
[,1] [,2] [,3]
[1,] 0.9606359 1.3313254 -0.60785734
[2,] 0.7255531 -0.1389708 -0.02877733
, , 4
[,1] [,2] [,3]
[1,] 0.0279858 0.9007448 0.1251860
[2,] 0.5111250 -0.4223850 -0.6083399
> apply(x,c(1,2),mean) #保留第1、第2维(行和列),沿第3维(组)求平均,压缩成一个2行3列的平面
[,1] [,2] [,3]
[1,] 0.01790675 0.01413214 -0.2336942
[2,] 0.17291831 0.23199563 -0.3625262
> apply(x,c(1,3),mean)
[,1] [,2] [,3] [,4]
[1,] -0.4548941 -0.7266532 0.561368 0.3513056
[2,] 0.5149192 -0.4711373 0.185935 -0.1732000
> apply(x,c(2,3),mean)
[,1] [,2] [,3] [,4]
[1,] -0.6501471 -0.08085267 0.8430945 0.2695554
[2,] 0.7513434 -1.09444509 0.5961773 0.2391799
[3,] -0.0111585 -0.62138791 -0.3183173 -0.2415770