一、背景
这两天在努力记单词,想着应该先把最常用的单词记下来,于是从网上找了几篇文章,分析出一批高频词汇,效果还算不错。
接着又想到了代码,也好奇开发者最常使用的单词或函数有哪些,于是统计了三种类型:系统函数、变量名、自定义函数名。感兴趣的朋友可以将正则改成自己需要的统计规则。
二、参考代码
<?php
/**
 * Recursively collect the PHP files below a directory.
 *
 * Every regular file whose extension is exactly "php" is appended, as
 * "<directory>/<name>", to the global list $_SERVER['fileList']. The entries
 * ".", ".." and ".git" are skipped at every level.
 *
 * The path is stored exactly as the filesystem reports it (no charset
 * conversion), because the recorded paths are later opened again for reading.
 *
 * @param string $path Directory to scan; trailing slashes are ignored.
 *
 * @return void
 */
function scanMyDir($path)
{
    $path = rtrim($path, '/');

    $dh = opendir($path);
    if ($dh === false) {
        // Unreadable or missing directory: nothing to collect here.
        return;
    }

    while (($file = readdir($dh)) !== false) {
        // Skip the current directory, the parent directory and Git metadata.
        if ($file === '.' || $file === '..' || $file === '.git') {
            continue;
        }

        $fullPath = $path . '/' . $file;

        if (is_dir($fullPath)) {
            // Descend into sub-directories; a directory is never a result itself.
            scanMyDir($fullPath);
            continue;
        }

        // Compare the real extension so that "a.php" is accepted while
        // "a.php.bak" or "a.phpt" are not.
        if (pathinfo($file, PATHINFO_EXTENSION) === 'php') {
            $_SERVER['fileList'][] = $fullPath;
        }
    }

    // Release the handle; deep trees would otherwise exhaust file descriptors.
    closedir($dh);
}
/**
 * Count keyword occurrences in one file.
 *
 * Every match of capture group 1 of $pregRule found in the file increments
 * the matching entry of the global tally $_SERVER['count'] (keyword => hits).
 * Files that cannot be read, and patterns that fail or match nothing, leave
 * the tally untouched.
 *
 * @param string $fileName Path of the file to analyse.
 * @param string $pregRule PCRE pattern; the keyword must be capture group 1.
 *
 * @return void
 */
function explodeCount($fileName, $pregRule)
{
    if (!is_file($fileName)) {
        return;
    }

    $text = file_get_contents($fileName);
    if ($text === false) {
        return;
    }

    // preg_match_all() yields false on error and 0 when nothing matched.
    if (!preg_match_all($pregRule, $text, $keywordArr)) {
        return;
    }

    foreach ($keywordArr[1] ?? [] as $keyword) {
        // Increment rather than assign, so repeated keywords accumulate.
        $_SERVER['count'][$keyword] = ($_SERVER['count'][$keyword] ?? 0) + 1;
    }
}
/**
 * Run the whole analysis for one directory tree.
 *
 * Steps:
 *  1. collect the PHP files below $path (scanMyDir);
 *  2. write the file list, one path per line, to "fileList.txt" in the
 *     current working directory so the analysed files can be inspected;
 *  3. count the keywords matched by $pregRule in every file (explodeCount);
 *  4. keep the 100 most frequent keywords, store them in $_SERVER['count']
 *     and dump them with var_export() into "countResult.txt".
 *
 * @param string $path     Root directory of the code to analyse.
 * @param string $pregRule PCRE pattern; the keyword must be capture group 1.
 *
 * @return void
 */
function start($path, $pregRule)
{
    scanMyDir($path);

    // The list stays undefined when no PHP file was found.
    $fileList = $_SERVER['fileList'] ?? [];

    // One path per line; a real newline is required because a plain "n"
    // separator would cut every path that contains that letter.
    file_put_contents("fileList.txt", implode("\n", $fileList));

    // Iterate over the in-memory list: re-reading the text file would add
    // nothing and would turn an empty list into one empty file name.
    foreach ($fileList as $fileName) {
        explodeCount($fileName, $pregRule);
    }

    // Sort by hit count, highest first, before trimming the result.
    $counts = $_SERVER['count'] ?? [];
    arsort($counts);

    // Keep the top 100; preserve_keys stops integer-like keywords from
    // being renumbered by array_slice().
    $_SERVER['count'] = array_slice($counts, 0, 100, true);

    $sumResult = var_export($_SERVER['count'], true);
    file_put_contents("countResult.txt", $sumResult);
}
// Directory whose code is analysed.
$path = '/root/mycode/work/offcn-live/vendor';

// Pick exactly one rule. Capture group 1 is the keyword that gets counted.
//$pregRule = '/ ([a-z]+_?[a-z]+)\(/';      // plain (built-in style) function calls: " name("
//$pregRule = '/(\$[a-zA-Z]+_?[a-zA-Z]+)/'; // variable names: "$name"
$pregRule = '/[->:]+([a-z]+_?[a-z]+)\(/';   // method calls: "->name(" or "::name("

start($path, $pregRule);
三、常用函数
array (
'array' => 6126,
'isset' => 1345,
'substr' => 845,
'sprintf' => 737,
'strlen' => 650,
'count' => 598,
'unset' => 556,
'array_merge' => 449,
'list' => 413,
'strpos' => 408,
'str_replace' => 393,
'implode' => 348,
'explode' => 333,
'is_array' => 332,
'static' => 297,
'trim' => 263,
'declare' => 238,
'mock' => 237,
'pack' => 232,
'preg_match' => 222,
'is_null' => 210,
'get_class' => 203,
'array_map' => 195,
'self' => 191,
'strtolower' => 190,
'empty' => 183,
'preg_replace' => 180,
'chr' => 169,
'function_exists' => 163,
'user_error' => 161,
'handle' => 158,
'is_string' => 155,
'is_object' => 140,
'str_repeat' => 139,
'array_keys' => 138,
'rewind' => 137,
'in_array' => 133,
'write' => 132,
'mt_rand' => 132,
'array_values' => 129,
'time' => 125,
'not' => 124,
'array_shift' => 124,
'extract' => 120,
'getenv' => 115,
'reset' => 113,
'execute' => 112,
'printf' => 110,
'fopen' => 108,
'get' => 105,
'collect' => 100,
'current' => 100,
'fclose' => 99,
'unpack' => 96,
'strval' => 96,
'matches' => 92,
'rtrim' => 90,
'str_pad' => 88,
'json_encode' => 88,
'array_filter' => 88,
'array_pop' => 85,
'app' => 84,
'range' => 84,
'dirname' => 83,
'define' => 81,
'microtime' => 80,
'foo' => 80,
'create' => 80,
'ord' => 80,
'compact' => 79,
'read' => 77,
'method_exists' => 76,
'register' => 75,
'realpath' => 74,
'intval' => 73,
'bar' => 73,
'strtotime' => 73,
'fread' => 72,
'class_exists' => 72,
'print' => 72,
'max' => 72,
'curl_setopt' => 70,
'fwrite' => 69,
'tap' => 66,
'strtoupper' => 65,
'array_unshift' => 65,
'serialize' => 64,
'ob_start' => 64,
'unserialize' => 63,
'strrpos' => 61,
'key' => 61,
'preg_split' => 61,
'ini_get' => 61,
'add' => 59,
'close' => 59,
'array_slice' => 58,
'putenv' => 57,
'eval' => 57,
'gettype' => 56,
'var_export' => 56,
)
四、常用变量名
array (
'$this' => 75572,
'$value' => 6303,
'$options' => 4731,
'$key' => 4597,
'$name' => 4367,
'$vendorDir' => 4310,
'$message' => 4115,
'$request' => 3453,
'$stackPos' => 3237,
'$response' => 2796,
'$result' => 2577,
'$data' => 2308,
'$path' => 2117,
'$node' => 1733,
'$type' => 1650,
'$method' => 1620,
'$file' => 1449,
'$arguments' => 1415,
'$class' => 1408,
'$callback' => 1378,
'$output' => 1364,
'$command' => 1314,
'$parameters' => 1273,
'$config' => 1252,
'$expected' => 1197,
'$column' => 1153,
'$input' => 1140,
'$id' => 1119,
'$headers' => 1083,
'$event' => 1083,
'$args' => 986,
'$attributes' => 979,
'$length' => 961,
'$code' => 950,
'$query' => 947,
'$prefix' => 947,
'$mock' => 930,
'$token' => 925,
'$context' => 909,
'$test' => 892,
'$temp' => 884,
'$header' => 871,
'$matches' => 847,
'$object' => 825,
'$string' => 813,
'$container' => 810,
'$server' => 810,
'$stream' => 768,
'$collection' => 768,
'$route' => 761,
'$values' => 761,
'$record' => 748,
'$exception' => 748,
'$actual' => 719,
'$connection' => 712,
'$item' => 697,
'$constraint' => 670,
'$operation' => 666,
'$date' => 655,
'$bucket' => 648,
'$array' => 644,
'$line' => 643,
'$count' => 641,
'$uri' => 622,
'$buf' => 618,
'$handler' => 608,
'$default' => 598,
'$table' => 594,
'$content' => 578,
'$reader' => 558,
'$resource' => 549,
'$application' => 549,
'$tokens' => 541,
'$locale' => 539,
'$attribute' => 531,
'$format' => 518,
'$filename' => 510,
'$className' => 509,
'$str' => 505,
'$parts' => 505,
'$matcher' => 499,
'$text' => 498,
'$queue' => 483,
'$generator' => 480,
'$filter' => 476,
'$client' => 475,
'$level' => 468,
'$domain' => 467,
'$writer' => 464,
'$argument' => 460,
'$number' => 459,
'$option' => 452,
'$payload' => 448,
'$keys' => 445,
'$process' => 444,
'$translator' => 437,
'$app' => 435,
'$listener' => 430,
'$files' => 429,
'$index' => 422,
)
五、常用自定义函数
array (
'once' => 1292,
'with' => 1105,
'get' => 997,
'expects' => 700,
'method' => 651,
'set' => 612,
'create' => 600,
'add' => 588,
'foo' => 464,
'format' => 434,
'write' => 429,
'execute' => 421,
'all' => 378,
'evaluate' => 344,
'has' => 320,
'register' => 318,
'fail' => 294,
'find' => 286,
'run' => 284,
'any' => 280,
'start' => 254,
'parse' => 233,
'load' => 203,
'make' => 200,
'read' => 191,
'generate' => 185,
'factory' => 182,
'close' => 164,
'current' => 155,
'render' => 152,
'ask' => 149,
'numerify' => 146,
'will' => 145,
'where' => 137,
'singleton' => 133,
'writeln' => 128,
'valid' => 125,
'next' => 124,
'main' => 122,
'send' => 121,
'trans' => 117,
'request' => 116,
'option' => 113,
'handle' => 112,
'matches' => 111,
'match' => 110,
'contains' => 103,
'write_shortstr' => 103,
'process' => 101,
'never' => 100,
'at' => 99,
'initialize' => 97,
'rewind' => 96,
'bind' => 92,
'validate' => 90,
'dispatch' => 88,
'filter' => 86,
'in' => 86,
'copy' => 85,
'verify' => 84,
'delete' => 83,
'wrap' => 81,
'put' => 79,
'stop' => 78,
'mock' => 78,
'dump' => 78,
'supports' => 78,
'observe' => 77,
'encrypt' => 77,
'attach' => 75,
'first' => 74,
'apply' => 73,
'remove' => 72,
'invoke' => 72,
'connection' => 71,
'advance' => 69,
'decrypt' => 69,
'ordered' => 69,
'save' => 68,
'resolve' => 68,
'prepare' => 67,
'println' => 67,
'auth' => 65,
'reset' => 65,
'bar' => 64,
'write_short' => 64,
'call' => 63,
'map' => 63,
'compare' => 63,
'string' => 62,
'log' => 62,
'wait' => 61,
'info' => 61,
'update' => 60,
'escape' => 60,
'lookup' => 58,
'write_bits' => 57,
'count' => 57,
'push' => 56,
'times' => 55,
)
后面的数字代表该词在代码中出现的次数。我用的是项目的vendor目录,里面都是一些比较常用的开源代码库,所以应该算是比较有参考价值。
作者:汤青松 日期: 2020-06-30