文章目录
- 1. 使用方法
- 2. 读取物理内存
- 3. 虚拟地址转换为物理地址
- 4. 分页度量进程的代码段
- 5. 知识点记录
- 6. 问题记录
- 7. 参考
之前通过读取/proc/pid/mem的方法读取某个进程的内存数据,mem内部是用copy_from_user实现的,是对虚拟地址进行的操作。但是在某一时刻,该进程的所有内存页不一定都已经被加载到内存。由于虚拟内存的存在,只有那页代码被访问到时(copy_from_user()会判断缺页的情况),才会产生缺页中断,将该页代码加载到内存。这种方式并不够理想,理想的方法是判断哪些数据页已加载到内存中,然后对其进行度量。
在google一番后,发现有三个小程序涉及到的知识可以完成这一任务:第一个是dram.c,用来创建字符设备,这个字符设备将物理内存虚拟为一个/dev/dram文件;第二个是fileview.cpp,用来读取/dev/dram文件,从而获取物理内存页的数据;第三个是translate.c,用来将虚拟地址转换为物理地址。
这样编写一个内核模块,就可以实现对进程代码段的分页度量了。以下是三个小程序的使用方法、代码注释、内核模块。
使用方法
cd Access_Physical_Memory
make #编译dram.ko
insmod dram.ko #加载内核模块
mknod /dev/dram c 85 0 #创建字符设备,主设备号设置为85
g++ fileview.cpp -o fileview
./fileview /dev/dram
运行后按回车键,即可输入要查看的物理地址(十六进制)。
读取物理内存
字符设备dram.c。
//-------------------------------------------------------------------
// dram.c
//
// This module implements a Linux character-mode device-driver
// for the processor's installed physical memory. It utilizes
// the kernel's 'kmap()' function, as a uniform way to provide
// access to all the memory-zones (including the "high memory"
// on systems with more than 896MB of installed physical ram).
// The access here is 'read-only' because we deem it too risky
// to the stable functioning of our system to allow every user
// the unrestricted ability to arbitrarily modify memory-areas
// which might contain some "critical" kernel data-structures.
// We implement an 'llseek()' method so that users can readily
// find out how much physical processor-memory is installed.
//
// NOTE: Developed and tested with Linux kernel version 2.6.10
//
// programmer: ALLAN CRUSE
// written on: 30 JAN 2005
// revised on: 28 JAN 2008 -- for Linux kernel version 2.6.22.5
// revised on: 06 FEB 2008 -- for machines having 4GB of memory
//-------------------------------------------------------------------
#include <linux/module.h> // for module_init()
#include <linux/highmem.h> // for kmap(), kunmap()
#include <asm/uaccess.h> // for copy_to_user()
char modname[] = "dram";	// driver name: shown in log messages, used for (un)registration
int my_major = 85;		// note static major assignment
loff_t dram_size;		// total bytes of system memory

/*
 * Note from the original author: with GCC, loff_t is a typedef of
 * __kernel_loff_t, which in turn is 'long long'.
 */
loff_t my_llseek( struct file *file, loff_t offset, int whence );
ssize_t my_read( struct file *file, char *buf, size_t count, loff_t *pos );

/*
 * File-operation table for the device.  Every member is a function
 * pointer: 'llseek' repositions the file's read position and 'read'
 * fetches data from the device.  No write method is supplied, so the
 * device is read-only.
 *
 * Standard C99 designated initializers (.field = value) are used here
 * instead of the obsolete GCC-only 'field: value' spelling.
 */
struct file_operations
my_fops = {
	.owner	= THIS_MODULE,
	.llseek	= my_llseek,
	.read	= my_read,
};
/*
 * Module load handler.
 *
 * Logs the driver name and major number, computes the size of physical
 * memory into 'dram_size', logs it, and registers the character device.
 *
 * Returns whatever register_chrdev() returns.
 */
static int __init dram_init( void )
{
	printk( "<1>\nInstalling '%s' module ", modname );
	printk( "(major=%d)\n", my_major );

	// get_num_physpages() is a page count; shifting it left by
	// PAGE_SHIFT converts it to a byte count
	dram_size = (loff_t)get_num_physpages() << PAGE_SHIFT;

	// %08llX: upper-case hexadecimal, zero-padded to at least 8 digits
	printk( "<1> ramtop=%08llX (%llu MB)\n", dram_size, dram_size >> 20 );

	// register the character device under the static major number
	return register_chrdev( my_major, modname, &my_fops );
}
/*
 * Module unload handler: unregisters the character device that
 * dram_init() registered and logs the removal.
 */
static void __exit dram_exit( void )
{
	unregister_chrdev( my_major, modname );
	printk( "<1>Removing '%s' module\n", modname );
}
/*
 * Read handler for the device.
 *
 * file:  the file being read
 * buf:   destination buffer (a user-space address)
 * count: number of bytes requested
 * pos:   offset from the start of the file; for this device it is the
 *        physical address to read from
 *
 * At most the remainder of one page is transferred per call, so callers
 * must loop to read larger ranges.
 *
 * Returns the number of bytes copied, 0 when *pos is at or beyond
 * dram_size, or -EFAULT when the page frame is not valid or the copy to
 * user space was incomplete.
 */
ssize_t my_read( struct file *file, char *buf, size_t count, loff_t *pos )
{
	struct page	*pp;
	void		*from;
	unsigned long	page_number, page_indent, more;

	// we cannot read beyond the end-of-file
	if ( *pos >= dram_size ) return 0;

	// determine which physical page to temporarily map
	// and how far into that page to begin reading from
	// (unsigned long rather than int, so large frame numbers
	// are not truncated)
	page_number = *pos >> PAGE_SHIFT;
	page_indent = *pos & ( PAGE_SIZE - 1 );

	// physical memory may contain holes; refuse frames that have no
	// struct page instead of mapping a bogus pointer
	if ( !pfn_valid( page_number ) ) return -EFAULT;

	// map the designated physical page into kernel space
	// If kernel version is 2.6.32 or later, use pfn_to_page() to get
	// the page (asm-generic/memory_model.h); the original author's
	// kernel was 3.16.82, hence '#if 1'
#if 1
	pp = pfn_to_page( page_number );
#else
	pp = &mem_map[ page_number ];
#endif
	from = kmap( pp ) + page_indent;

	// cannot reliably read beyond the end of this mapped page
	// (written as a subtraction so the comparison cannot overflow)
	if ( count > PAGE_SIZE - page_indent ) count = PAGE_SIZE - page_indent;

	// now transfer count bytes from mapped page to user-supplied buffer;
	// copy_to_user() returns the number of bytes it could NOT copy
	more = copy_to_user( buf, from, count );

	// ok now to discard the temporary page mapping
	kunmap( pp );

	// an error occurred if less than count bytes got copied
	if ( more ) return -EFAULT;

	// otherwise advance file-pointer and report number of bytes read
	*pos += count;
	return count;
}
/*
 * Reposition the file's read offset.
 *
 * whence takes one of:
 *   0 (SEEK_SET)  new offset is 'offset'
 *   1 (SEEK_CUR)  new offset is the current position plus 'offset'
 *   2 (SEEK_END)  new offset is the file size (dram_size) plus 'offset'
 *
 * Returns the new offset, or -EINVAL when whence is unknown or the
 * resulting offset falls outside [0, dram_size].
 */
loff_t my_llseek( struct file *file, loff_t offset, int whence )
{
	loff_t	newpos = -1;

	switch( whence )
		{
		case 0:	newpos = offset; break;			// SEEK_SET
		case 1:	newpos = file->f_pos + offset; break;	// SEEK_CUR
		case 2:	newpos = dram_size + offset; break;	// SEEK_END
		default: break;	// unknown whence: newpos stays -1, rejected below
		}

	if (( newpos < 0 )||( newpos > dram_size )) return -EINVAL;
	file->f_pos = newpos;
	return newpos;
}
// Declare the module's license and name dram_init()/dram_exit() as the
// functions passed to module_init()/module_exit().
MODULE_LICENSE("GPL");
module_init( dram_init );
module_exit( dram_exit );
fileview.cpp文件用来读取/dev/dram字符设备,并输出内存页上的数据。
//----------------------------------------------------------------
// fileview.cpp
//
// This program displays the contents of a specified file
// in hexadecimal and ascii formats (including any device
// special files representing storage media). A user may
// navigate the file's contents using arrow-key commands,
// or may adjust the format of the hexadecimal display to
// select from among five data-sizes: byte (B), word (W),
// doubleword (D), quadword (Q) or octaword (O). It also
// is possible to seek to a specified position within the
// file by hitting the <ENTER>-key and then typing in the
// desired (hexadecimal) address. Type <ESCAPE> to quit.
//
// 此程序以十六进制和ascii格式显示指定文件的内容(包括表示存储介质的任何设备专用文件)。
// 用户可以使用箭头键命令浏览文件内容,也可以调整十六进制显示的格式,
// 以便从五种数据大小中进行选择:字节(B)、字(W)、双字(D)、四字(Q)或八字(O)。
// 也可以通过按<ENTER>键,然后键入所需的(十六进制)地址,在文件中查找到指定的位置。
// 键入<ESCAPE>退出。
//
// compile-and-link using: $ make fileview
//
// programmer: ALLAN CRUSE
// written on: 26 OCT 2002
// revised on: 07 JUN 2006 -- removed reliance on 'ncurses'
//----------------------------------------------------------------
#include <stdio.h> // for printf(), perror(), fflush()
#include <fcntl.h> // for open()
#include <string.h> // for strncpy()
#include <unistd.h> // for read(), lseek64()
#include <stdlib.h> // for exit()
#include <termios.h> // for tcgetattr(), tcsetattr()
#define MAXNAME	80
#define BUFHIGH 16	// number of rows in the hexadecimal dump
#define BUFWIDE	16	// number of bytes shown on each row
#define BUFSIZE 256
#define ROW	6
#define COL	2

// Key codes: the bytes delivered by the terminal for one keypress, as
// they appear when read into the low-order bytes of an integer
#define KB_SEEK 0x0000000A
#define KB_QUIT	0x0000001B
#define KB_BACK 0x0000007F
#define KB_HOME	0x00315B1B
#define KB_LNUP	0x00415B1B
#define KB_PGUP	0x00355B1B
#define KB_LEFT	0x00445B1B
#define KB_RGHT	0x00435B1B
#define KB_LNDN	0x00425B1B
#define KB_PGDN	0x00365B1B
#define KB_END	0x00345B1B
#define KB_DEL	0x00335B1B

char progname[] = "FILEVIEW";
char filename[ MAXNAME + 1 ];	// one extra byte for the terminating NUL
char buffer[ BUFSIZE ];
char outline[ 80 ];
// Usage: ./fileview /dev/dram
//
// Interactive viewer: shows BUFSIZE bytes of the named file at a time
// as BUFHIGH rows of hexadecimal plus ascii, and reacts to single
// keypresses (navigation, format selection, seek-to-address, quit).
// Exits with status 1 when no argument is given, the file cannot be
// opened, its size cannot be determined, or the terminal settings
// cannot be read.
int main( int argc, char *argv[] )
{
	// setup the filename (if supplied), else terminate
	// (here argv[1] is expected to be e.g. /dev/dram; snprintf always
	// NUL-terminates and never writes past filename[])
	if ( argc > 1 ) snprintf( filename, sizeof filename, "%s", argv[1] );
	else { fprintf( stderr, "argument needed\n" ); exit(1); }

	// open the file for reading (read-only)
	int	fd = open( filename, O_RDONLY );
	if ( fd < 0 ) { perror( filename ); exit(1); }

	// obtain the filesize (if possible)
	// lseek64 takes a 64-bit offset and returns the resulting offset
	// measured from the start of the file
	long long	filesize = lseek64( fd, 0LL, SEEK_END );
	if ( filesize < 0LL )
		{
		fprintf( stderr, "cannot locate 'end-of-file' \n" );
		exit(1);
		}

	long long	incmin = ( 1LL <<  8 );
	long long	incmax = ( 1LL << 36 );
	long long	posmin = 0LL;
	long long	posmax = (filesize - 241LL)&~0xF;
	if ( posmax < posmin ) posmax = posmin;

	// initiate noncanonical terminal input
	struct termios	tty_orig;
	if ( tcgetattr( STDIN_FILENO, &tty_orig ) < 0 )
		{
		// without the saved settings we could not restore the terminal
		perror( "tcgetattr" );
		exit(1);
		}
	struct termios	tty_work = tty_orig;
	// turn off echo and canonical (line-buffered) input
	tty_work.c_lflag &= ~( ECHO | ICANON );  // | ISIG );
	tty_work.c_cc[ VMIN ]  = 1;
	tty_work.c_cc[ VTIME ] = 0;
	tcsetattr( STDIN_FILENO, TCSAFLUSH, &tty_work );
	printf( "\033[H\033[J" );

	// display the legend
	int	i, j, k;
	k = (77 - strlen( progname ))/2;
	// print the program name on row 1, column k
	printf( "\033[%d;%dH %s ", 1, k, progname );
	k = (77 - strlen( filename ))/2;
	// print the quoted filename on row 3
	printf( "\033[%d;%dH'%s'", 3, k, filename );
	char	infomsg[ 80 ];
	sprintf( infomsg, "filesize: %llu (=0x%013llX)",
		(unsigned long long)filesize, (unsigned long long)filesize );
	k = (78 - strlen( infomsg ));
	printf( "\033[%d;%dH%s", 24, k, infomsg );
	fflush( stdout );

	// main loop to navigate the file
	long long	pageincr = incmin;
	long long	position = 0LL;
	long long	location = 0LL;
	int		format = 1;
	int		done = 0;
	while ( !done )
		{
		// erase prior buffer contents (this buffer temporarily
		// holds the data read from the file)
		for (j = 0; j < BUFSIZE; j++) buffer[ j ] = ~0;

		// restore 'pageincr' to prescribed bounds
		if ( pageincr == 0LL ) pageincr = incmax;
		else if ( pageincr < incmin ) pageincr = incmin;
		else if ( pageincr > incmax ) pageincr = incmax;

		// seek to 'position' and remember the resulting offset
		location = lseek64( fd, position, SEEK_SET );

		// try to fill 'buffer[]' with data from the file
		char	*where = buffer;
		int	to_read = BUFSIZE;
		while ( to_read > 0 )
			{
			int	nbytes = read( fd, where, to_read );
			if ( nbytes <= 0 ) break;
			to_read -= nbytes;
			where += nbytes;
			}

		// display the data just read into the 'buffer[]' array
		for (i = 0; i < BUFHIGH; i++)
			{
			const char	*row = &buffer[ i*BUFWIDE ];
			int		linelen;

			// draw the line-location (13-digit hexadecimal)
			linelen = sprintf( outline, "%013llX ",
					(unsigned long long)location );

			// draw the line in the selected hexadecimal format;
			// every format fills the same 48 columns so that the
			// ascii column lines up.  Values are fetched with
			// memcpy so no alignment or aliasing rules are broken.
			switch ( format )
				{
				case 1:	// 'byte' format
				for (j = 0; j < BUFWIDE; j++)
					linelen += sprintf( outline+linelen,
						"%02X ", (unsigned char)row[j] );
				break;

				case 2:	// 'word' format
				for (j = 0; j < BUFWIDE/2; j++)
					{
					unsigned short	w;
					memcpy( &w, row + j*2, sizeof w );
					linelen += sprintf( outline+linelen,
						" %04X ", w );
					}
				break;

				case 4:	// 'dword' format
				for (j = 0; j < BUFWIDE/4; j++)
					{
					unsigned int	d;
					memcpy( &d, row + j*4, sizeof d );
					linelen += sprintf( outline+linelen,
						"  %08X  ", d );
					}
				break;

				case 8:	// 'qword' format
				for (j = 0; j < BUFWIDE/8; j++)
					{
					unsigned long long	q;
					memcpy( &q, row + j*8, sizeof q );
					linelen += sprintf( outline+linelen,
						"    %016llX    ", q );
					}
				break;

				case 16: // 'octaword'
					{
					unsigned long long	lo, hi;
					memcpy( &lo, row, sizeof lo );
					memcpy( &hi, row + 8, sizeof hi );
					linelen += sprintf( outline+linelen, "     " );
					linelen += sprintf( outline+linelen,
						"   %016llX%016llX   ", hi, lo );
					linelen += sprintf( outline+linelen, "     " );
					}
				break;
				}

			// draw the line in ascii format
			for (j = 0; j < BUFWIDE; j++)
				{
				char	ch = row[ j ];
				if (( ch < 0x20 )||( ch > 0x7E )) ch = '.';
				linelen += sprintf( outline+linelen, "%c", ch );
				}

			// transfer this output-line to the screen
			printf( "\033[%d;%dH%s", ROW+i, COL, outline );

			// advance 'location' for the next output-line
			location += BUFWIDE;
			}
		printf( "\033[%d;%dH", 23, COL );
		fflush( stdout );

		// await keypress; the bytes of the key sequence land in the
		// low-order bytes of 'inch' (the KB_* codes assume that layout).
		// Leave the loop if stdin is closed or fails, instead of
		// spinning forever.
		long long	inch = 0LL;
		if ( read( STDIN_FILENO, &inch, sizeof( inch ) ) <= 0 ) break;
		printf( "\033[%d;%dH%60s", 23, COL, " " );

		// interpret navigation or formatting command
		inch &= 0x00FFFFFFLL;
		switch ( inch )
			{
			// move to the file's beginning/ending
			case 'H': case 'h':
			case KB_HOME:	position = posmin; break;
			case 'E': case 'e':
			case KB_END:	position = posmax; break;

			// move forward/backward by one line
			case KB_LNDN:	position += BUFWIDE; break;
			case KB_LNUP:	position -= BUFWIDE; break;

			// move forward/backward by one page
			case KB_PGDN:	position += pageincr; break;
			case KB_PGUP:	position -= pageincr; break;

			// increase/decrease the page-size increment
			case KB_RGHT:	pageincr >>= 4; break;
			case KB_LEFT:	pageincr <<= 4; break;

			// reset the hexadecimal output-format
			case 'B': case 'b':	format = 1; break;
			case 'W': case 'w':	format = 2; break;
			case 'D': case 'd':	format = 4; break;
			case 'Q': case 'q':	format = 8; break;
			case 'O': case 'o':	format = 16; break;

			// seek to a user-specified file-position
			case KB_SEEK:
			printf( "\033[%d;%dHAddress: ", 23, COL );
			fflush( stdout );
			{
			char	inbuf[ 16 ] = {0};
			int	n = 0;	// number of characters typed so far
			while ( n < 15 )
				{
				long long	ch = 0;
				if ( read( STDIN_FILENO, &ch, sizeof( ch ) ) <= 0 )
					break;
				ch &= 0xFFFFFF;
				if ( ch == '\n' ) break;
				if ( ch == KB_QUIT ) { inbuf[0] = 0; break; }
				if ( ch == KB_LEFT ) ch = KB_BACK;
				if ( ch == KB_DEL ) ch = KB_BACK;
				if (( ch == KB_BACK )&&( n > 0 ))
					{
					inbuf[--n] = 0;
					printf( "\b \b" );
					fflush( stdout );
					}
				if (( ch < 0x20 )||( ch > 0x7E )) continue;
				inbuf[ n++ ] = (char)ch;
				// %c takes an int, not a long long
				printf( "%c", (int)ch );
				fflush( stdout );
				}
			printf( "\033[%d;%dH%70s", 23, COL, " " );
			fflush( stdout );
			position = strtoull( inbuf, NULL, 16 );
			position &= ~0xFLL;	// paragraph align
			}
			break;

			// program termination
			case KB_QUIT:	done = 1; break;

			default:
			printf( "\033[%d;%dHHit <ESC> to quit", 23, 2 );
			}
		fflush( stdout );

		// insure that 'position' remains within bounds
		if ( position < posmin ) position = posmin;
		if ( position > posmax ) position = posmax;
		}

	// restore canonical terminal behavior
	tcsetattr( STDIN_FILENO, TCSAFLUSH, &tty_orig );
	printf( "\033[%d;%dH\033[0J\n", 23, 0 );
	close( fd );
	return 0;
}
Makefile文件
#Makefile
# When invoked by the kernel build system (KERNELRELEASE is set) only the
# object list is needed; otherwise re-invoke make inside the kernel build
# tree of the running kernel, pointing it at this directory.
ifneq ($(KERNELRELEASE),)
obj-m := dram.o
else
KDIR := /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)

.PHONY: default
# Recipe lines must begin with a TAB character.
# M= is the supported way to name an external module directory
# (SUBDIRS= is the deprecated spelling).
default:
	$(MAKE) -C $(KDIR) M=$(PWD) modules
	rm -r -f .tmp_versions *.mod.c .*.cmd *.o *.symvers
endif
虚拟地址转换为物理地址
由于虚拟内存的存在,一个二进制文件不是整个代码段加载到内存的。一个进程的内存页是否加载到物理内存,系统是有记录的。/proc/$pid/pagemap文件就记录了pid进程的虚拟地址和物理地址的映射情况。
translate.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#define PAGEMAP_ENTRY 8	/* presumably the size in bytes of one pagemap entry -- confirm against its use */

/*
 * GET_BIT(X,Y): value (0 or 1) of bit number Y in the bit array X.
 * GET_PFN(X):   the low 55 bits of X, used as the physical page frame number.
 *
 * Arguments and the whole expansion are parenthesized so the macros can
 * be used safely inside larger expressions such as GET_PFN(v) == 0.
 */
#define GET_BIT(X,Y) (((X) & ((uint64_t)1 << (Y))) >> (Y))
#define GET_PFN(X) ((X) & 0x7FFFFFFFFFFFFFULL)

/* The first byte of the int value 1 is 0 only on a big-endian machine. */
const int __endian_bit = 1;
#define is_bigendian() ( (*(const char*)&__endian_bit) == 0 )

int i, c, pid, status;
unsigned long virt_addr;
uint64_t read_val, file_offset, page_size;
char path_buf [0x100] = {0};	/* '= {}' is not valid before C23 */
FILE * f;
char *end;

int read_pagemap(char * path_buf, unsigned long virt_addr);
int main(int argc, char ** argv){
if(argc!=3){
printf("Argument number is not correct!n pagemap PID VIRTUAL_ADDRESSn");
return -1;
}
if(!memcmp(argv[1],"self",sizeof("self"))){
sprintf(path_buf, "/proc/self/pagemap");
pid = -1;
}
else{
/*从字符串中解析整数
argv[1]中为字符串
end是一个字符指针,函数解析完long整数后,会将end指向整数之后的第一个字符。end如果为null,则不设置。
10表示按照10进制解析数据,如果是0表示按照其本身进制标记来解析。
*/
pid = strtol(argv[1],&end, 10);
if (end == argv[1] || *end != '