背景
- 内核的磁盘文件系统核心是如何组织充分利用物理磁盘文件空间来组织数据的存储,其中的数据存储包括的
file metadata
和file data
.磁盘文件系统包括了核心的数据结构,其中包括了磁盘文件系统的超级块
、inode bitmap
、block bitmap
、block data
。文件系统的超级块
存储了文件系统的元数据,包括文件系统block大小、inode个数
、block个数
等信息。inode bitmap
存储了inode的使用情况;block bitmap
存储block的使用情况;block data
是实际存储数据的空间。
- 接下来的会结合内核磁盘文件系统来实现简易的文件系统,如果需要构建用户态的分布式文件系统的文件组织可以看下其实现的思路,不同点就是一个运行内核态的本地磁盘文件系统;一个是运行于用户态的文件系统。设计文件系统需要考虑数据如何高效的组织和检索、访问。比如文件合并,文件空间管理、如何减少inode Cache的压力、元数据的高效访问等等。
模拟内核文件系统数据结构定义
- 首先需要定义磁盘文件系统的
超级块
,这里的结构定义struct superblock
,这个超级块包含了inodes_num:inode的个数
、blocks_num
多少个bkock、block_size
每个block的个数。这些在mkfs.fs
创建文件系统的时候根据数据结构和给定的物理空间可以计算出这些元数据的大小
代码语言:javascript
复制struct superblock {
int inodes_num;
int blocks_num;
int block_size;
};
- 有了超级快需要知道文件元数据的结构
inode
,在模拟磁盘文件系统实现中也定义定义了struct inode
包括size:文件大小
、first_block:起始block位置
、block_count:block的个数
、name:文件名称
代码语言:javascript
复制struct inode {
int size;
int first_block;
int block_count;
char name[FILE_NAME_MAX_LEN];
};
- 数据块的定义在
struct disk_block
,disk_block
存储了next_block_num:下一个数据块索引
、data:存储数据的区域
代码语言:javascript
复制struct disk_block {
int next_block_num;
char data[BLOCK_PER_SIZE];
};
模拟内核文件系统数据接口实现
- 文件系统的创建
mkfs.xxxfs
的命令就是用来初始化一个文件系统,在模拟磁盘文件系统实现中我们这定义了create_fs
的函数,这个函数的本质是把实现的磁盘文件系统的超级块数据写入到磁盘中。
代码语言:javascript
复制void create_fs()
{
sb.inodes_num = 10;
sb.blocks_num = 40;
sb.block_size = sizeof(struct disk_block);
inodes = (struct inode *)malloc(sizeof(struct inode) * sb.inodes_num);
int i = 0;
for (; i < sb.inodes_num; i )
{
inodes[i].size = -1;
inodes[i].first_block = -1;
snprintf((char *)&inodes[i].name, 256, "%s", "empty");
}
blocks = (struct disk_block *)malloc(sizeof(struct disk_block) * sb.blocks_num);
for (i = 0; i < sb.blocks_num; i )
{
blocks[i].next_block_num = -1;
}
}
- 文件系统挂载是采用
mount
命令,这里模拟过程中定义了mount_fs
函数,这个函数是读取超级块的信息,加载到内存,来完成模拟文件系统的挂载
代码语言:javascript
复制// 文件系统挂载
void mount_fs()
{
int fd = open("/tmp/samplefs_data", O_RDWR, 0755);
assert(fd != -1);
read(fd, &sb, sizeof(struct superblock));
if (inodes == NULL)
{
inodes = (struct inode *)calloc(sb.inodes_num, sizeof(struct inode));
assert(inodes != NULL);
}
read(fd, inodes, sizeof(struct inode) * sb.inodes_num);
if (blocks == NULL)
{
blocks = (struct disk_block *)calloc(sb.blocks_num, sizeof(struct disk_block));
assert(blocks != NULL);
}
read(fd, blocks, sizeof(struct disk_block) * sb.blocks_num);
close(fd);
}
- 文件系统的数据同步在模拟过程中采用了
sync_fs
函数,是要完成同步文件系统的数据和元数据。
代码语言:javascript
复制//文件系统同步
void sync_fs()
{
int fd = open("/tmp/samplefs_data", O_RDWR, 0755);
assert(fd != -1);
int i = 0;
write(fd, &sb, sizeof(struct superblock));
for (; i < sb.inodes_num; i )
{
write(fd, &inodes[i], sizeof(struct inode));
}
for (i = 0; i < sb.blocks_num; i )
{
write(fd, &blocks[i], sizeof(struct disk_block));
}
close(fd);
}
- 模拟过程中定义了
alloc_file
函数来完成inode的申请,这个是遍历inodes来查找空闲的inode,初始化后写入到空闲的block。
代码语言:javascript
复制int alloc_file(char *name)
{
int inode_index = find_empty_inode();
int block_index = find_empty_block();
snprintf((char *)inodes[inode_index].name, 256, "%s", name);
inodes[inode_index].first_block = block_index;
blocks[block_index].next_block_num = -2;
return inode_index;
}
- 模拟了文件系统写入的函数
write_file
,这个过程过程是先找到对应的inode,然后找到空闲的数据块写入数据
代码语言:javascript
复制int write_file(char *src_name, char *dst_name)
{
int src_fd = open(src_name, O_RDONLY, 0644);
assert(src_fd != -1);
struct stat st;
stat(src_name, &st);
int in = -1;
size_t dst_name_sz = strlen(dst_name);
for (int i = 0; i < sb.inodes_num; i )
{
if (strncmp(inodes[i].name, dst_name, dst_name_sz) == 0)
{
in = i;
break;
}
}
if (in < 0)
{
return in;
}
int block_count = st.st_size / BLOCK_PER_SIZE;
if (st.st_size % BLOCK_PER_SIZE != 0)
{
block_count ;
}
int blk_index = -1;
for (int i = 0; i < sb.blocks_num; i )
{
if (blocks[i].next_block_num == -1)
{
blk_index = i;
break;
}
}
if (blk_index < 0)
{
inodes[in].first_block = -1;
return -1;
}
inodes[in].size=st.st_size;
inodes[in].first_block = blk_index;
inodes[in].block_count = block_count;
size_t ret = 0;
for (int i = 0; i < block_count; i )
{
blocks[blk_index i].next_block_num =-2;
size_t r_sz = read(src_fd, (char *)&blocks[blk_index i].data, BLOCK_PER_SIZE);
ret = r_sz;
}
return ret;
}
模拟内核文件系统代码
代码语言:javascript
复制/*************************************************************************
> File Name: main.c
> Author:perrynzhou
> Mail:perrynzhou@gmail.com
> Created Time: Wed 23 Mar 2022 05:06:43 AM UTC
************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "sample_fs.h"
int main(int argc, char *argv[])
{
if (argc != 2)
{
fprintf(stdout,"usage:%s {1:init fs} {2:mount fs} {3:write a file}n",argv[0]);
return -1;
}
int flag = atoi(argv[1]);
switch (flag)
{
case 0:
create_fs();
sync_fs();
fprintf(stdout, "done init samplefsn");
break;
case 1:
mount_fs();
alloc_file("perrynzhou");
sync_fs();
print_fs();
break;
case 2:
mount_fs();
alloc_file("my_data.txt");
write_file("/tmp/a.txt", "my_data.txt");
sync_fs();
print_fs();
break;
default:
fprintf(stdout, "unknow commandn");
break;
}
}
代码语言:javascript
复制/*************************************************************************
> File Name: sample_fs.h
> Author:perrynzhou
> Mail:perrynzhou@gmail.com
> Created Time: Wed 23 Mar 2022 04:43:23 AM UTC
************************************************************************/
#ifndef _SAMPLE_FS_H
#define _SAMPLE_FS_H
#define FILE_NAME_MAX_LEN 255
#define BLOCK_PER_SIZE 512
// samplefs文件系统的元数据信息
// 包括 inode
struct superblock {
int inodes_num;
int blocks_num;
int block_size;
};
struct inode {
int size;
int first_block;
int block_count;
char name[FILE_NAME_MAX_LEN];
};
struct disk_block {
int next_block_num;
char data[BLOCK_PER_SIZE];
};
// 文件系统创建
void create_fs();
// 文件系统挂载
void mount_fs();
//文件系统同步
void sync_fs();
void print_fs();
int alloc_file(char *name);
int write_file(char *src_name,char *dst_name);
int read_file(char *name);
#endif
代码语言:javascript
复制/*************************************************************************
> File Name: sample_fs.c
> Author:perrynzhou
> Mail:perrynzhou@gmail.com
> Created Time: Wed 23 Mar 2022 04:48:32 AM UTC
************************************************************************/
#include "sample_fs.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
struct superblock sb;
struct inode *inodes;
struct disk_block *blocks;
// 文件系统创建
void create_fs()
{
sb.inodes_num = 10;
sb.blocks_num = 40;
sb.block_size = sizeof(struct disk_block);
inodes = (struct inode *)malloc(sizeof(struct inode) * sb.inodes_num);
int i = 0;
for (; i < sb.inodes_num; i )
{
inodes[i].size = -1;
inodes[i].first_block = -1;
snprintf((char *)&inodes[i].name, 256, "%s", "empty");
}
blocks = (struct disk_block *)malloc(sizeof(struct disk_block) * sb.blocks_num);
for (i = 0; i < sb.blocks_num; i )
{
blocks[i].next_block_num = -1;
}
}
void print_fs()
{
fprintf(stdout, "super_block:n");
fprintf(stdout, "t inodes_num:%dn", sb.inodes_num);
fprintf(stdout, "t blocks_num:%dn", sb.blocks_num);
fprintf(stdout, "t block_size:%dn", sb.block_size);
fprintf(stdout, "inodes:n");
int i;
for (i = 0; i < sb.inodes_num; i )
{
fprintf(stdout, "t inode:%d,size:%d first_block:%d name:%sn",i, inodes[i].size, inodes[i].first_block, (char *)&inodes[i].name);
}
fprintf(stdout, "blocks:n");
for (i = 0; i < sb.blocks_num; i )
{
fprintf(stdout, "t block num:%d next_block_num:%d n", i, blocks[i].next_block_num);
}
}
// 文件系统挂载
void mount_fs()
{
int fd = open("/tmp/samplefs_data", O_RDWR, 0755);
assert(fd != -1);
read(fd, &sb, sizeof(struct superblock));
if (inodes == NULL)
{
inodes = (struct inode *)calloc(sb.inodes_num, sizeof(struct inode));
assert(inodes != NULL);
}
read(fd, inodes, sizeof(struct inode) * sb.inodes_num);
if (blocks == NULL)
{
blocks = (struct disk_block *)calloc(sb.blocks_num, sizeof(struct disk_block));
assert(blocks != NULL);
}
read(fd, blocks, sizeof(struct disk_block) * sb.blocks_num);
close(fd);
}
//文件系统同步
void sync_fs()
{
int fd = open("/tmp/samplefs_data", O_RDWR, 0755);
assert(fd != -1);
int i = 0;
write(fd, &sb, sizeof(struct superblock));
for (; i < sb.inodes_num; i )
{
write(fd, &inodes[i], sizeof(struct inode));
}
for (i = 0; i < sb.blocks_num; i )
{
write(fd, &blocks[i], sizeof(struct disk_block));
}
close(fd);
}
int find_empty_inode()
{
int i;
for (i = 0; i < sb.inodes_num; i )
{
if (inodes[i].first_block ==-1)
{
return i;
}
}
return -1;
}
int find_empty_block()
{
int i;
for (i = 0; i < sb.blocks_num; i )
{
if (blocks[i].next_block_num == -1)
{
return i;
}
}
return -1;
}
int alloc_file(char *name)
{
int inode_index = find_empty_inode();
int block_index = find_empty_block();
snprintf((char *)inodes[inode_index].name, 256, "%s", name);
inodes[inode_index].first_block = block_index;
blocks[block_index].next_block_num = -2;
return inode_index;
}
int write_file(char *src_name, char *dst_name)
{
int src_fd = open(src_name, O_RDONLY, 0644);
assert(src_fd != -1);
struct stat st;
stat(src_name, &st);
int in = -1;
size_t dst_name_sz = strlen(dst_name);
for (int i = 0; i < sb.inodes_num; i )
{
if (strncmp(inodes[i].name, dst_name, dst_name_sz) == 0)
{
in = i;
break;
}
}
if (in < 0)
{
return in;
}
int block_count = st.st_size / BLOCK_PER_SIZE;
if (st.st_size % BLOCK_PER_SIZE != 0)
{
block_count ;
}
int blk_index = -1;
for (int i = 0; i < sb.blocks_num; i )
{
if (blocks[i].next_block_num == -1)
{
blk_index = i;
break;
}
}
if (blk_index < 0)
{
inodes[in].first_block = -1;
return -1;
}
inodes[in].size=st.st_size;
inodes[in].first_block = blk_index;
inodes[in].block_count = block_count;
size_t ret = 0;
for (int i = 0; i < block_count; i )
{
blocks[blk_index i].next_block_num =-2;
size_t r_sz = read(src_fd, (char *)&blocks[blk_index i].data, BLOCK_PER_SIZE);
ret = r_sz;
}
return ret;
}
代码语言:javascript
复制all: test
test: main.o sample_fs.o sample_fs.h
gcc -g -O0 -o test main.o sample_fs.o
main.o: main.c sample_fs.h
gcc -g -O0 -c main.c
sample_fs.o: sample_fs.c
gcc -g -O0 -c sample_fs.c
clean:
rm -rf *.o
rm -rf test*
模拟内核文件系统运行
代码语言:javascript
复制[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ ls
main.c Makefile sample_fs.c sample_fs.h
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ make
gcc -g -O0 -c main.c
gcc -g -O0 -c sample_fs.c
gcc -g -O0 -o test main.o sample_fs.o
// 初始化文件系统
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ ./test 0
done init samplefs
// 申请一个名称为perrynzhou的inode
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ ./test 1
super_block:
inodes_num:10
blocks_num:40
block_size:516
inodes:
inode:0,size:-1 first_block:0 name:perrynzhou
inode:1,size:-1 first_block:-1 name:empty
inode:2,size:-1 first_block:-1 name:empty
inode:3,size:-1 first_block:-1 name:empty
inode:4,size:-1 first_block:-1 name:empty
inode:5,size:-1 first_block:-1 name:empty
inode:6,size:-1 first_block:-1 name:empty
inode:7,size:-1 first_block:-1 name:empty
inode:8,size:-1 first_block:-1 name:empty
inode:9,size:-1 first_block:-1 name:empty
blocks:
block num:0 next_block_num:-2
block num:1 next_block_num:-1
block num:2 next_block_num:-1
block num:3 next_block_num:-1
block num:4 next_block_num:-1
block num:5 next_block_num:-1
block num:6 next_block_num:-1
block num:7 next_block_num:-1
block num:8 next_block_num:-1
block num:9 next_block_num:-1
block num:10 next_block_num:-1
block num:11 next_block_num:-1
block num:12 next_block_num:-1
block num:13 next_block_num:-1
block num:14 next_block_num:-1
block num:15 next_block_num:-1
block num:16 next_block_num:-1
block num:17 next_block_num:-1
block num:18 next_block_num:-1
block num:19 next_block_num:-1
block num:20 next_block_num:-1
block num:21 next_block_num:-1
block num:22 next_block_num:-1
block num:23 next_block_num:-1
block num:24 next_block_num:-1
block num:25 next_block_num:-1
block num:26 next_block_num:-1
block num:27 next_block_num:-1
block num:28 next_block_num:-1
block num:29 next_block_num:-1
block num:30 next_block_num:-1
block num:31 next_block_num:-1
block num:32 next_block_num:-1
block num:33 next_block_num:-1
block num:34 next_block_num:-1
block num:35 next_block_num:-1
block num:36 next_block_num:-1
block num:37 next_block_num:-1
block num:38 next_block_num:-1
block num:39 next_block_num:-1
// 申请my_data.txt的inode,并且写入数据
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ ./test 2
super_block:
inodes_num:10
blocks_num:40
block_size:516
inodes:
inode:0,size:-1 first_block:0 name:perrynzhou
inode:1,size:12 first_block:2 name:my_data.txt
inode:2,size:-1 first_block:-1 name:empty
inode:3,size:-1 first_block:-1 name:empty
inode:4,size:-1 first_block:-1 name:empty
inode:5,size:-1 first_block:-1 name:empty
inode:6,size:-1 first_block:-1 name:empty
inode:7,size:-1 first_block:-1 name:empty
inode:8,size:-1 first_block:-1 name:empty
inode:9,size:-1 first_block:-1 name:empty
blocks:
// 这里使用了3个block,block 0和block 1 已经存储了perrynzhou和my_data.txt的inode元数据,block 2存储了my_data.txt的数据
block num:0 next_block_num:-2
block num:1 next_block_num:-2
block num:2 next_block_num:-2
block num:3 next_block_num:-1
block num:4 next_block_num:-1
block num:5 next_block_num:-1
block num:6 next_block_num:-1
block num:7 next_block_num:-1
block num:8 next_block_num:-1
block num:9 next_block_num:-1
block num:10 next_block_num:-1
block num:11 next_block_num:-1
block num:12 next_block_num:-1
block num:13 next_block_num:-1
block num:14 next_block_num:-1
block num:15 next_block_num:-1
block num:16 next_block_num:-1
block num:17 next_block_num:-1
block num:18 next_block_num:-1
block num:19 next_block_num:-1
block num:20 next_block_num:-1
block num:21 next_block_num:-1
block num:22 next_block_num:-1
block num:23 next_block_num:-1
block num:24 next_block_num:-1
block num:25 next_block_num:-1
block num:26 next_block_num:-1
block num:27 next_block_num:-1
block num:28 next_block_num:-1
block num:29 next_block_num:-1
block num:30 next_block_num:-1
block num:31 next_block_num:-1
block num:32 next_block_num:-1
block num:33 next_block_num:-1
block num:34 next_block_num:-1
block num:35 next_block_num:-1
block num:36 next_block_num:-1
block num:37 next_block_num:-1
block num:38 next_block_num:-1
block num:39 next_block_num:-1