模拟内核实现简易磁盘文件系统实现

2022-08-17 12:56:31 浏览数 (1)

背景

  • 内核的磁盘文件系统核心是如何组织充分利用物理磁盘文件空间来组织数据的存储,其中的数据存储包括的file metadatafile data.磁盘文件系统包括了核心的数据结构,其中包括了磁盘文件系统的超级块inode bitmapblock bitmapblock data文件系统的超级块存储了文件系统的元数据,包括文件系统block大小、inode个数block个数等信息。inode bitmap存储了inode的使用情况;block bitmap存储block的使用情况;block data是实际存储数据的空间。
  • 接下来的会结合内核磁盘文件系统来实现简易的文件系统,如果需要构建用户态的分布式文件系统的文件组织可以看下其实现的思路,不同点就是一个运行内核态的本地磁盘文件系统;一个是运行于用户态的文件系统。设计文件系统需要考虑数据如何高效的组织和检索、访问。比如文件合并,文件空间管理、如何减少inode Cache的压力、元数据的高效访问等等。

模拟内核文件系统数据结构定义

  • 首先需要定义磁盘文件系统的超级块,这里的结构定义struct superblock,这个超级块包含了inodes_num:inode的个数blocks_num多少个bkock、block_size每个block的个数。这些在mkfs.fs创建文件系统的时候根据数据结构和给定的物理空间可以计算出这些元数据的大小
代码语言:javascript复制
struct superblock {
  int inodes_num;
  int blocks_num;
  int block_size;
};
  • 有了超级快需要知道文件元数据的结构inode,在模拟磁盘文件系统实现中也定义定义了struct inode包括size:文件大小first_block:起始block位置block_count:block的个数name:文件名称
代码语言:javascript复制
struct inode   {
  int size;
  int first_block;
  int block_count;
  char name[FILE_NAME_MAX_LEN];
};
  • 数据块的定义在struct disk_blockdisk_block存储了next_block_num:下一个数据块索引data:存储数据的区域
代码语言:javascript复制
struct disk_block {
  int next_block_num;
  char data[BLOCK_PER_SIZE];
};

模拟内核文件系统数据接口实现

  • 文件系统的创建mkfs.xxxfs的命令就是用来初始化一个文件系统,在模拟磁盘文件系统实现中我们这定义了create_fs的函数,这个函数的本质是把实现的磁盘文件系统的超级块数据写入到磁盘中。
代码语言:javascript复制
void create_fs()
{
  sb.inodes_num = 10;
  sb.blocks_num = 40;
  sb.block_size = sizeof(struct disk_block);
  inodes = (struct inode *)malloc(sizeof(struct inode) * sb.inodes_num);
  int i = 0;
  for (; i < sb.inodes_num; i  )
  {
    inodes[i].size = -1;
    inodes[i].first_block = -1;
    snprintf((char *)&inodes[i].name, 256, "%s", "empty");
  }
  blocks = (struct disk_block *)malloc(sizeof(struct disk_block) * sb.blocks_num);
  for (i = 0; i < sb.blocks_num; i  )
  {
    blocks[i].next_block_num = -1;
  }
}
  • 文件系统挂载是采用mount命令,这里模拟过程中定义了mount_fs函数,这个函数是读取超级块的信息,加载到内存,来完成模拟文件系统的挂载
代码语言:javascript复制
// 文件系统挂载
void mount_fs()
{
  int fd = open("/tmp/samplefs_data", O_RDWR, 0755);
  assert(fd != -1);
  read(fd, &sb, sizeof(struct superblock));
  if (inodes == NULL)
  {
    inodes = (struct inode *)calloc(sb.inodes_num, sizeof(struct inode));
    assert(inodes != NULL);
  }
  read(fd, inodes, sizeof(struct inode) * sb.inodes_num);

  if (blocks == NULL)
  {
    blocks = (struct disk_block *)calloc(sb.blocks_num, sizeof(struct disk_block));
    assert(blocks != NULL);
  }
  read(fd, blocks, sizeof(struct disk_block) * sb.blocks_num);
  close(fd);
}
  • 文件系统的数据同步在模拟过程中采用了sync_fs函数,是要完成同步文件系统的数据和元数据。
代码语言:javascript复制
//文件系统同步
void sync_fs()
{
  int fd = open("/tmp/samplefs_data", O_RDWR, 0755);
  assert(fd != -1);
  int i = 0;

  write(fd, &sb, sizeof(struct superblock));
  for (; i < sb.inodes_num; i  )
  {
    write(fd, &inodes[i], sizeof(struct inode));
  }
  for (i = 0; i < sb.blocks_num; i  )
  {
    write(fd, &blocks[i], sizeof(struct disk_block));
  }
  close(fd);
}
  • 模拟过程中定义了alloc_file函数来完成inode的申请,这个是遍历inodes来查找空闲的inode,初始化后写入到空闲的block。
代码语言:javascript复制
int alloc_file(char *name)
{
  int inode_index = find_empty_inode();
  int block_index = find_empty_block();
  snprintf((char *)inodes[inode_index].name, 256, "%s", name);
  inodes[inode_index].first_block = block_index;
  blocks[block_index].next_block_num = -2;
  return inode_index;
}
  • 模拟了文件系统写入的函数write_file,这个过程过程是先找到对应的inode,然后找到空闲的数据块写入数据
代码语言:javascript复制
int write_file(char *src_name, char *dst_name)
{
  int src_fd = open(src_name, O_RDONLY, 0644);

  assert(src_fd != -1);
  struct stat st;
  stat(src_name, &st);
  int in = -1;
  size_t dst_name_sz = strlen(dst_name);

  for (int i = 0; i < sb.inodes_num; i  )
  {
    if (strncmp(inodes[i].name, dst_name, dst_name_sz) == 0)
    {
      in = i;
      break;
    }
  }
  if (in < 0)
  {
    return in;
  }
  int block_count = st.st_size / BLOCK_PER_SIZE;
  if (st.st_size % BLOCK_PER_SIZE != 0)
  {
    block_count  ;
  }
  int blk_index = -1;
  for (int i = 0; i < sb.blocks_num; i  )
  {
    if (blocks[i].next_block_num == -1)
    {
      blk_index = i;
      break;
    }
  }
  if (blk_index < 0)
  {
    inodes[in].first_block = -1;
    return -1;
  }
  inodes[in].size=st.st_size;
  inodes[in].first_block = blk_index;
  inodes[in].block_count = block_count;

  size_t ret = 0;
  for (int i = 0; i < block_count; i  )
  {

    blocks[blk_index i].next_block_num =-2;
    size_t r_sz = read(src_fd, (char *)&blocks[blk_index i].data, BLOCK_PER_SIZE);
    ret  = r_sz;
  }
  return ret;
}

模拟内核文件系统代码

  • main.c主函数
代码语言:javascript复制
/*************************************************************************
    > File Name: main.c
  > Author:perrynzhou
  > Mail:perrynzhou@gmail.com
  > Created Time: Wed 23 Mar 2022 05:06:43 AM UTC
 ************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include "sample_fs.h"
int main(int argc, char *argv[])
{
  if (argc != 2)
  {
    fprintf(stdout,"usage:%s {1:init fs} {2:mount fs} {3:write a file}n",argv[0]);
    return -1;
  }
  int flag = atoi(argv[1]);
  switch (flag)
  {

  case 0:
    create_fs();
    sync_fs();
    fprintf(stdout, "done init samplefsn");
    break;
  case 1:
    mount_fs();
    alloc_file("perrynzhou");
    sync_fs();
    print_fs();
    break;
  case 2:
    mount_fs();
    alloc_file("my_data.txt");
    write_file("/tmp/a.txt", "my_data.txt");
    sync_fs();
    print_fs();
    break;
  default:
    fprintf(stdout, "unknow commandn");
    break;
  }
}
  • sample_fs.h定义
代码语言:javascript复制
/*************************************************************************
    > File Name: sample_fs.h
  > Author:perrynzhou 
  > Mail:perrynzhou@gmail.com 
  > Created Time: Wed 23 Mar 2022 04:43:23 AM UTC
 ************************************************************************/

#ifndef _SAMPLE_FS_H
#define _SAMPLE_FS_H
#define FILE_NAME_MAX_LEN 255
#define BLOCK_PER_SIZE   512
//  samplefs文件系统的元数据信息
// 包括 inode



struct superblock {
  int inodes_num;
  int blocks_num;
  int block_size;
};

struct inode   {
  int size;
  int first_block;
  int block_count;
  char name[FILE_NAME_MAX_LEN];
};

struct disk_block {
  int next_block_num;
  char data[BLOCK_PER_SIZE];
};
// 文件系统创建
void create_fs();
// 文件系统挂载
void mount_fs();
//文件系统同步
void sync_fs();

void print_fs();

int alloc_file(char *name);
int write_file(char *src_name,char *dst_name);
int read_file(char *name);
#endif
  • sample_fs.c实现
代码语言:javascript复制
/*************************************************************************
    > File Name: sample_fs.c
  > Author:perrynzhou
  > Mail:perrynzhou@gmail.com
  > Created Time: Wed 23 Mar 2022 04:48:32 AM UTC
 ************************************************************************/

#include "sample_fs.h"
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
struct superblock sb;

struct inode *inodes;
struct disk_block *blocks;
// 文件系统创建
void create_fs()
{
  sb.inodes_num = 10;
  sb.blocks_num = 40;
  sb.block_size = sizeof(struct disk_block);
  inodes = (struct inode *)malloc(sizeof(struct inode) * sb.inodes_num);
  int i = 0;
  for (; i < sb.inodes_num; i  )
  {
    inodes[i].size = -1;
    inodes[i].first_block = -1;
    snprintf((char *)&inodes[i].name, 256, "%s", "empty");
  }
  blocks = (struct disk_block *)malloc(sizeof(struct disk_block) * sb.blocks_num);
  for (i = 0; i < sb.blocks_num; i  )
  {
    blocks[i].next_block_num = -1;
  }
}
void print_fs()
{
  fprintf(stdout, "super_block:n");
  fprintf(stdout, "t inodes_num:%dn", sb.inodes_num);
  fprintf(stdout, "t blocks_num:%dn", sb.blocks_num);
  fprintf(stdout, "t block_size:%dn", sb.block_size);

  fprintf(stdout, "inodes:n");
  int i;
  for (i = 0; i < sb.inodes_num; i  )
  {
    fprintf(stdout, "t inode:%d,size:%d  first_block:%d name:%sn",i, inodes[i].size, inodes[i].first_block, (char *)&inodes[i].name);
  }

  fprintf(stdout, "blocks:n");
  for (i = 0; i < sb.blocks_num; i  )
  {
    fprintf(stdout, "t block num:%d next_block_num:%d  n", i, blocks[i].next_block_num);
  }
}
// 文件系统挂载
void mount_fs()
{
  int fd = open("/tmp/samplefs_data", O_RDWR, 0755);
  assert(fd != -1);
  read(fd, &sb, sizeof(struct superblock));
  if (inodes == NULL)
  {
    inodes = (struct inode *)calloc(sb.inodes_num, sizeof(struct inode));
    assert(inodes != NULL);
  }
  read(fd, inodes, sizeof(struct inode) * sb.inodes_num);

  if (blocks == NULL)
  {
    blocks = (struct disk_block *)calloc(sb.blocks_num, sizeof(struct disk_block));
    assert(blocks != NULL);
  }
  read(fd, blocks, sizeof(struct disk_block) * sb.blocks_num);
  close(fd);
}
//文件系统同步
void sync_fs()
{
  int fd = open("/tmp/samplefs_data", O_RDWR, 0755);
  assert(fd != -1);
  int i = 0;

  write(fd, &sb, sizeof(struct superblock));
  for (; i < sb.inodes_num; i  )
  {
    write(fd, &inodes[i], sizeof(struct inode));
  }
  for (i = 0; i < sb.blocks_num; i  )
  {
    write(fd, &blocks[i], sizeof(struct disk_block));
  }
  close(fd);
}

int find_empty_inode()
{
  int i;
  for (i = 0; i < sb.inodes_num; i  )
  {
    if (inodes[i].first_block ==-1)
    {
      return i;
    }
  }
  return -1;
}
int find_empty_block()
{
  int i;
  for (i = 0; i < sb.blocks_num; i  )
  {
    if (blocks[i].next_block_num == -1)
    {
      return i;
    }
  }
  return -1;
}
int alloc_file(char *name)
{
  int inode_index = find_empty_inode();
  int block_index = find_empty_block();
  snprintf((char *)inodes[inode_index].name, 256, "%s", name);
  inodes[inode_index].first_block = block_index;
  blocks[block_index].next_block_num = -2;
  return inode_index;
}
int write_file(char *src_name, char *dst_name)
{
  int src_fd = open(src_name, O_RDONLY, 0644);

  assert(src_fd != -1);
  struct stat st;
  stat(src_name, &st);
  int in = -1;
  size_t dst_name_sz = strlen(dst_name);

  for (int i = 0; i < sb.inodes_num; i  )
  {
    if (strncmp(inodes[i].name, dst_name, dst_name_sz) == 0)
    {
      in = i;
      break;
    }
  }
  if (in < 0)
  {
    return in;
  }
  int block_count = st.st_size / BLOCK_PER_SIZE;
  if (st.st_size % BLOCK_PER_SIZE != 0)
  {
    block_count  ;
  }
  int blk_index = -1;
  for (int i = 0; i < sb.blocks_num; i  )
  {
    if (blocks[i].next_block_num == -1)
    {
      blk_index = i;
      break;
    }
  }
  if (blk_index < 0)
  {
    inodes[in].first_block = -1;
    return -1;
  }
  inodes[in].size=st.st_size;
  inodes[in].first_block = blk_index;
  inodes[in].block_count = block_count;

  size_t ret = 0;
  for (int i = 0; i < block_count; i  )
  {

    blocks[blk_index i].next_block_num =-2;
    size_t r_sz = read(src_fd, (char *)&blocks[blk_index i].data, BLOCK_PER_SIZE);
    ret  = r_sz;
  }
  return ret;
}
  • Makefile
代码语言:javascript复制
all: test

test: main.o sample_fs.o sample_fs.h
	gcc -g -O0 -o test main.o sample_fs.o

main.o: main.c sample_fs.h 
	gcc -g -O0 -c main.c 

sample_fs.o: sample_fs.c 
	gcc -g -O0 -c sample_fs.c

clean: 
	rm -rf *.o
	rm -rf test*

模拟内核文件系统运行

代码语言:javascript复制
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ ls
main.c  Makefile  sample_fs.c  sample_fs.h
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ make
gcc -g -O0 -c main.c 
gcc -g -O0 -c sample_fs.c
gcc -g -O0 -o test main.o sample_fs.o

// 初始化文件系统
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ ./test  0
done init samplefs

// 申请一个名称为perrynzhou的inode
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ ./test  1
super_block:
         inodes_num:10
         blocks_num:40
         block_size:516
inodes:
         inode:0,size:-1  first_block:0 name:perrynzhou
         inode:1,size:-1  first_block:-1 name:empty
         inode:2,size:-1  first_block:-1 name:empty
         inode:3,size:-1  first_block:-1 name:empty
         inode:4,size:-1  first_block:-1 name:empty
         inode:5,size:-1  first_block:-1 name:empty
         inode:6,size:-1  first_block:-1 name:empty
         inode:7,size:-1  first_block:-1 name:empty
         inode:8,size:-1  first_block:-1 name:empty
         inode:9,size:-1  first_block:-1 name:empty
blocks:
         block num:0 next_block_num:-2  
         block num:1 next_block_num:-1  
         block num:2 next_block_num:-1  
         block num:3 next_block_num:-1  
         block num:4 next_block_num:-1  
         block num:5 next_block_num:-1  
         block num:6 next_block_num:-1  
         block num:7 next_block_num:-1  
         block num:8 next_block_num:-1  
         block num:9 next_block_num:-1  
         block num:10 next_block_num:-1  
         block num:11 next_block_num:-1  
         block num:12 next_block_num:-1  
         block num:13 next_block_num:-1  
         block num:14 next_block_num:-1  
         block num:15 next_block_num:-1  
         block num:16 next_block_num:-1  
         block num:17 next_block_num:-1  
         block num:18 next_block_num:-1  
         block num:19 next_block_num:-1  
         block num:20 next_block_num:-1  
         block num:21 next_block_num:-1  
         block num:22 next_block_num:-1  
         block num:23 next_block_num:-1  
         block num:24 next_block_num:-1  
         block num:25 next_block_num:-1  
         block num:26 next_block_num:-1  
         block num:27 next_block_num:-1  
         block num:28 next_block_num:-1  
         block num:29 next_block_num:-1  
         block num:30 next_block_num:-1  
         block num:31 next_block_num:-1  
         block num:32 next_block_num:-1  
         block num:33 next_block_num:-1  
         block num:34 next_block_num:-1  
         block num:35 next_block_num:-1  
         block num:36 next_block_num:-1  
         block num:37 next_block_num:-1  
         block num:38 next_block_num:-1  
         block num:39 next_block_num:-1 

// 申请my_data.txt的inode,并且写入数据
[perrynzhou@ubuntu-dev ~/source/perrynzhou/sample-fs/src]$ ./test  2
super_block:
         inodes_num:10
         blocks_num:40
         block_size:516
inodes:
         inode:0,size:-1  first_block:0 name:perrynzhou
         inode:1,size:12  first_block:2 name:my_data.txt
         inode:2,size:-1  first_block:-1 name:empty
         inode:3,size:-1  first_block:-1 name:empty
         inode:4,size:-1  first_block:-1 name:empty
         inode:5,size:-1  first_block:-1 name:empty
         inode:6,size:-1  first_block:-1 name:empty
         inode:7,size:-1  first_block:-1 name:empty
         inode:8,size:-1  first_block:-1 name:empty
         inode:9,size:-1  first_block:-1 name:empty
blocks:
		// 这里使用了3个block,block 0和block 1 已经存储了perrynzhou和my_data.txt的inode元数据,block 2存储了my_data.txt的数据
         block num:0 next_block_num:-2  
         block num:1 next_block_num:-2  
         block num:2 next_block_num:-2  
         block num:3 next_block_num:-1  
         block num:4 next_block_num:-1  
         block num:5 next_block_num:-1  
         block num:6 next_block_num:-1  
         block num:7 next_block_num:-1  
         block num:8 next_block_num:-1  
         block num:9 next_block_num:-1  
         block num:10 next_block_num:-1  
         block num:11 next_block_num:-1  
         block num:12 next_block_num:-1  
         block num:13 next_block_num:-1  
         block num:14 next_block_num:-1  
         block num:15 next_block_num:-1  
         block num:16 next_block_num:-1  
         block num:17 next_block_num:-1  
         block num:18 next_block_num:-1  
         block num:19 next_block_num:-1  
         block num:20 next_block_num:-1  
         block num:21 next_block_num:-1  
         block num:22 next_block_num:-1  
         block num:23 next_block_num:-1  
         block num:24 next_block_num:-1  
         block num:25 next_block_num:-1  
         block num:26 next_block_num:-1  
         block num:27 next_block_num:-1  
         block num:28 next_block_num:-1  
         block num:29 next_block_num:-1  
         block num:30 next_block_num:-1  
         block num:31 next_block_num:-1  
         block num:32 next_block_num:-1  
         block num:33 next_block_num:-1  
         block num:34 next_block_num:-1  
         block num:35 next_block_num:-1  
         block num:36 next_block_num:-1  
         block num:37 next_block_num:-1  
         block num:38 next_block_num:-1  
         block num:39 next_block_num:-1

0 人点赞