```c
/*
 * Create a new socket for the given protocol family and return a file
 * descriptor for it.
 *
 * A struct socket is allocated, the family-specific create() hook is
 * invoked to build and attach the lower-level per-protocol state, and
 * finally a file descriptor referring to the socket's inode is handed out.
 *
 * Returns the new descriptor on success, or a negative errno:
 *   -EINVAL  unknown family, unsupported type, negative protocol,
 *            or no descriptor could be obtained;
 *   -ENOSR   no socket could be allocated;
 *   <0       whatever the family's create() hook reported.
 */
static int sock_socket(int family, int type, int protocol)
{
	int i, fd;
	struct socket *sock;
	struct proto_ops *ops;

	/*
	 * Look up the operation table registered for this family in pops[]
	 * (per the original note, the table is filled in by sock_register
	 * during system initialisation).
	 */
	for (i = 0; i < NPROTO; i++)
	{
		if (pops[i] == NULL)
			continue;
		if (pops[i]->family == family)
			break;
	}
	if (i == NPROTO)
	{
		return -EINVAL;
	}
	ops = pops[i];

	/* Reject socket types we do not know about, and negative protocols. */
	if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
	     type != SOCK_SEQPACKET && type != SOCK_RAW &&
	     type != SOCK_PACKET) || protocol < 0)
		return(-EINVAL);

	/* Allocate the generic socket structure. */
	if (!(sock = sock_alloc()))
	{
		printk("NET: sock_socket: no more sockets\n");
		return(-ENOSR);	/* Was: EAGAIN, but we are out of
				   system resources! */
	}

	/* Record the type and the family's operation table. */
	sock->type = type;
	sock->ops = ops;

	/* Let the family build its own state and link it to this socket. */
	if ((i = sock->ops->create(sock, protocol)) < 0)
	{
		sock_release(sock);
		return(i);
	}

	/* Expose the socket to the caller through a file descriptor. */
	if ((fd = get_fd(SOCK_INODE(sock))) < 0)
	{
		sock_release(sock);
		return(-EINVAL);
	}
	return(fd);
}
```
由上面的代码可以知道,socket函数主要是三个步骤,下面逐个分析。
1 拿到一个新的socket结构体
```c
/*
 * Allocate a new socket.
 *
 * The socket is not a separate allocation: it is the socket_i member
 * embedded in a freshly obtained inode. The inode is marked as a socket
 * owned by the current process, the socket fields are reset to an
 * unconnected state, and the global in-use counter is bumped.
 *
 * Returns the socket, or NULL when no inode is available.
 */
struct socket *sock_alloc(void)
{
	struct inode *inode;
	struct socket *sock;

	/* Obtain an unused inode to host the socket. */
	inode = get_empty_inode();
	if (!inode)
		return NULL;

	/* Mark the inode as a socket owned by the calling process. */
	inode->i_mode = S_IFSOCK;
	inode->i_sock = 1;
	inode->i_uid = current->uid;
	inode->i_gid = current->gid;

	/* The socket lives inside the inode; reset it to a clean state. */
	sock = &inode->u.socket_i;
	sock->state = SS_UNCONNECTED;
	sock->flags = 0;
	sock->ops = NULL;
	sock->data = NULL;
	sock->conn = NULL;
	sock->iconn = NULL;
	sock->next = NULL;
	/*
	 * Point at the inode's wait queue; per the original note this is
	 * what blocking network calls use to sleep and be woken up.
	 */
	sock->wait = &inode->i_wait;
	sock->inode = inode;	/* "backlink": we could use pointer arithmetic instead */
	sock->fasync_list = NULL;

	/* One more socket in use. */
	sockets_in_use++;
	return sock;
}
```
2 sock->ops->create:根据pops数组的结构(参见[网络源码初始化分析](https://blog.csdn.net/THEANARKH/article/details/85550187))可知,create函数对应的是inet_create
```c
/*
 * Create the lower-level struct sock for a socket and link the two
 * together (sk->socket points up to the socket, sock->data points down
 * to the sock).
 *
 * sock:     the socket being set up; sock->type selects the protocol
 *           operation table.
 * protocol: requested protocol; 0 is accepted for stream/seqpacket and
 *           datagram sockets and is replaced by IPPROTO_TCP /
 *           IPPROTO_UDP. Raw and packet sockets require a non-zero value.
 *
 * Returns 0 on success, or:
 *   -ENOBUFS          the sock could not be allocated;
 *   -EPROTONOSUPPORT  protocol does not fit the socket type;
 *   -EPERM            raw/packet socket requested and suser() failed;
 *   -ESOCKTNOSUPPORT  unknown socket type;
 *   non-zero          whatever the protocol's init hook returned.
 */
static int inet_create(struct socket *sock, int protocol)
{
struct sock *sk;
struct proto *prot;
int err;
// Allocate the struct sock
sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);
if (sk == NULL)
return(-ENOBUFS);
sk->num = 0;
sk->reuse = 0;
// Pick the protocol operation table from the socket type; every
// rejection path frees sk before returning
switch(sock->type)
{
case SOCK_STREAM:
case SOCK_SEQPACKET:
if (protocol && protocol != IPPROTO_TCP)
{
kfree_s((void *)sk, sizeof(*sk));
return(-EPROTONOSUPPORT);
}
protocol = IPPROTO_TCP;
sk->no_check = TCP_NO_CHECK;
// Operation table for TCP
prot = &tcp_prot;
break;
case SOCK_DGRAM:
if (protocol && protocol != IPPROTO_UDP)
{
kfree_s((void *)sk, sizeof(*sk));
return(-EPROTONOSUPPORT);
}
protocol = IPPROTO_UDP;
sk->no_check = UDP_NO_CHECK;
prot=&udp_prot;
break;
// The next two socket types are only allowed when suser() succeeds
case SOCK_RAW:
if (!suser())
{
kfree_s((void *)sk, sizeof(*sk));
return(-EPERM);
}
if (!protocol)
{
kfree_s((void *)sk, sizeof(*sk));
return(-EPROTONOSUPPORT);
}
prot = &raw_prot;
sk->reuse = 1;
sk->no_check = 0; /*
* Doesn't matter no checksum is
* performed anyway.
*/
sk->num = protocol;
break;
case SOCK_PACKET:
if (!suser())
{
kfree_s((void *)sk, sizeof(*sk));
return(-EPERM);
}
if (!protocol)
{
kfree_s((void *)sk, sizeof(*sk));
return(-EPROTONOSUPPORT);
}
prot = &packet_prot;
sk->reuse = 1;
sk->no_check = 0; /* Doesn't matter no checksum is
* performed anyway.
*/
sk->num = protocol;
break;
default:
kfree_s((void *)sk, sizeof(*sk));
return(-ESOCKTNOSUPPORT);
}
// The sock's socket field points up at the owning socket
sk->socket = sock;
#ifdef CONFIG_TCP_NAGLE_OFF
sk->nonagle = 1;
#else
sk->nonagle = 0;
#endif
sk->type = sock->type;
sk->stamp.tv_sec=0;
sk->protocol = protocol;
sk->wmem_alloc = 0;
sk->rmem_alloc = 0;
sk->sndbuf = SK_WMEM_MAX;
sk->rcvbuf = SK_RMEM_MAX;
sk->pair = NULL;
sk->opt = NULL;
sk->write_seq = 0;
sk->acked_seq = 0;
sk->copied_seq = 0;
sk->fin_seq = 0;
sk->urg_seq = 0;
sk->urg_data = 0;
sk->proc = 0;
sk->rtt = 0; /*TCP_WRITE_TIME << 3;*/
sk->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/
sk->mdev = 0;
sk->backoff = 0;
sk->packets_out = 0;
sk->cong_window = 1; /* start with only sending one packet at a time. */
sk->cong_count = 0;
sk->ssthresh = 0;
sk->max_window = 0;
sk->urginline = 0;
sk->intr = 0;
sk->linger = 0;
sk->destroy = 0;
sk->priority = 1;
sk->shutdown = 0;
sk->keepopen = 0;
sk->zapped = 0;
sk->done = 0;
sk->ack_backlog = 0;
sk->window = 0;
sk->bytes_rcv = 0;
sk->state = TCP_CLOSE;
sk->dead = 0;
sk->ack_timed = 0;
sk->partial = NULL;
sk->user_mss = 0;
sk->debug = 0;
/* this is how many unacked bytes we will accept for this socket. */
sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
/* how many packets we should send before forcing an ack.
if this is set to zero it is the same as sk->delay_acks = 0 */
sk->max_ack_backlog = 0;
sk->inuse = 0;
sk->delay_acks = 0;
skb_queue_head_init(&sk->write_queue);
skb_queue_head_init(&sk->receive_queue);
sk->mtu = 576;
// Lower-layer (transport protocol) operation table chosen above
sk->prot = prot;
// Share the wait queue pointer held by the socket
sk->sleep = sock->wait;
sk->daddr = 0;
sk->saddr = 0 /* ip_my_addr() */;
sk->err = 0;
sk->next = NULL;
sk->pair = NULL;
sk->send_tail = NULL;
sk->send_head = NULL;
sk->timeout = 0;
sk->broadcast = 0;
sk->localroute = 0;
init_timer(&sk->timer);
init_timer(&sk->retransmit_timer);
sk->timer.data = (unsigned long)sk;
sk->timer.function = &net_timer;
skb_queue_head_init(&sk->back_log);
sk->blog = 0;
// The socket's data field points down at the sock
sock->data =(void *) sk;
// Initialise the template TCP header kept in the sock
sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
sk->dummy_th.res1=0;
sk->dummy_th.res2=0;
sk->dummy_th.urg_ptr = 0;
sk->dummy_th.fin = 0;
sk->dummy_th.syn = 0;
sk->dummy_th.rst = 0;
sk->dummy_th.psh = 0;
sk->dummy_th.ack = 0;
sk->dummy_th.urg = 0;
sk->dummy_th.dest = 0;
sk->ip_tos=0;
sk->ip_ttl=64;
#ifdef CONFIG_IP_MULTICAST
sk->ip_mc_loop=1;
sk->ip_mc_ttl=1;
*sk->ip_mc_name=0;
sk->ip_mc_list=NULL;
#endif
// Default notification callbacks. Per the original author's note these
// are invoked by the lower layer to wake a process blocked in a network
// call once its condition is met -- TODO confirm against def_callback*
sk->state_change = def_callback1;
sk->data_ready = def_callback2;
sk->write_space = def_callback3;
sk->error_report = def_callback1;
if (sk->num)
{
/*
* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
// Register the sock under its number via put_sock (presumably into
// the protocol's sock array -- verify against put_sock)
put_sock(sk->num, sk);
sk->dummy_th.source = ntohs(sk->num);
}
// Run the protocol's optional init hook; the original author notes that
// TCP and UDP do not provide one. On failure the sock is destroyed.
if (sk->prot->init)
{
err = sk->prot->init(sk);
if (err != 0)
{
destroy_sock(sk);
return(err);
}
}
return(0);
}
```
3 get_fd,经过上面的几个步骤,我们拿到了一个inode、一个socket、一个sock。最后我们要再拿到一个文件描述符返回给应用层,在操作系统中,每个进程有一个fd数组,记录进程打开的文件信息,数组的一个或多个项指向一个struct file结构体,一个或多个file结构体又指向一个inode结构体。所以我们拿到一个inode后,还需要拿到一个file结构,最后拿到一个fd结构,返回给用户。
```c
/*
 * Bind an inode to a fresh file structure and install that file in the
 * first free slot of the current process's descriptor table.
 *
 * inode: the inode the new file should refer to; may be NULL, in which
 *        case no inode reference is taken.
 *
 * Returns the new file descriptor, or -1 when either no file structure
 * or no free descriptor slot is available.
 */
static int get_fd(struct inode *inode)
{
	int fd;
	struct file *file;

	/*
	 *	Find a file descriptor suitable for return to the user.
	 */

	/* Grab an unused file structure. */
	file = get_empty_filp();
	if (!file)
		return(-1);

	/* Scan the process's descriptor table for the first free slot. */
	for (fd = 0; fd < NR_OPEN; fd++)
		if (!current->files->fd[fd])
			break;
	if (fd == NR_OPEN)
	{
		/* No slot left: reset the count on the file we just obtained. */
		file->f_count = 0;
		return(-1);
	}

	/* New descriptors start with close-on-exec cleared. */
	FD_CLR(fd, &current->files->close_on_exec);
	current->files->fd[fd] = file;

	/* Route file operations on this descriptor to the socket layer. */
	file->f_op = &socket_file_ops;
	file->f_mode = 3;	/* NOTE(review): presumably read|write -- confirm */
	file->f_flags = O_RDWR;
	file->f_count = 1;

	/* Attach the inode and take a reference on it. */
	file->f_inode = inode;
	if (inode)
		inode->i_count++;
	file->f_pos = 0;
	return(fd);
}
```
最后,当我们执行一个socket函数时,内存视图是: