写在前面:本系列文章先把各个层对应的文件源码剖析一遍,最后再穿插起来,理清整个协议栈网络数据包的上下传送通道,从整体实现上进行把握。
图片来源于《Linux 内核网络栈源代码情景分析》
更上层函数:tcp socket函数介绍。本篇则是介绍BSD Socket层。其对应函数集定义在socket.c 文件中,阅读源码后,你会发现这些函数都是层层嵌套调用表现出了上下层之间的关系。内核版本:Linux 1.2.13
源码剖析:
为方便大家理清思路,先介绍几个中间函数。建议:像这些大型软件项目,函数内通常还会调用一些公用的基础类的工具函数,我们在阅读源码时,应该先弄清楚这些函数,这样当阅读对应函数时,能很好地把握该函数的内部细节。
/*下面两个函数实现地址用户空间和内核空间地址之间的相互移动*/ //从uaddr拷贝ulen大小的数据到kaddr static int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr) { int err; if(ulen<0||ulen>MAX_SOCK_ADDR) return -EINVAL; if(ulen==0) return 0; //检查用户空间的指针所指的指定大小存储块是否可读 if((err=verify_area(VERIFY_READ,uaddr,ulen))<0) return err; memcpy_fromfs(kaddr,uaddr,ulen);//实质是memcpy函数 return 0; } //注意的是,从内核拷贝数据到用户空间是值-结果参数 //ulen这个指向某个整数变量的指针,当函数被调用的时候,它告诉内核需要拷贝多少 //函数返回时,该参数作为一个结果,告诉进程,内核实际拷贝了多少信息 static int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen) { int err; int len; //判断ulen指向的存储块是否可写,就是判断ulen是否可作为左值 if((err=verify_area(VERIFY_WRITE,ulen,sizeof(*ulen)))<0) return err; len=get_fs_long(ulen);//len = *ulen,ulen作为值传入,告诉要拷贝多少数据 if(len>klen) len=klen;//供不应求,按供的算。实际拷贝的数据 if(len<0 || len> MAX_SOCK_ADDR) return -EINVAL; if(len) { //判断uaddr用户空间所指的存储块是否可写 if((err=verify_area(VERIFY_WRITE,uaddr,len))<0) return err; memcpy_tofs(uaddr,kaddr,len);//实质是调用memcpy } put_fs_long(len,ulen);//*ulen = len,作为结果返回,即实际拷贝了多少数据 return 0; }下面这个函数一看就知道什么意思
static inline unsigned long get_user_long(const int *addr) { return *addr; } #define get_fs_long(addr) get_user_long((int *)(addr))
/* * 为网络套接字分配一个文件描述符 */ static int get_fd(struct inode *inode) { int fd; struct file *file; /* * Find a file descriptor suitable for return to the user. */ file = get_empty_filp();//分配文件对象,文件描述符对应实体,file结构体指示一个打开的文件,filp:file pointer if (!file) return(-1); //找到可用的文件描述符 for (fd = 0; fd < NR_OPEN; ++fd) if (!current->files->fd[fd]) break; //没有空闲可用的文件描述符,则退出 if (fd == NR_OPEN) { file->f_count = 0; return(-1); } //在文件描述符集合中删除一个新的文件描述符 FD_CLR(fd, ¤t->files->close_on_exec); current->files->fd[fd] = file;//赋值,挂钩 file->f_op = &socket_file_ops;//指定操作函数集,实现了网络操作的普通文件接口 file->f_mode = 3;//权限 file->f_flags = O_RDWR;//标志,可读可写 file->f_count = 1;//引用计数 file->f_inode = inode;//与文件inode建立联系,inode为对文件的索引 if (inode) inode->i_count++;//inode的引用计数也要增1 file->f_pos = 0;//偏移值 return(fd); }
/* * 通过inode结构查找对应的socket结构 */ inline struct socket *socki_lookup(struct inode *inode) { return &inode->u.socket_i;//socket结构是作为inode结构中的一个变量 } /* * 给定文件描述符返回socket结构以及file结构指针 */ static inline struct socket *sockfd_lookup(int fd, struct file **pfile) { struct file *file; struct inode *inode; //有效性检查,并从文件描述符中得到对应的file结构 if (fd < 0 || fd >= NR_OPEN || !(file = current->files->fd[fd])) return NULL; //得到对应inode结构 inode = file->f_inode; if (!inode || !inode->i_sock) return NULL; if (pfile) *pfile = file;//参数返回file结构指针 //返回inode对应的socket结构 return socki_lookup(inode); }下面开始socket结构的处理了
分配一个socket结构
/* * 分配一个socket结构 */ struct socket *sock_alloc(void) { struct inode * inode; struct socket * sock; inode = get_empty_inode();//分配一个inode对象 if (!inode) return NULL; //获得的inode结构的初始化 inode->i_mode = S_IFSOCK; inode->i_sock = 1; inode->i_uid = current->uid; inode->i_gid = current->gid; sock = &inode->u.socket_i; sock->state = SS_UNCONNECTED; sock->flags = 0; sock->ops = NULL; sock->data = NULL; sock->conn = NULL; sock->iconn = NULL; sock->next = NULL; sock->wait = &inode->i_wait; sock->inode = inode;//回绑 sock->fasync_list = NULL; sockets_in_use++;//系统当前使用的套接字数量加1 return sock; }
/* * Release a socket. */ //释放对端的套接字 static inline void sock_release_peer(struct socket *peer) { peer->state = SS_DISCONNECTING;//状态切换到正在处理关闭连接 wake_up_interruptible(peer->wait);//唤醒指定的注册在等待队列上的进程 sock_wake_async(peer, 1);//异步唤醒,涉及到套接字状态的改变,需要通知相应进程进行某种处理 } /* * 释放(关闭)一个套接字 */ void sock_release(struct socket *sock) { int oldstate; struct socket *peersock, *nextsock; //只要套接字不是出于未连接状态,就将其置为正在处理关闭连接状态 if ((oldstate = sock->state) != SS_UNCONNECTED) sock->state = SS_DISCONNECTING; /* * Wake up anyone waiting for connections. */ //iconn只用于服务器端,表示等待连接但尚未完成连接的客户端socket结构链表 for (peersock = sock->iconn; peersock; peersock = nextsock) { nextsock = peersock->next; sock_release_peer(peersock); } /* * Wake up anyone we're connected to. First, we release the * protocol, to give it a chance to flush data, etc. */ //如果该套接字已连接,peersock指向其连接的服务器端套接字 peersock = (oldstate == SS_CONNECTED) ? sock->conn : NULL; //转调用release函数 if (sock->ops) sock->ops->release(sock, peersock); //释放对端套接字 if (peersock) sock_release_peer(peersock); --sockets_in_use; /* 数量减1 */ iput(SOCK_INODE(sock)); }socket 结构
/* * Internal representation of a socket. not all the fields are used by * all configurations: * * server client * conn client connected to server connected to * iconn list of clients -unused- * awaiting connections * wait sleep for clients, sleep for connection, * sleep for i/o sleep for i/o */ //该结构表示一个网络套接字 struct socket { short type; /* 套接字所用的流类型*/ socket_state state;//套接字所处状态 long flags;//标识字段,目前尚无明确作用 struct proto_ops *ops; /* 操作函数集指针 */ /* data保存指向‘私有'数据结构指针,在不同的域指向不同的数据结构 */ //在INET域,指向sock结构,UNIX域指向unix_proto_data结构 void *data; //下面两个字段只用于UNIX域 struct socket *conn; /* 指向客户端连接的服务器端套接字 */ struct socket *iconn; /* 指向正等待连接的客户端 */ struct socket *next;//链表 struct wait_queue **wait; /* 等待队列 */ struct inode *inode;//inode结构指针 struct fasync_struct *fasync_list; /* 异步唤醒链表结构 */ };
/* * 系统调用,创建套接字socket。涉及到socket结构的创建. */ static int sock_socket(int family, int type, int protocol) { int i, fd; struct socket *sock; struct proto_ops *ops; /* 匹配应用程序调用socket()函数时指定的协议 */ for (i = 0; i < NPROTO; ++i) { if (pops[i] == NULL) continue; if (pops[i]->family == family) break; } //没有匹配的协议,则出错退出 if (i == NPROTO) { return -EINVAL; } ops = pops[i]; /* * Check that this is a type that we know how to manipulate and * the protocol makes sense here. The family can still reject the * protocol later. */ //套接字类型检查 if ((type != SOCK_STREAM && type != SOCK_DGRAM && type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_PACKET) || protocol < 0) return(-EINVAL); /* * Allocate the socket and allow the family to set things up. if * the protocol is 0, the family is instructed to select an appropriate * default. */ //分配套接字结构 if (!(sock = sock_alloc())) { printk("NET: sock_socket: no more sockets\n"); return(-ENOSR); /* Was: EAGAIN, but we are out of system resources! */ } //指定对应类型,协议,以及操作函数集 sock->type = type; sock->ops = ops; //分配下层sock结构,sock结构是比socket结构更底层的表示一个套接字的结构 //前面博文有说明:http://blog.csdn.net/wenqian1991/article/details/21740945 //socket是通用的套接字结构体,而sock与具体使用的协议相关 if ((i = sock->ops->create(sock, protocol)) < 0) { sock_release(sock); return(i); } //分配一个文件描述符并在后面返回给应用层序作为以后的操作句柄 if ((fd = get_fd(SOCK_INODE(sock))) < 0) { sock_release(sock); return(-EINVAL); } return(fd); }
/* * Bind a name to a socket. Nothing much to do here since it's * the protocol's responsibility to handle the local address. * * We move the socket address to kernel space before we call * the protocol layer (having also checked the address is ok). */ //建议对于理解这类系统调用函数,先看看应用层的对应函数,如bind,listen等 //bind函数对应的BSD层函数,用于绑定一个本地地址,服务器端 //umyaddr表示需要绑定的地址结构,addrlen表示改地址结构的长度 static int sock_bind(int fd, struct sockaddr *umyaddr, int addrlen) { struct socket *sock; int i; char address[MAX_SOCK_ADDR]; int err; //套接字参数有效性检查 if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) return(-EBADF); //获取fd对应的socket结构 if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); //将地址从用户缓冲区复制到内核缓冲区 if((err=move_addr_to_kernel(umyaddr,addrlen,address))<0) return err; //转调用bind指向的函数 if ((i = sock->ops->bind(sock, (struct sockaddr *)address, addrlen)) < 0) { return(i); } return(0); }
/* * Perform a listen. Basically, we allow the protocol to do anything * necessary for a listen, and if that works, we mark the socket as * ready for listening. */ //服务器端监听客户端的连接请求 //fd表示bind后的套接字,backlog表示排队的最大连接个数 //listen函数把一个未连接的套接字转换为一个被动套接字, //指示内核应接受该套接字的连接请求 static int sock_listen(int fd, int backlog) { struct socket *sock; if (fd < 0 || fd >= NR_OPEN || current->files->fd[fd] == NULL) return(-EBADF); if (!(sock = sockfd_lookup(fd, NULL))) return(-ENOTSOCK); //前提是没有建立连接 if (sock->state != SS_UNCONNECTED) { return(-EINVAL); } //调用底层实现函数 if (sock->ops && sock->ops->listen) sock->ops->listen(sock, backlog); sock->flags |= SO_ACCEPTCON;//设置标识字段 return(0); }
/* * For accept, we attempt to create a new socket, set up the link * with the client, wake up the client, then return the new * connected fd. We collect the address of the connector in kernel * space and move it to user at the very end. This is buggy because * we open the socket then return an error. */ //用于服务器接收一个客户端的连接请求,这里是值-结果参数,之前有说到 //fd 为监听后套接字。最后返回一个记录了本地与目的端信息的套接字 //upeer_sockaddr用来返回已连接客户的协议地址,如果对协议地址不感兴趣就NULL static int sock_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen) { struct file *file; struct socket *sock, *newsock; int i; char address[MAX_SOCK_ADDR]; int len; if (fd < 0 || fd >= NR_OPEN || ((file = current->files->fd[fd]) == NULL)) return(-EBADF); if (!(sock = sockfd_lookup(fd, &file))) return(-ENOTSOCK); if (sock->state != SS_UNCONNECTED)//socket各个状态的演变是一步一步来的 { return(-EINVAL); } //这是tcp连接,得按步骤来 if (!(sock->flags & SO_ACCEPTCON))//没有listen { return(-EINVAL); } //分配一个新的套接字,用于表示后面可进行通信的套接字 if (!(newsock = sock_alloc())) { printk("NET: sock_accept: no more sockets\n"); return(-ENOSR); /* Was: EAGAIN, but we are out of system resources! */ } newsock->type = sock->type; newsock->ops = sock->ops; //套接字重定向,目的是初始化新的用于数据传送的套接字 //继承了第一参数传来的服务器的IP和端口号信息 if ((i = sock->ops->dup(newsock, sock)) < 0) { sock_release(newsock); return(i); } //转调用inet_accept函数 i = newsock->ops->accept(sock, newsock, file->f_flags); if ( i < 0) { sock_release(newsock); return(i); } //分配一个文件描述符,用于以后的数据传送 if ((fd = get_fd(SOCK_INODE(newsock))) < 0) { sock_release(newsock); return(-EINVAL); } //返回通信远端的地址 if (upeer_sockaddr) {//得到客户端地址,并复制到用户空间 newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1); move_addr_to_user(address,len, upeer_sockaddr, upeer_addrlen); } return(fd); }
/* * 首先将要连接的源端地址从用户缓冲区复制到内核缓冲区,之后根据套接字目前所处状态 * 采取对应措施,如果状态有效,转调用connect函数 */ //这是客户端,表示客户端向服务器端发送连接请求 static int sock_connect(int fd, struct sockaddr *uservaddr, int addrlen) { struct socket *sock; struct file *file; int i; char address[MAX_SOCK_ADDR]; int err; if (fd < 0 || fd >= NR_OPEN || (file=current->files->fd[fd]) == NULL) return(-EBADF); if (!(sock = sockfd_lookup(fd, &file))) return(-ENOTSOCK); if((err=move_addr_to_kernel(uservaddr,addrlen,address))<0) return err; //根据状态采取对应措施 switch(sock->state) { case SS_UNCONNECTED: /* This is ok... continue with connect */ break; case SS_CONNECTED: /* Socket is already connected */ if(sock->type == SOCK_DGRAM) /* Hack for now - move this all into the protocol */ break; return -EISCONN; case SS_CONNECTING: /* Not yet connected... we will check this. */ /* * FIXME: for all protocols what happens if you start * an async connect fork and both children connect. Clean * this up in the protocols! */ break; default: return(-EINVAL); } i = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, file->f_flags); if (i < 0) { return(i); } return(0); }上面几个函数则是我们应用编程是socket、bind、listen、accept、connect 函数对应的内核的系统调用函数,可以看出,对应的sock_ 函数内部也是转调用了下一层的函数。
/* * System call vectors. Since I (RIB) want to rewrite sockets as streams, * we have this level of indirection. Not a lot of overhead, since more of * the work is done via read/write/select directly. * * I'm now expanding this up to a higher level to separate the assorted * kernel/user space manipulations and global assumptions from the protocol * layers proper - AC. */ //本函数是网络栈专用操作函数集的总入口函数,主要是将请求分配,调用具体的底层函数进行处理 asmlinkage int sys_socketcall(int call, unsigned long *args) { int er; switch(call) { case SYS_SOCKET://socket函数 er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_socket(get_fs_long(args+0), get_fs_long(args+1),//返回地址上的值 get_fs_long(args+2))); case SYS_BIND://bind函数 er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_bind(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), get_fs_long(args+2))); case SYS_CONNECT://connect函数 er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_connect(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), get_fs_long(args+2))); case SYS_LISTEN://listen函数 er=verify_area(VERIFY_READ, args, 2 * sizeof(long)); if(er) return er; return(sock_listen(get_fs_long(args+0), get_fs_long(args+1))); case SYS_ACCEPT://accept函数 er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_accept(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), (int *)get_fs_long(args+2))); case SYS_GETSOCKNAME://getsockname函数 er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_getsockname(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), (int *)get_fs_long(args+2))); case SYS_GETPEERNAME://getpeername函数 er=verify_area(VERIFY_READ, args, 3 * sizeof(long)); if(er) return er; return(sock_getpeername(get_fs_long(args+0), (struct sockaddr *)get_fs_long(args+1), (int *)get_fs_long(args+2))); case SYS_SOCKETPAIR://socketpair函数 er=verify_area(VERIFY_READ, args, 4 * sizeof(long)); if(er) return er; return(sock_socketpair(get_fs_long(args+0), get_fs_long(args+1), get_fs_long(args+2), (unsigned long *)get_fs_long(args+3))); case SYS_SEND://send函数 er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); if(er) return er; return(sock_send(get_fs_long(args+0), (void *)get_fs_long(args+1), get_fs_long(args+2), get_fs_long(args+3))); case SYS_SENDTO://sendto函数 er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); if(er) return er; return(sock_sendto(get_fs_long(args+0), (void *)get_fs_long(args+1), get_fs_long(args+2), get_fs_long(args+3), (struct sockaddr *)get_fs_long(args+4), get_fs_long(args+5))); case SYS_RECV://recv函数 er=verify_area(VERIFY_READ, args, 4 * sizeof(unsigned long)); if(er) return er; return(sock_recv(get_fs_long(args+0), (void *)get_fs_long(args+1), get_fs_long(args+2), get_fs_long(args+3))); case SYS_RECVFROM://recvfrom函数 er=verify_area(VERIFY_READ, args, 6 * sizeof(unsigned long)); if(er) return er; return(sock_recvfrom(get_fs_long(args+0), (void *)get_fs_long(args+1), get_fs_long(args+2), get_fs_long(args+3), (struct sockaddr *)get_fs_long(args+4), (int *)get_fs_long(args+5))); case SYS_SHUTDOWN://shutdown函数 er=verify_area(VERIFY_READ, args, 2* sizeof(unsigned long)); if(er) return er; return(sock_shutdown(get_fs_long(args+0), get_fs_long(args+1))); case SYS_SETSOCKOPT://setsockopt函数 er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); if(er) return er; return(sock_setsockopt(get_fs_long(args+0), get_fs_long(args+1), get_fs_long(args+2), (char *)get_fs_long(args+3), get_fs_long(args+4))); case SYS_GETSOCKOPT://getsockopt函数 er=verify_area(VERIFY_READ, args, 5*sizeof(unsigned long)); if(er) return er; return(sock_getsockopt(get_fs_long(args+0), get_fs_long(args+1), get_fs_long(args+2), (char *)get_fs_long(args+3), (int *)get_fs_long(args+4))); default: return(-EINVAL); } }
/* * Sockets are not seekable. */ static int sock_lseek(struct inode *inode, struct file *file, off_t offset, int whence) { return(-ESPIPE); } /* * Read data from a socket. ubuf is a user mode pointer. We make sure the user * area ubuf...ubuf+size-1 is writable before asking the protocol. */ static int sock_read(struct inode *inode, struct file *file, char *ubuf, int size) { struct socket *sock; int err; if (!(sock = socki_lookup(inode))) { printk("NET: sock_read: can't find socket for inode!\n"); return(-EBADF); } if (sock->flags & SO_ACCEPTCON) return(-EINVAL); if(size<0) return -EINVAL; if(size==0) return 0; if ((err=verify_area(VERIFY_WRITE,ubuf,size))<0) return err; return(sock->ops->read(sock, ubuf, size, (file->f_flags & O_NONBLOCK))); } /* * Write data to a socket. We verify that the user area ubuf..ubuf+size-1 is * readable by the user process. */ static int sock_write(struct inode *inode, struct file *file, char *ubuf, int size) { struct socket *sock; int err; if (!(sock = socki_lookup(inode))) { printk("NET: sock_write: can't find socket for inode!\n"); return(-EBADF); } if (sock->flags & SO_ACCEPTCON) return(-EINVAL); if(size<0) return -EINVAL; if(size==0) return 0; if ((err=verify_area(VERIFY_READ,ubuf,size))<0) return err; return(sock->ops->write(sock, ubuf, size,(file->f_flags & O_NONBLOCK))); } /* * You can't read directories from a socket! */ static int sock_readdir(struct inode *inode, struct file *file, struct dirent *dirent, int count) { return(-EBADF); } /* * With an ioctl arg may well be a user mode pointer, but we don't know what to do * with it - thats up to the protocol still. */ int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { struct socket *sock; if (!(sock = socki_lookup(inode))) { printk("NET: sock_ioctl: can't find socket for inode!\n"); return(-EBADF); } return(sock->ops->ioctl(sock, cmd, arg)); } static int sock_select(struct inode *inode, struct file *file, int sel_type, select_table * wait) { struct socket *sock; if (!(sock = socki_lookup(inode))) { printk("NET: sock_select: can't find socket for inode!\n"); return(0); } /* * We can't return errors to select, so it's either yes or no. */ if (sock->ops && sock->ops->select) return(sock->ops->select(sock, sel_type, wait)); return(0); } void sock_close(struct inode *inode, struct file *filp) { struct socket *sock; /* * It's possible the inode is NULL if we're closing an unfinished socket. */ if (!inode) return; //找对inode对应的socket结构 if (!(sock = socki_lookup(inode))) { printk("NET: sock_close: can't find socket for inode!\n"); return; } sock_fasync(inode, filp, 0);//更新异步通知列表 sock_release(sock);//释放套接字 } /* * Update the socket async list */ //输入参数on的取值决定是分配还是释放一个fasync_struct结构,该结构用于异步唤醒 static int sock_fasync(struct inode *inode, struct file *filp, int on) { struct fasync_struct *fa, *fna=NULL, **prev; struct socket *sock; unsigned long flags; if (on)//分配 { fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); if(fna==NULL) return -ENOMEM; } sock = socki_lookup(inode); prev=&(sock->fasync_list); save_flags(flags);//保存当前状态 cli(); //从链表中找到与file结构对应的fasync_struct for(fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev) if(fa->fa_file==filp) break; if(on)//分配后的建立联系 { //如果已经有对应的file结构,则释放之前创建的 if(fa!=NULL) { kfree_s(fna,sizeof(struct fasync_struct)); restore_flags(flags); return 0; } //如果没有,则挂载这个新创建的结构 fna->fa_file=filp; fna->magic=FASYNC_MAGIC; fna->fa_next=sock->fasync_list; sock->fasync_list=fna; } //释放 else { if(fa!=NULL) { *prev=fa->fa_next; kfree_s(fa,sizeof(struct fasync_struct)); } } restore_flags(flags);//恢复状态 return 0; } /* * 异步唤醒函数,通过遍历socket结构中fasync_list变量指向的队列, * 对队列中每个元素调用kill_fasync函数 */ int sock_wake_async(struct socket *sock, int how) { if (!sock || !sock->fasync_list) return -1; switch (how) { case 0: //kill_fasync函数即通过相应的进程发送信号。这就是异步唤醒功能 kill_fasync(sock->fasync_list, SIGIO); break; case 1: if (!(sock->flags & SO_WAITDATA)) kill_fasync(sock->fasync_list, SIGIO); break; case 2: if (sock->flags & SO_NOSPACE) { kill_fasync(sock->fasync_list, SIGIO); sock->flags &= ~SO_NOSPACE; } break; } return 0; } /* * 只用于UNIX域名(iconn,conn只用于UNIX域),用于处理一个客户端连接请求 */ int sock_awaitconn(struct socket *mysock, struct socket *servsock, int flags) { struct socket *last; /* * We must be listening */ //检查服务器端是否是处于监听状态,即可以进行连接 if (!(servsock->flags & SO_ACCEPTCON)) { return(-EINVAL); } /* * Put ourselves on the server's incomplete connection queue. */ //将本次客户端连接的套接字插入服务器端,socket结构iconn字段指向的链表 //表示客户端正等待连接 mysock->next = NULL; cli(); if (!(last = servsock->iconn)) servsock->iconn = mysock; else { while (last->next) last = last->next; last->next = mysock; } mysock->state = SS_CONNECTING;//正在处理连接 mysock->conn = servsock;//客户端连接的服务器端套接字 sti(); /* * Wake up server, then await connection. server will set state to * SS_CONNECTED if we're connected. */ //唤醒服务器端进程,以处理本地客户端连接 wake_up_interruptible(servsock->wait); sock_wake_async(servsock, 0); //检查连接状态 if (mysock->state != SS_CONNECTED) { if (flags & O_NONBLOCK) return -EINPROGRESS; //等待服务器端处理本次连接 interruptible_sleep_on(mysock->wait); //检查连接状态,如果仍然没有建立连接 if (mysock->state != SS_CONNECTED && mysock->state != SS_DISCONNECTING) { /*原因如下 * if we're not connected we could have been * 1) interrupted, so we need to remove ourselves * from the server list * 2) rejected (mysock->conn == NULL), and have * already been removed from the list */ //如果被其他中断,需要主动将本地socket从对方服务器中iconn中删除 if (mysock->conn == servsock) { cli(); //找到iconn中的本地socket结构 if ((last = servsock->iconn) == mysock) servsock->iconn = mysock->next; else { while (last->next != mysock) last = last->next; last->next = mysock->next; } sti(); } //被服务器拒绝,本地socket已经被删除,无需手动删除 return(mysock->conn ? -EINTR : -EACCES);//两种原因情况的返回 } } return(0); }其余没有贴出的函数,也基本上是这么个流程。