首页
登录 | 注册

Linux的poll与epoll实现(2)---epoll_create

    昨晚分析了poll,通过代码的阅读可以发现,poll操作有很多可以优化的地方。epoll是eventpoll的简称,它的效率是非常高的,我们今天来看看它的实现。它的实现在内核源码的fs/eventpoll.c中,代码有1500多行,呵呵,怕了吧。
    大家都知道,epoll有三个系统调用,C库封装成以下三个:
  1. int epoll_create(int size);
  2. int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
  3. int epoll_wait(int epfd, struct epoll_event *events,int maxevents, int timeout);
epoll的源码这么多,我们就干脆跟着它们三个走着瞧。今天先搞定第一个---epoll_create
 
第一个是epoll_create,它在内核中对应的实现是sys_epoll_create:
  1. /*
  2.  * It opens an eventpoll file descriptor by suggesting a storage of "size"
  3.  * file descriptors. The size parameter is just an hint about how to size
  4.  * data structures. It won't prevent the user to store more than "size"
  5.  * file descriptors inside the epoll interface. It is the kernel part of
  6.  * the userspace epoll_create(2).
  7.  */
  8. asmlinkage long sys_epoll_create(int size)
  9. {
  10.     int error, fd;
  11.     struct inode *inode;
  12.     struct file *file;

  13.     DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
  14.          current, size));

  15.     /* Sanity check on the size parameter */
  16.     error = -EINVAL;
  17.     if (size <= 0)
  18.         goto eexit_1;

  19.     /*
  20.      * Creates all the items needed to setup an eventpoll file. That is,
  21.      * a file structure, and inode and a free file descriptor.
  22.      */
  23.     error = ep_getfd(&fd, &inode, &file);                  //(1)
  24.     if (error)
  25.         goto eexit_1;

  26.     /* Setup the file internal data structure ( "struct eventpoll" ) */
  27.     error = ep_file_init(file);                            //(2)
  28.     if (error)
  29.         goto eexit_2;


  30.     DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
  31.          current, size, fd));

  32.     return fd;

  33. eexit_2:
  34.     sys_close(fd);
  35. eexit_1:
  36.     DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
  37.          current, size, error));
  38.     return error;
  39. }

(1)这里用到了一个ep_getfd函数,从注释我们知道,这个函数建立eventpoll相关的file,当然,一个file要包括文件描述符、inode、还有文件对象,这也正是我们传入的三个输出参数(函数成功时通过这三个指针把结果返回给调用者)。废话不说,看源码:

  1. /*
  2.  * Creates the file descriptor to be used by the epoll interface.
  3.  */
  4. static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
  5. {
  6.     struct qstr this;
  7.     char name[32];
  8.     struct dentry *dentry;
  9.     struct inode *inode;
  10.     struct file *file;
  11.     int error, fd;

  12.     /* Get an ready to use file */
  13.     error = -ENFILE;
  14.     file = get_empty_filp();
  15.     if (!file)
  16.         goto eexit_1;

  17.     /* Allocates an inode from the eventpoll file system */
  18.     inode = ep_eventpoll_inode();
  19.     error = PTR_ERR(inode);
  20.     if (IS_ERR(inode))
  21.         goto eexit_2;

  22.     /* Allocates a free descriptor to plug the file onto */
  23.     error = get_unused_fd();
  24.     if (error < 0)
  25.         goto eexit_3;
  26.     fd = error;

  27.     /*
  28.      * Link the inode to a directory entry by creating a unique name
  29.      * using the inode number.
  30.      */
  31.     error = -ENOMEM;
  32.     sprintf(name, "[%lu]", inode->i_ino);
  33.     this.name = name;
  34.     this.len = strlen(name);
  35.     this.hash = inode->i_ino;
  36.     dentry = d_alloc(eventpoll_mnt->mnt_sb->s_root, &this);
  37.     if (!dentry)
  38.         goto eexit_4;
  39.     dentry->d_op = &eventpollfs_dentry_operations;
  40.     d_add(dentry, inode);
  41.     file->f_vfsmnt = mntget(eventpoll_mnt);
  42.     file->f_dentry = dentry;
  43.     file->f_mapping = inode->i_mapping;

  44.     file->f_pos = 0;
  45.     file->f_flags = O_RDONLY;
  46.     file->f_op = &eventpoll_fops;
  47.     file->f_mode = FMODE_READ;
  48.     file->f_version = 0;
  49.     file->private_data = NULL;

  50.     /* Install the new setup file into the allocated fd. */
  51.     fd_install(fd, file);

  52.     *efd = fd;
  53.     *einode = inode;
  54.     *efile = file;
  55.     return 0;

  56. eexit_4:
  57.     put_unused_fd(fd);
  58. eexit_3:
  59.     iput(inode);
  60. eexit_2:
  61.     put_filp(file);
  62. eexit_1:
  63.     return error;
  64. }

这个函数的注释都比较全,这里简单提一下,况且因为涉及到的函数太多,要深究起来涉及的知识太多,也不可能逐一去列代码。不过这个函数个人觉得比较经典,这函数就是创建一个文件的流程。

首先,我们得拿到一个file结构体,通过get_empty_filp()由内核分配给我们;然后我们要拿到inode,调用这个ep_eventpoll_inode()就可以了;接着是get_unused_fd()拿到文件描述符;接着d_alloc()函数为我们拿到一个dentry;d_add(dentry, inode)函数把dentry加入到dentry哈希表中并且绑定inode;后面是继续填充文件对象file;fd_install(fd, file)向进程注册文件,并通过这样的方式把文件描述符和文件对象关联起来。如果中间某一步失败,则通过末尾的eexit_*标签按相反的顺序释放已经申请到的资源。

(2)在跟踪ep_file_init函数之前,我们先来看一下eventpoll结构体:

  1. /*
  2.  * This structure is stored inside the "private_data" member of the file
  3.  * structure and rapresent the main data sructure for the eventpoll
  4.  * interface.
  5.  */
  6. struct eventpoll {
  7.     /* Protect the this structure access */
  8.     rwlock_t lock;

  9.     /*
  10.      * This semaphore is used to ensure that files are not removed
  11.      * while epoll is using them. This is read-held during the event
  12.      * collection loop and it is write-held during the file cleanup
  13.      * path, the epoll file exit code and the ctl operations.
  14.      */
  15.     struct rw_semaphore sem;

  16.     /* Wait queue used by sys_epoll_wait() */
  17.     wait_queue_head_t wq;

  18.     /* Wait queue used by file->poll() */
  19.     wait_queue_head_t poll_wait;

  20.     /* List of ready file descriptors */
  21.     struct list_head rdllist;

  22.     /* RB-Tree root used to store monitored fd structs */
  23.     struct rb_root rbr;
  24. };

注释也是相当清楚。这个eventpoll可以看得出来,是epoll的核心,它将会存储你想要监听的文件描述符(放在红黑树rbr中),而已经就绪的文件描述符则挂在rdllist链表上,这也是epoll高效的原因所在。

好,我们回到sys_epoll_create函数,开始跟踪ep_file_init函数:

  1. static int ep_file_init(struct file *file)
  2. {
  3.     struct eventpoll *ep;

  4.     if (!(ep = kmalloc(sizeof(struct eventpoll), GFP_KERNEL)))
  5.         return -ENOMEM;

  6.     memset(ep, 0, sizeof(*ep));
  7.     rwlock_init(&ep->lock);
  8.     init_rwsem(&ep->sem);
  9.     init_waitqueue_head(&ep->wq);
  10.     init_waitqueue_head(&ep->poll_wait);
  11.     INIT_LIST_HEAD(&ep->rdllist);
  12.     ep->rbr = RB_ROOT;

  13.     file->private_data = ep;

  14.     DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_file_init() ep=%p\n",
  15.          current, ep));
  16.     return 0;
  17. }

其实也就是eventpoll结构体的初始化。

sys_epoll_create函数大概就这样了,明天接着看sys_epoll_ctl。


相关文章

  • 亲爱的各位博主,博客评选活动又开始啦,感谢大家对活动的支持,希望大家的技术水平越来越好,博文也更加出色,获奖的博主还有机会晋级"推荐博客"."专家博客"!     博客评选将邀请技术专家作为点评嘉宾, ...
  • 1. 简述 OSI 七层协议. 2. 什么是C/S和B/S架构? 3. 简述 三次握手.四次挥手的流程. 4. 什么是arp协议? 5. TCP和UDP的区别? 6. 什么是局域网和广域网? 7. 为何基于tcp协议的通信比基于udp协议的 ...
  • 彻底学会使用epoll(六)--关于ET的若干问题总结 --lvyilong316 6.1 ET模式为什么要设置在非阻塞模式下工作     因为ET模式下的读写需要一直读或写直到出错(对于读,当读到的实际字节数小于请求字节数时就可以停止), ...
  • mtk7621驱动 无线驱动在完成驱动注册的同时,需要进行 cfg80211接口注册(提供命令支持). 1. 驱动 mtk wifi驱动基于pci进行扩展,第一个文件:/os/linux/pci_main_dev.c 文件用于创建和注册基于 ...
  • https://www.linuxidc.com/Linux/2011-09/42174.htm //my_joystick.c #include #include #include #include #include #include # ...
  • 原文地址:Linux内核对per-cpu变量的实现 作者:MagicBoy2010 在Linux中,per-cpu变量用在多处理器系统中,用来为系统中的每个cpu都生成一个变量的副本,以避开多处理器互斥中的加锁问题,另一个是cpu本地的变量 ...

2020 unjeep.com webmaster#unjeep.com
12 q. 0.012 s.
京ICP备10005923号