file = fget(epfd);   /* Get the "struct file *" for the target file */
tfile = fget(fd);
/* ... */
ep = file->private_data;
struct epitem {
	/* RB tree node used to link this structure to the eventpoll RB tree */
	struct rb_node rbn;

	/* ... */

	/* List containing poll wait queues */
	struct list_head pwqlist;

	/* The "container" of this item */
	struct eventpoll *ep;

	/* ... */
};
struct eventpoll {
	/* Protect the access to this structure */
	spinlock_t lock;

	/*
	 * This mutex is used to ensure that files are not removed
	 * while epoll is using them. This is held during the event
	 * collection loop, the file cleanup path, the epoll file exit
	 * code and the ctl operations.
	 */
	struct mutex mtx;

	/* Wait queue used by sys_epoll_wait() */
	wait_queue_head_t wq;

	/* Wait queue used by file->poll() */
	wait_queue_head_t poll_wait;

	/* List of ready file descriptors */
	struct list_head rdllist;

	/* RB tree root used to store monitored fd structs */
	struct rb_root rbr;

	/*
	 * This is a single linked list that chains all the "struct epitem" that
	 * happened while transferring ready events to userspace w/out
	 * holding ->lock.
	 */
	struct epitem *ovflist;

	/* ... */
};
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct epitem *epi = ep_item_from_wait(wait);
	struct eventpoll *ep = epi->ep;

	/* ... */

	/* If this file is already in the ready list we exit soon */
	if (!ep_is_linked(&epi->rdllink))
		list_add_tail(&epi->rdllink, &ep->rdllist);
	if (list_empty(&ep->rdllist)) {
		/*
		 * We don't have any available event to return to the caller.
		 * We need to sleep here, and we will be woken up by
		 * ep_poll_callback() when events become available.
		 */
		init_waitqueue_entry(&wait, current);
		__add_wait_queue_exclusive(&ep->wq, &wait);

		for (;;) {
			/*
			 * We don't want to sleep if the ep_poll_callback() sends us
			 * a wakeup in between. That's why we set the task state
			 * to TASK_INTERRUPTIBLE before doing the checks.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			if (!list_empty(&ep->rdllist) || timed_out)
				break;
			if (signal_pending(current)) {
				res = -EINTR;
				break;
			}

			spin_unlock_irqrestore(&ep->lock, flags);
			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
				timed_out = 1;
static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, void *priv)
{
	/* ... */
	if (__put_user(revents, &uevent->events) ||
	    __put_user(epi->event.data, &uevent->data)) {
		list_add(&epi->rdllink, head);
		return eventcnt ? eventcnt : -EFAULT;
	}
	eventcnt++;
	uevent++;
	if (epi->event.events & EPOLLONESHOT)
		epi->event.events &= EP_PRIVATE_BITS;
	else if (!(epi->event.events & EPOLLET)) {
		/*
		 * If this file has been added with Level
		 * Trigger mode, we need to insert back inside
		 * the ready list, so that the next call to
		 * epoll_wait() will check again the events
		 * availability. At this point, no one can insert
		 * into ep->rdllist besides us. The epoll_ctl()
		 * callers are locked out by
		 * ep_scan_ready_list() holding "mtx" and the
		 * poll callback will queue them in ep->ovflist.
		 */
		list_add_tail(&epi->rdllink, &ep->rdllist);
	}
	/* ... */
}
static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	/* ... */

	/*
	 * If we are transferring events to userspace, we can hold no locks
	 * (because we're accessing user memory, and because of linux f_op->poll()
	 * semantics). All the events that happen during that period of time are
	 * chained in ep->ovflist and requeued later on.
	 */
	if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
		if (epi->next == EP_UNACTIVE_PTR) {
			epi->next = ep->ovflist;
			ep->ovflist = epi;
		}
		goto out_unlock;
	}

	/* ... */
}
static int ep_scan_ready_list(struct eventpoll *ep,
			      int (*sproc)(struct eventpoll *, struct list_head *, void *),
			      void *priv)
{
	/* ... */

	/*
	 * During the time we spent inside the "sproc" callback, some
	 * other events might have been queued by the poll callback.
	 * We re-insert them inside the main ready-list here.
	 */
	for (nepi = ep->ovflist; (epi = nepi) != NULL;
	     nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
		/*
		 * We need to check if the item is already in the list.
		 * During the "sproc" callback execution time, items are
		 * queued into ->ovflist but the "txlist" might already
		 * contain them, and the list_splice() below takes care of them.
		 */
		if (!ep_is_linked(&epi->rdllink))
			list_add_tail(&epi->rdllink, &ep->rdllist);
	}

	/* ... */
}