page frame reclaiming 两个链表的总结

Agenda
1. Introduction
2. lru_rotate_pvecs链表的作用。
3. lru_deactivate_pvecs链表的作用。

Text
1. Introduction
在swap.c文件中,定义了几个链表:
 39 static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
 40 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 41 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
lru_add_pvecs这个链表的作用很清楚:页面加入到zone中对应链表的时候,需要它来进行中转过渡。其余的两个链表是在"902aaed0d983dfd459fcb"这个patch中加入的。

2. lru_rotate_pvecs这个链表的作用:
对于正在writeback的页,如果被回收,那么需要把该页移到inactive链表的尾部,等待再次被回收。使用该链表的函数是:rotate_reclaimable_page(). 调用该函数的上层函数是: end_page_writeback().
与end_page_writeback()配对的是set_page_writeback(). 当一个页要写出到外部存储设备的时候,先设置set_page_writeback, 然后将该页的写指令提交到通用块设备队列中。当写完后(unlock_page()成功返回),调用end_page_writeback(). 
end_page_writeback所做的工作是:
 598 /**
 599  * end_page_writeback - end writeback against a page
 600  * @page: the page
 601  */
 602 void end_page_writeback(struct page *page)
 603 {
 604         if (TestClearPageReclaim(page))
 605                 rotate_reclaimable_page(page);
 606
 607         if (!test_clear_page_writeback(page))
 608                 BUG();
 609
 610         smp_mb__after_clear_bit();
 611         wake_up_page(page, PG_writeback);
 612 }
 613 EXPORT_SYMBOL(end_page_writeback);
上述函数的功能是:
a.  如果该页面之前被回收进程试图回收过,但因为PG_dirty,所以没有成功,因此,仍然待在inactive_list链表中,同时设置该页PG_reclaim标志以告诉writeback进程,该页需要尽快被回收。当writeback进程writeback该页成功后,清除页面回收标志,并将该页放在inactive队列的末尾(Line 604~605),以尽快被回收(因为回收进程总是从inactive_list链表的尾部开始回收的)。
b. 清除该页PG_writeback标志(Line 607)
c. 唤醒之前因为该页在writeback的过程中而睡眠的那些进程(Line 611),比如回收页面的 shrink_page_list()->wait_on_page_writeback().
end_page_writeback->rotate_reclaimable_page
258 /*
259  * Writeback is about to end against a page which has been marked for immediate
260  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
261  * inactive list.
262  */
263 void rotate_reclaimable_page(struct page *page)
264 {
265         if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
266             !PageUnevictable(page) && PageLRU(page)) {
267                 struct pagevec *pvec;
268                 unsigned long flags;
269
270                 page_cache_get(page);
271                 local_irq_save(flags);
272                 pvec = &__get_cpu_var(lru_rotate_pvecs);
273                 if (!pagevec_add(pvec, page))
274                         pagevec_move_tail(pvec);
275                 local_irq_restore(flags);
276         }
277 }

3. lru_deactivate_pvecs是用来中转那些被强制deactivate的页的过渡链表。使用该链表的函数是deactivate_page(), 它的唯一上层调用函数是:invalidate_mapping_pages().
凡是经过deactivate_page的页面,其active程度要降一级。
317 /**
318  * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
319  * @mapping: the address_space which holds the pages to invalidate
320  * @start: the offset 'from' which to invalidate
321  * @end: the offset 'to' which to invalidate (inclusive)
322  *
323  * This function only removes the unlocked pages, if you want to
324  * remove all the pages of one inode, you must call truncate_inode_pages.
325  *
326  * invalidate_mapping_pages() will not block on IO activity. It will not
327  * invalidate pages which are dirty, locked, under writeback or mapped into
328  * pagetables.
329  */
330 unsigned long invalidate_mapping_pages(struct address_space *mapping,
331                 pgoff_t start, pgoff_t end)
332 {
333         struct pagevec pvec;
334         pgoff_t index = start;
335         unsigned long ret;
336         unsigned long count = 0;
337         int i;
338
339         /*
340          * Note: this function may get called on a shmem/tmpfs mapping:
341          * pagevec_lookup() might then return 0 prematurely (because it
342          * got a gangful of swap entries); but it's hardly worth worrying
343          * about - it can rarely have anything to free from such a mapping
344          * (most pages are dirty), and already skips over any difficulties.
345          */
346
347         pagevec_init(&pvec, 0);
348         while (index <= end && pagevec_lookup(&pvec, mapping, index,
349                         min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
350                 mem_cgroup_uncharge_start();
351                 for (i = 0; i < pagevec_count(&pvec); i++) {
352                         struct page *page = pvec.pages[i];
353
354                         /* We rely upon deletion not changing page->index */
355                         index = page->index;
356                         if (index > end)
357                                 break;
358
359                         if (!trylock_page(page))
360                                 continue;
361                         WARN_ON(page->index != index);
362                         ret = invalidate_inode_page(page);
363                         unlock_page(page);
364                         /*
365                          * Invalidation is a hint that the page is no longer
366                          * of interest and try to speed up its reclaim.
367                          */
368                         if (!ret)
369                                 deactivate_page(page);
370                         count += ret;
371                 }
372                 pagevec_release(&pvec);
373                 mem_cgroup_uncharge_end();
374                 cond_resched();
375                 index++;
376         }
377         return count;
378 }
invalidate_mapping_pages-> deactivate_page:
528 /**
529  * deactivate_page - forcefully deactivate a page
530  * @page: page to deactivate
531  *
532  * This function hints the VM that @page is a good reclaim candidate,
533  * for example if its invalidation fails due to the page being dirty
534  * or under writeback.
535  */
536 void deactivate_page(struct page *page)
537 {
538         /*
539          * In a workload with many unevictable page such as mprotect, unevictable
540          * page deactivation for accelerating reclaim is pointless.
541          */
542         if (PageUnevictable(page))
543                 return;
544
545         if (likely(get_page_unless_zero(page))) {
546                 struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
547
548                 if (!pagevec_add(pvec, page))
549                         pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
550                 put_cpu_var(lru_deactivate_pvecs);
551         }
552 }

Remarks
1. https://lkml.org/lkml/2007/9/11/130

评论

此博客中的热门博文

Linux/ARM Page Table Entry 属性设置分析

提交了30次才AC ---【附】POJ 2488解题报告

笔记