summaryrefslogtreecommitdiffstats
path: root/debian/patches-rt/0002-Split-IRQ-off-and-zone-lock-while-freeing-pages-from.patch
blob: 85a7315dcdc2a83a2c064905b54db797c584ded5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 28 May 2018 15:24:21 +0200
Subject: [PATCH 2/4] Split IRQ-off and zone->lock while freeing pages from PCP
 list #2
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.2/older/patches-5.2.17-rt9.tar.xz

Split the IRQ-off section while accessing the PCP list from zone->lock
while freeing pages.
Introcude  isolate_pcp_pages() which separates the pages from the PCP
list onto a temporary list and then free the temporary list via
free_pcppages_bulk().

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 mm/page_alloc.c |   60 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 10 deletions(-)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1192,8 +1192,8 @@ static inline void prefetch_buddy(struct
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static void free_pcppages_bulk(struct zone *zone, int count,
-			       struct list_head *head)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
+			       bool zone_retry)
 {
 	bool isolated_pageblocks;
 	struct page *page, *tmp;
@@ -1208,12 +1208,27 @@ static void free_pcppages_bulk(struct zo
 	 */
 	list_for_each_entry_safe(page, tmp, head, lru) {
 		int mt = get_pcppage_migratetype(page);
+
+		if (page_zone(page) != zone) {
+			/*
+			 * free_unref_page_list() sorts pages by zone. If we end
+			 * up with pages from a different NUMA nodes belonging
+			 * to the same ZONE index then we need to redo with the
+			 * correct ZONE pointer. Skip the page for now, redo it
+			 * on the next iteration.
+			 */
+			WARN_ON_ONCE(zone_retry == false);
+			if (zone_retry)
+				continue;
+		}
+
 		/* MIGRATE_ISOLATE page should not go to pcplists */
 		VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
 		/* Pageblock could have been isolated meanwhile */
 		if (unlikely(isolated_pageblocks))
 			mt = get_pageblock_migratetype(page);
 
+		list_del(&page->lru);
 		__free_one_page(page, page_to_pfn(page), zone, 0, mt);
 		trace_mm_page_pcpu_drain(page, 0, mt);
 	}
@@ -2731,7 +2746,7 @@ void drain_zone_pages(struct zone *zone,
 	local_irq_restore(flags);
 
 	if (to_drain > 0)
-		free_pcppages_bulk(zone, to_drain, &dst);
+		free_pcppages_bulk(zone, &dst, false);
 }
 #endif
 
@@ -2761,7 +2776,7 @@ static void drain_pages_zone(unsigned in
 	local_irq_restore(flags);
 
 	if (count)
-		free_pcppages_bulk(zone, count, &dst);
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -2960,7 +2975,8 @@ static bool free_unref_page_prepare(stru
 	return true;
 }
 
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+				   struct list_head *dst)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -2989,10 +3005,8 @@ static void free_unref_page_commit(struc
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = READ_ONCE(pcp->batch);
-		LIST_HEAD(dst);
 
-		isolate_pcp_pages(batch, pcp, &dst);
-		free_pcppages_bulk(zone, batch, &dst);
+		isolate_pcp_pages(batch, pcp, dst);
 	}
 }
 
@@ -3003,13 +3017,17 @@ void free_unref_page(struct page *page)
 {
 	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
+	struct zone *zone = page_zone(page);
+	LIST_HEAD(dst);
 
 	if (!free_unref_page_prepare(page, pfn))
 		return;
 
 	local_irq_save(flags);
-	free_unref_page_commit(page, pfn);
+	free_unref_page_commit(page, pfn, &dst);
 	local_irq_restore(flags);
+	if (!list_empty(&dst))
+		free_pcppages_bulk(zone, &dst, false);
 }
 
 /*
@@ -3020,6 +3038,11 @@ void free_unref_page_list(struct list_he
 	struct page *page, *next;
 	unsigned long flags, pfn;
 	int batch_count = 0;
+	struct list_head dsts[__MAX_NR_ZONES];
+	int i;
+
+	for (i = 0; i < __MAX_NR_ZONES; i++)
+		INIT_LIST_HEAD(&dsts[i]);
 
 	/* Prepare pages for freeing */
 	list_for_each_entry_safe(page, next, list, lru) {
@@ -3032,10 +3055,12 @@ void free_unref_page_list(struct list_he
 	local_irq_save(flags);
 	list_for_each_entry_safe(page, next, list, lru) {
 		unsigned long pfn = page_private(page);
+		enum zone_type type;
 
 		set_page_private(page, 0);
 		trace_mm_page_free_batched(page);
-		free_unref_page_commit(page, pfn);
+		type = page_zonenum(page);
+		free_unref_page_commit(page, pfn, &dsts[type]);
 
 		/*
 		 * Guard against excessive IRQ disabled times when we get
@@ -3048,6 +3073,21 @@ void free_unref_page_list(struct list_he
 		}
 	}
 	local_irq_restore(flags);
+
+	for (i = 0; i < __MAX_NR_ZONES; ) {
+		struct page *page;
+		struct zone *zone;
+
+		if (list_empty(&dsts[i])) {
+			i++;
+			continue;
+		}
+
+		page = list_first_entry(&dsts[i], struct page, lru);
+		zone = page_zone(page);
+
+		free_pcppages_bulk(zone, &dsts[i], true);
+	}
 }
 
 /*