From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@intel.com>
Date: Mon, 7 Mar 2022 11:59:20 -0800
Subject: [PATCH] mm/page_alloc: pre-zero free pages in the buddy allocator (prezero 20220308)
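
Development snapshot of buddy-allocator page pre-zeroing.

Track whether a free buddy page has already been zeroed with a new
BUDDY_ZEROED bit, stored in page->private above the bits that hold
the buddy order.  A new vm.zero_pages sysctl walks every populated
zone and zeroes free pages, largest orders first, down to the
debugfs-tunable prezero_buddy_order (default 9).  At allocation
time, kernel_init_free_pages() can then skip clear_highpage() for
blocks that are still known to be zero.

To route THP zeroing through this path, add __GFP_ZERO to
GFP_TRANSHUGE_LIGHT and drop the explicit clear_huge_page() in
__do_huge_pmd_anonymous_page().

Debug switches and counters live in debugfs (prezero_*), including
optional buddy free list sanity checks and a re-check that
"pre-zeroed" pages really are zero.  For example, to zero roughly
1000 free base pages in each zone:

	echo 1000 > /proc/sys/vm/zero_pages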
---
include/linux/compaction.h | 2 +
include/linux/gfp.h | 2 +-
kernel/sysctl.c | 9 +
mm/compaction.c | 52 +++++
mm/huge_memory.c | 9 +-
mm/internal.h | 23 +-
mm/page_alloc.c | 416 +++++++++++++++++++++++++++++++++----
7 files changed, 468 insertions(+), 45 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 34bce35c808d..45407237e6c9 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -82,6 +82,8 @@ static inline unsigned long compact_gap(unsigned int order)
#ifdef CONFIG_COMPACTION
extern unsigned int sysctl_compaction_proactiveness;
+extern int sysctl_zero_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *length, loff_t *ppos);
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos);
extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table,
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 80f63c862be5..f49971123e2f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -334,7 +334,7 @@ struct vm_area_struct;
#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \
__GFP_SKIP_KASAN_POISON)
-#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | __GFP_ZERO |\
__GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 730ab56d9e92..95f4d4771470 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -94,6 +94,8 @@
#if defined(CONFIG_SYSCTL)
+extern int sysctl_zero_pages;
+
/* Constants used for minimum and maximum */
#ifdef CONFIG_PERF_EVENTS
@@ -2531,6 +2533,13 @@ static struct ctl_table vm_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_FOUR,
},
#ifdef CONFIG_COMPACTION
+ /*
+ * sysctl_zero_handler() lives in mm/compaction.c, so this
+ * entry must stay inside CONFIG_COMPACTION:
+ */
+ {
+ .procname = "zero_pages",
+ .data = &sysctl_zero_pages,
+ .maxlen = sizeof(sysctl_zero_pages),
+ .mode = 0644,
+ .proc_handler = sysctl_zero_handler,
+ },
{
.procname = "compact_memory",
diff --git a/mm/compaction.c b/mm/compaction.c
index e9a36942c1fa..de6268560058 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -96,6 +96,16 @@ static void split_map_pages(struct list_head *list)
order = page_private(page);
nr_pages = 1 << order;
+ WARN_ON(PageBuddy(page));
+ // These pages recently came out of the buddy, but
+ // they should have come via __isolate_free_page(),
+ // which does del_page_from_free_list(). That
+ // should have left PageBuddy() clear.
+ // The buddy order metadata was presumably left in
+ // page->private so that we could do this split and
+ // map here. It is likely no longer needed. Zap it
+ // to keep post_alloc_hook() from complaining.
+ page->private = 0;
post_alloc_hook(page, order, __GFP_MOVABLE);
if (order)
split_page(page, order);
@@ -2658,6 +2668,48 @@ static void proactive_compact_node(pg_data_t *pgdat)
}
}
+void zero_some_pages(struct zone *z, int pages);
+
+static void zero_nodes(int pages)
+{
+ int nid;
+
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+ int zoneid;
+
+ for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+ struct zone *zone = &pgdat->node_zones[zoneid];
+ if (!populated_zone(zone))
+ continue;
+
+ zero_some_pages(zone, pages);
+ }
+ }
+}
+
+int sysctl_zero_pages;
+
+int sysctl_zero_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *length, loff_t *ppos)
+{
+ int rc;
+
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+ if (rc)
+ return rc;
+
+ if (write)
+ zero_nodes(sysctl_zero_pages);
+
+ return 0;
+}
+
/* Compact all zones within a node */
static void compact_node(int nid)
{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fb9163691705..ba8c298784c1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -617,7 +617,12 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
goto release;
}
- clear_huge_page(page, vmf->address, HPAGE_PMD_NR);
+ // The zeroing has moved to the allocator via __GFP_ZERO
+ // (see GFP_TRANSHUGE_LIGHT). Clearing here did have the
+ // advantage of not wasting the clear operation if the
+ // cgroup charge or page table allocation fails.
+ //
+ //clear_huge_page(page, vmf->address, HPAGE_PMD_NR);
/*
* The memory barrier inside __SetPageUptodate makes sure that
* clear_huge_page writes become visible before the set_pmd_at()
@@ -774,7 +779,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
return ret;
}
gfp = vma_thp_gfp_mask(vma);
- page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+ page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
diff --git a/mm/internal.h b/mm/internal.h
index d80300392a19..d6c7c26fe598 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -328,6 +328,22 @@ isolate_migratepages_range(struct compact_control *cc,
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal);
+/*
+ * Use the bit above the highest-possible buddy page
+ * order (MAX_ORDER-1).
+ */
+#define BUDDY_ZEROED (1UL << (ilog2(MAX_ORDER-1)+1))
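+/*
+ * Worked example with the common MAX_ORDER == 11: buddy orders run
+ * 0..10 and fit in bits 0-3 (ilog2(10) == 3), so BUDDY_ZEROED is
+ * bit 4 (0x10) and can never collide with a stored order.
+ */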
+static inline unsigned int __buddy_order(struct page *page, bool unsafe)
+{
+ unsigned int ret;
+ if (unsafe)
+ ret = READ_ONCE(page_private(page));
+ else
+ ret = page_private(page);
+
+ return ret & ~BUDDY_ZEROED;
+}
+
/*
* This function returns the order of a free page in the buddy system. In
* general, page_zone(page)->lock must be held by the caller to prevent the
@@ -339,7 +355,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
static inline unsigned int buddy_order(struct page *page)
{
/* PageBuddy() must be checked by the caller */
- return page_private(page);
+ return __buddy_order(page, false);
}
/*
@@ -353,7 +369,10 @@ static inline unsigned int buddy_order(struct page *page)
* times, potentially observing different values in the tests and the actual
* use of the result.
*/
-#define buddy_order_unsafe(page) READ_ONCE(page_private(page))
+static inline unsigned int buddy_order_unsafe(struct page *page)
+{
+ return __buddy_order(page, true);
+}
/*
* These three helpers classifies VMAs for virtual memory accounting.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0ea48434ac7d..bddadb46bf78 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -15,6 +15,7 @@
* (lots of bits borrowed from Ingo Molnar & Andrew Morton)
*/
+#include <linux/debugfs.h>
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/highmem.h>
@@ -758,6 +759,26 @@ void prep_compound_page(struct page *page, unsigned int order)
prep_compound_head(page, order);
}
+enum zero_state {
+ NOT_ZEROED,
+ PRE_ZEROED
+};
+
+static enum zero_state pre_zeroed(struct page *page)
+{
+ if (page_private(page) & BUDDY_ZEROED)
+ return PRE_ZEROED;
+ return NOT_ZEROED;
+}
+
+static void set_buddy_private(struct page *page, unsigned long value)
+{
+ WARN_ON(!PageBuddy(page));
+
+ set_page_private(page, value);
+}
+
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
@@ -800,7 +821,7 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
__SetPageGuard(page);
INIT_LIST_HEAD(&page->lru);
- set_page_private(page, order);
+ /* Guard pages are not PageBuddy(); set_buddy_private() would WARN: */
+ set_page_private(page, order);
/* Guard pages are not available for any usage */
__mod_zone_freepage_state(zone, -(1 << order), migratetype);
@@ -815,7 +836,7 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
__ClearPageGuard(page);
- set_page_private(page, 0);
+ /* PageGuard was just cleared and PageBuddy is not set; bypass set_buddy_private(): */
+ set_page_private(page, 0);
if (!is_migrate_isolate(migratetype))
__mod_zone_freepage_state(zone, (1 << order), migratetype);
}
@@ -880,12 +901,80 @@ void init_mem_debugging_and_hardening(void)
#endif
}
-static inline void set_buddy_order(struct page *page, unsigned int order)
+u64 prezero_really_skip = 1;
+u64 prezero_counter = 0;
+u64 prezero_could_have_skipped = 0;
+u64 prezero_check_zero_highpage = 0;
+u64 prezero_buddy_sane_checks = 0;
+u64 prezero_buddy_order = 9;
+static int prezero_debugfs(void)
{
- set_page_private(page, order);
+ debugfs_create_u64("prezero_really_skip", 0644, NULL, &prezero_really_skip);
+ debugfs_create_u64("prezero_counter", 0644, NULL, &prezero_counter);
+ debugfs_create_u64("prezero_check_zero_highpage", 0644, NULL, &prezero_check_zero_highpage);
+ debugfs_create_u64("prezero_could_have_skipped", 0644, NULL, &prezero_could_have_skipped);
+ debugfs_create_u64("prezero_buddy_sane_checks", 0644, NULL, &prezero_buddy_sane_checks);
+ debugfs_create_u64("prezero_buddy_order", 0644, NULL, &prezero_buddy_order);
+
+ return 0;
+}
+late_initcall(prezero_debugfs);
+
+void check_zero_highpage(struct page *page, int order, int numpages, int line, struct page *op)
+{
+ int nr;
+
+ if (!prezero_check_zero_highpage)
+ return;
+
+ if (!memchr_inv(page_address(page), 0, PAGE_SIZE<<order))
+ return;
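+ /* page_address() above is only valid when the page cannot be in highmem: */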
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHMEM));
+
+ printk("check_zero_highpage() BAD pfn=0x%lx/%d numpages: %d from line %d\n", page_to_pfn(page), order, numpages, line);
+// trace_printk("check_zero_highpage() BAD pfn=0x%lx order=%d numpages: %d from line %d\n", page_to_pfn(page), order, numpages, line);
+// trace_printk("check_zero_highpage() real pfn=0x%lx\n", page_to_pfn(op));
+// tracing_off();
+ WARN_ON(1);
+ for (nr = 0; nr < 1<<order; nr++) {
+ struct page *tmp = &page[nr];
+ if (PageBuddy(tmp))
+ printk("page[0x%x] had PageBuddy pfn=0x%lx\n", nr, page_to_pfn(tmp));
+ clear_highpage(&page[nr]);
+ }
+}
+
+/*
+ * Only use this for pages which are new to the buddy allocator.
+ * They should not yet have PageBuddy() set.
+ */
+static inline void mark_new_buddy(struct page *page, unsigned int order,
+ enum zero_state zero)
+{
+ unsigned long private = order;
+
+ WARN_ON(PageBuddy(page));
+
+ if (zero == PRE_ZEROED) {
+ private |= BUDDY_ZEROED;
+ check_zero_highpage(page, order, 1<<order, __LINE__, page);
+ }
+
__SetPageBuddy(page);
+ set_buddy_private(page, private);
}
+/*
+static inline void change_buddy_order(struct page *page, unsigned int order)
+{
+ WARN_ON(!PageBuddy(page));
+ __SetPageBuddy(page);
+ set_page_private(page, order);
+}
+*/
+
/*
* This function checks whether a page is free && is the buddy
* we can coalesce a page and its buddy if
@@ -970,12 +1059,16 @@ compaction_capture(struct capture_control *capc, struct page *page,
}
#endif /* CONFIG_COMPACTION */
+#define list_check_buddy_is_sane(p, o) __list_check_buddy_is_sane(p, o, __LINE__)
+void __list_check_buddy_is_sane(struct page *page, int order, int line);
+
/* Used for pages not on another list */
static inline void add_to_free_list(struct page *page, struct zone *zone,
unsigned int order, int migratetype)
{
struct free_area *area = &zone->free_area[order];
+ list_check_buddy_is_sane(page, order);
list_add(&page->lru, &area->free_list[migratetype]);
area->nr_free++;
}
@@ -986,6 +1079,7 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
{
struct free_area *area = &zone->free_area[order];
+ list_check_buddy_is_sane(page, order);
list_add_tail(&page->lru, &area->free_list[migratetype]);
area->nr_free++;
}
@@ -1000,6 +1094,7 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
{
struct free_area *area = &zone->free_area[order];
+ list_check_buddy_is_sane(page, order);
list_move_tail(&page->lru, &area->free_list[migratetype]);
}
@@ -1011,11 +1106,117 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
__ClearPageReported(page);
list_del(&page->lru);
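+ /* Clear private while PageBuddy() is still set so the check in set_buddy_private() passes: */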
+ set_buddy_private(page, 0);
__ClearPageBuddy(page);
- set_page_private(page, 0);
zone->free_area[order].nr_free--;
}
+bool __zero_one_page(struct zone *zone, int order)
+{
+ struct page *page;
+ int numpages = 1<<order;
+ int i;
+ int migratetype = MIGRATE_RECLAIMABLE;
+ struct free_area *area;
+ bool did_zero = false;
+ unsigned long flags;
+
+ /* zone->lock is taken from IRQ-off contexts elsewhere, so disable IRQs here too: */
+ spin_lock_irqsave(&zone->lock, flags);
+ /* mostly ripped from __rmqueue_smallest() */
+ area = &(zone->free_area[order]);
+
+ /* Look for a page to zero in all migratetypes: */
+ while (migratetype >= 0) {
+ page = get_page_from_free_area(area, migratetype);
+
+ /* Was a page located that needs to be zeroed? */
+ if (page && (pre_zeroed(page) == NOT_ZEROED))
+ break;
+
+ /* No page was found to zero. Try another migratetype. */
+ page = NULL;
+ migratetype--;
+ }
+ if (!page) {
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return did_zero;
+ }
+
+ del_page_from_free_list(page, zone, order);
+ spin_unlock_irqrestore(&zone->lock, flags);
+
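+ /* The page is off the free lists now, so it can be zeroed without holding the zone lock: */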
+ did_zero = true;
+ for (i = 0; i < numpages; i++) {
+ clear_highpage(page + i);
+ }
+
+ spin_lock_irqsave(&zone->lock, flags);
+ mark_new_buddy(page, order, PRE_ZEROED);
+ add_to_free_list_tail(page, zone, order, migratetype);
+ check_zero_highpage(page, order, 1<<order, __LINE__, page);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return did_zero;
+}
+
+int zero_pages(struct zone *zone, int order, int do_count)
+{
+ int count = 0;
+
+ while (__zero_one_page(zone, order)) {
+ cond_resched();
+ count++;
+ // arbitrary limit to keep this from
+ // taking insane amounts of time:
+ if (count >= do_count)
+ break;
+ }
+
+ return count;
+}
+
+void zero_some_pages(struct zone *zone, int pages)
+{
+ int order;
+ long zero_count = 0;
+
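+ /*
+ * Zero the largest blocks first; each zeroed block of 'order'
+ * counts 1<<order base pages against the 'pages' budget. The
+ * (int) cast below keeps the comparison signed so the loop
+ * terminates even if prezero_buddy_order is set to 0.
+ */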
+ for (order = MAX_ORDER-1; order >= (int)prezero_buddy_order; order--) {
+ long did = zero_pages(zone, order, pages);
+ zero_count += did << order;
+ if (zero_count > pages)
+ break;
+ }
+}
+
/*
* If this is not the largest possible page, check if the buddy
* of the next-highest order is free. If it is, it's possible
@@ -1141,7 +1342,8 @@ static inline void __free_one_page(struct page *page,
}
done_merging:
- set_buddy_order(page, order);
+ list_check_buddy_is_sane(page, order);
+ mark_new_buddy(page, order, NOT_ZEROED);
if (fpi_flags & FPI_TO_TAIL)
to_tail = true;
@@ -1285,8 +1487,20 @@ static void kernel_init_free_pages(struct page *page, int numpages, bool zero_ta
kasan_disable_current();
for (i = 0; i < numpages; i++) {
u8 tag = page_kasan_tag(page + i);
+ bool need_to_zero = true;
+
page_kasan_tag_reset(page + i);
- clear_highpage(page + i);
+ if (pre_zeroed(page) == PRE_ZEROED) {
+ /* Re-check only this page rather than the whole block on every iteration: */
+ check_zero_highpage(page + i, 0, 1, __LINE__, page);
+
+ if (prezero_really_skip)
+ need_to_zero = false;
+ prezero_could_have_skipped++;
+ }
+ if (need_to_zero)
+ clear_highpage(page + i);
+ else
+ prezero_counter++;
page_kasan_tag_set(page + i, tag);
}
kasan_enable_current();
@@ -1329,6 +1543,11 @@ static __always_inline bool free_pages_prepare(struct page *page,
ClearPageHasHWPoisoned(page);
}
for (i = 1; i < (1 << order); i++) {
+ /*
+ * This will leave BUDDY_ZEROED in place in
+ * tail pages. expand() should clear it up
+ * before anyone notices.
+ */
if (compound)
bad += free_tail_pages_check(page, page + i);
if (unlikely(check_free_page(page + i))) {
@@ -1393,44 +1612,58 @@ static __always_inline bool free_pages_prepare(struct page *page,
return true;
}
-#ifdef CONFIG_DEBUG_VM
/*
- * With DEBUG_VM enabled, order-0 pages are checked immediately when being freed
- * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when
- * moved from pcp lists to free lists.
+ * Is extra page-free-time debugging needed? Returning true here wrecks
+ * performance but adds extra sanity checks at page-free time. Only
+ * turn it on when debugging.
*/
-static bool free_pcp_prepare(struct page *page, unsigned int order)
+static inline bool extra_debug_free(void)
{
- return free_pages_prepare(page, order, true, FPI_NONE);
+ return IS_ENABLED(CONFIG_DEBUG_VM) || debug_pagealloc_enabled_static();
}
-static bool bulkfree_pcp_prepare(struct page *page)
-{
- if (debug_pagealloc_enabled_static())
- return check_free_page(page);
- else
- return false;
-}
-#else
/*
- * With DEBUG_VM disabled, order-0 pages being freed are checked only when
- * moving from pcp lists to free list in order to reduce overhead. With
- * debug_pagealloc enabled, they are checked also immediately when being freed
- * to the pcp lists.
+ * Called when pages are freed into the allocator but before being added to the
+ * pcp lists. Only do free page checking when some form of debugging is on to
+ * reduce overhead.
*/
static bool free_pcp_prepare(struct page *page, unsigned int order)
{
- if (debug_pagealloc_enabled_static())
- return free_pages_prepare(page, order, true, FPI_NONE);
- else
- return free_pages_prepare(page, order, false, FPI_NONE);
+
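+ /* The page is no longer known-zero; zap private, including any BUDDY_ZEROED: */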
+ page->private = 0;
+
+ return free_pages_prepare(page, order, extra_debug_free(), FPI_NONE);
}
-static bool bulkfree_pcp_prepare(struct page *page)
+/*
+ * Called when pages are moved from the pcp lists to the main buddy free lists.
+ *
+ * These pages should have been checked when they were initially freed into the
+ * allocator via free_pcp_prepare(). Check them again if one the extra free
+ * debugging checks are on.
+ */
+static bool bulkfree_pcp_prepare(struct page *page, int order)
{
- return check_free_page(page);
+ unsigned long private = page->private;
+
+ /*
+ * Only BUDDY_ZEROED should be set in page->private at
+ * this point. If any other bit is set, something has
+ * gone wrong.
+ */
+ if ((private & ~BUDDY_ZEROED) && printk_ratelimit()) {
+ printk("%s()::%d bad page private: %lx\n", __func__, __LINE__, page->private);
+ page->private = 0;
+ }
+
+ if (extra_debug_free())
+ return check_free_page(page);
+ else
+ return false;
}
-#endif /* CONFIG_DEBUG_VM */
static inline void prefetch_buddy(struct page *page)
{
@@ -1493,7 +1726,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
nr_freed += 1 << order;
count -= 1 << order;
- if (bulkfree_pcp_prepare(page))
+ if (bulkfree_pcp_prepare(page, order))
continue;
/* Encode order with the migratetype */
@@ -2294,7 +2527,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
* -- nyc
*/
static inline void expand(struct zone *zone, struct page *page,
- int low, int high, int migratetype)
+ int low, int high, int migratetype, enum zero_state page_prezeroed)
{
unsigned long size = 1 << high;
@@ -2312,8 +2545,8 @@ static inline void expand(struct zone *zone, struct page *page,
if (set_page_guard(zone, &page[size], high, migratetype))
continue;
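+ /* Each split-off buddy inherits the zero state of the block it came from: */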
+ mark_new_buddy(&page[size], high, page_prezeroed);
add_to_free_list(&page[size], zone, high, migratetype);
- set_buddy_order(&page[size], high);
}
}
@@ -2392,10 +2625,19 @@ static bool check_new_pages(struct page *page, unsigned int order)
return false;
}
-inline void post_alloc_hook(struct page *page, unsigned int order,
+noinline void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
- set_page_private(page, 0);
+ if ((page->private & ~BUDDY_ZEROED) && printk_ratelimit()) {
+ printk("%s()::%d BAD page private: priv=%lx\n", __func__, __LINE__, page->private);
+ page->private = 0;
+ /*
+ * PageBuddy() is clear. This trips the
+ * PageBuddy check in set_buddy_private().
+ */
+ //set_buddy_private(page, 0);
+ dump_stack();
+ }
set_page_refcounted(page);
arch_alloc_page(page, order);
@@ -2428,7 +2670,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
page_table_check_alloc(page, order);
}
-static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
+static noinline void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
unsigned int alloc_flags)
{
post_alloc_hook(page, order, gfp_flags);
@@ -2462,13 +2704,30 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
/* Find a page of the appropriate size in the preferred list */
for (current_order = order; current_order < MAX_ORDER; ++current_order) {
+ enum zero_state page_pz;
area = &(zone->free_area[current_order]);
page = get_page_from_free_area(area, migratetype);
if (!page)
continue;
+ /* stash this away before del_page_from_free_list() zaps it: */
+ page_pz = pre_zeroed(page);
+
del_page_from_free_list(page, zone, current_order);
- expand(zone, page, order, current_order, migratetype);
+ expand(zone, page, order, current_order, migratetype, page_pz);
set_pcppage_migratetype(page, migratetype);
+ /*
+ * This is a bit of a kludge. The state was zapped above
+ * and is restored here. We should probably
+ * consider whether del_page_from_free_list()
+ * should leave BUDDY_ZEROED in place and what
+ * the implications would be.
+ *
+ * Without this, pages leaving the buddy always
+ * have page->private=0.
+ */
+ if (page_pz == PRE_ZEROED) {
+ page->private = BUDDY_ZEROED;
+ }
return page;
}
@@ -9490,7 +9749,9 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page,
if (current_buddy != target) {
add_to_free_list(current_buddy, zone, high, migratetype);
- set_buddy_order(current_buddy, high);
+ // This is very rare. Do not bother
+ // trying to preserve zero state:
+ mark_new_buddy(current_buddy, high, NOT_ZEROED);
page = next_page;
}
}
@@ -9573,3 +9834,78 @@ bool has_managed_dma(void)
return false;
}
#endif /* CONFIG_ZONE_DMA */
+
+void __list_check_buddy_low_orders(struct page *page, int order, int line)
+{
+ int nr_pages = 1 << order;
+ int i;
+
+ for (i = 1; i < nr_pages; i++) {
+ struct page *child = &page[i];
+ unsigned long child_pfn = page_to_pfn(child);
+ unsigned long pfn = page_to_pfn(page);
+ if (!PageBuddy(child))
+ continue;
+
+ printk("bad low order: %d pfns: 0x%lx 0x%lx buddy: %d/%d line=%d bo=%d\n",
+ order, pfn, child_pfn,
+ PageBuddy(page),
+ PageBuddy(child),
+ line, buddy_order(child));
+ }
+}
+
+void __list_check_buddy_high_orders(struct page *page, int order, int line)
+{
+ unsigned long pfn = page_to_pfn(page);
+
+ // Highest-order (MAX_ORDER-1) buddy pages are never
+ // merged, so two of them can legitimately sit next
+ // to each other on the free lists.
+ if (order >= MAX_ORDER-1)
+ return;
+
+ while (order < MAX_ORDER-1) {
+ unsigned long buddy_pfn = __find_buddy_pfn(pfn, order);
+ struct page *buddy = pfn_to_page(buddy_pfn);
+
+ // not in the buddy, don't care
+ if (!PageBuddy(buddy))
+ goto next;
+
+ // starts after me, can't possibly overlap, don't care
+ if (buddy_pfn >= pfn + (1<<order))
+ goto next;
+
+ // Starts before me. Does it cover me?
+ if (buddy_pfn + (1<<buddy_order(buddy)) <= pfn)
+ goto next;
+
+ /* Reaching this point means an overlapping buddy was found: */
+ printk("bad high order: %d pfns: 0x%lx 0x%lx buddy: %d/%d pib=%d line=%d bo=%d\n",
+ order, pfn, buddy_pfn, PageBuddy(page),
+ PageBuddy(buddy),
+ page_is_buddy(page, buddy, order),
+ line,
+ buddy_order(buddy));
+ //WARN_ON(1);
+
+ // combine the PFNs to "move up" one order:
+ pfn = buddy_pfn & pfn;
+ page = pfn_to_page(pfn);
+ next:
+ order++;
+ }
+}
+
+void __list_check_buddy_is_sane(struct page *page, int order, int line)
+{
+ if (!prezero_buddy_sane_checks)
+ return;
+ __list_check_buddy_high_orders(page, order, line);
+ __list_check_buddy_low_orders(page, order, line);
+}
--
https://clearlinux.org