From: Andy Whitcroft <apw@shadowen.org>

This patch eliminates the false hole which can form between
ZONE_NORMAL and ZONE_HIGHMEM when the 4g/4g split is enabled.  Basically,
the patch changes the allocation of the NUMA remap regions (the
source of the hole) so that they officially fall within VMALLOC
space, where they belong.

-apw



diff -upN reference/arch/i386/mm/discontig.c current/arch/i386/mm/discontig.c
--- reference/arch/i386/mm/discontig.c	2004-03-30 21:42:13.000000000 -0800
+++ current/arch/i386/mm/discontig.c	2004-03-30 21:42:13.000000000 -0800
@@ -233,6 +233,13 @@ unsigned long __init setup_memory(void)
 	unsigned long bootmap_size, system_start_pfn, system_max_low_pfn;
 	unsigned long reserve_pages;
 
+	/*
+	 * When mapping a NUMA machine we allocate the node_mem_map arrays
+	 * from node local memory.  They are then mapped directly into KVA
+	 * between zone normal and vmalloc space.  Calculate the size of
+	 * this space and use it to adjust the boundary between ZONE_NORMAL
+	 * and ZONE_HIGHMEM.
+	 */
 	get_memcfg_numa();
 	reserve_pages = calculate_numa_remap_pages();
 
@@ -240,7 +247,10 @@ unsigned long __init setup_memory(void)
 	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
 
 	find_max_pfn();
-	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+	system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
+	printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
+			reserve_pages, max_low_pfn + reserve_pages);
+	printk("max_pfn = %ld\n", max_pfn);
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > system_max_low_pfn)
@@ -248,7 +258,6 @@ unsigned long __init setup_memory(void)
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 	       pages_to_mb(highend_pfn - highstart_pfn));
 #endif
-	system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages;
 	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
 			pages_to_mb(system_max_low_pfn));
 	printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", 
@@ -258,15 +267,16 @@ unsigned long __init setup_memory(void)
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for (nid = 0; nid < numnodes; nid++) {
 		node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			highstart_pfn - node_remap_offset[nid]);
+			(highstart_pfn + reserve_pages) - node_remap_offset[nid]);
 		allocate_pgdat(nid);
 		printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
 			(ulong) node_remap_start_vaddr[nid],
-			(ulong) pfn_to_kaddr(highstart_pfn
+			(ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
 			    - node_remap_offset[nid] + node_remap_size[nid]));
 	}
 	printk("High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
+	vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
 	for (nid = 0; nid < numnodes; nid++)
 		find_max_pfn_node(nid);
 
diff -upN reference/include/asm-i386/pgtable.h current/include/asm-i386/pgtable.h
--- reference/include/asm-i386/pgtable.h	2004-03-30 16:24:52.000000000 -0800
+++ current/include/asm-i386/pgtable.h	2004-03-30 21:42:13.000000000 -0800
@@ -87,8 +87,8 @@ void paging_init(void);
  * area for the same reason. ;)
  */
 #define VMALLOC_OFFSET	(8*1024*1024)
-#define VMALLOC_START	(((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
-						~(VMALLOC_OFFSET-1))
+#define VMALLOC_START	(((unsigned long) high_memory + vmalloc_earlyreserve + \
+			2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END	(PKMAP_BASE-2*PAGE_SIZE)
 #else
diff -upN reference/include/linux/mm.h current/include/linux/mm.h
--- reference/include/linux/mm.h	2004-03-30 16:24:47.000000000 -0800
+++ current/include/linux/mm.h	2004-03-30 21:42:13.000000000 -0800
@@ -19,6 +19,7 @@ extern unsigned long max_mapnr;
 
 extern unsigned long num_physpages;
 extern void * high_memory;
+extern unsigned long vmalloc_earlyreserve;
 extern int page_cluster;
 
 #include <asm/page.h>
diff -upN reference/mm/memory.c current/mm/memory.c
--- reference/mm/memory.c	2004-03-30 16:24:47.000000000 -0800
+++ current/mm/memory.c	2004-03-30 21:42:13.000000000 -0800
@@ -66,12 +66,21 @@ EXPORT_SYMBOL(mem_map);
 #endif
 
 unsigned long num_physpages;
+/*
+ * A number of key systems in x86 including ioremap() rely on the assumption
+ * that high_memory defines the upper bound on direct map memory, the end
+ * of ZONE_NORMAL.  Under CONFIG_DISCONTIG this means that max_low_pfn and
+ * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
+ * and ZONE_HIGHMEM.
+ */
 void * high_memory;
 struct page *highmem_start_page;
+unsigned long vmalloc_earlyreserve;
 
 EXPORT_SYMBOL(num_physpages);
 EXPORT_SYMBOL(highmem_start_page);
 EXPORT_SYMBOL(high_memory);
+EXPORT_SYMBOL(vmalloc_earlyreserve);
 
 /*
  * We special-case the C-O-W ZERO_PAGE, because it's such
