#ifndef _LINUX_MMZONE_H
#define _LINUX_MMZONE_H

#ifndef __ASSEMBLY__
#ifndef __GENERATING_BOUNDS_H

#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/numa.h>
#include <linux/init.h>
#include <linux/seqlock.h>
#include <linux/nodemask.h>
#include <linux/pageblock-flags.h>
#include <linux/page-flags-layout.h>
#include <linux/atomic.h>
#include <asm/page.h>

/* Free memory management - zoned buddy allocator. */
#ifndef CONFIG_FORCE_MAX_ZONEORDER
#define MAX_ORDER 11
#else
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif
#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))

/*
 * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
 * costly to service. That is between allocation orders which should
 * coalesce naturally under reasonable reclaim pressure and those which
 * will not.
 */
#define PAGE_ALLOC_COSTLY_ORDER 3

enum {
        MIGRATE_UNMOVABLE,
        MIGRATE_RECLAIMABLE,
        MIGRATE_MOVABLE,
        MIGRATE_PCPTYPES,       /* the number of types on the pcp lists */
        MIGRATE_RESERVE = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
        /*
         * MIGRATE_CMA migration type is designed to mimic the way
         * ZONE_MOVABLE works.  Only movable pages can be allocated
         * from MIGRATE_CMA pageblocks and the page allocator never
         * implicitly changes the migration type of a MIGRATE_CMA pageblock.
         *
         * The way to use it is to change the migratetype of a range of
         * pageblocks to MIGRATE_CMA, which can be done by the
         * __free_pageblock_cma() function.  What is important though
         * is that a range of pageblocks must be aligned to
         * MAX_ORDER_NR_PAGES should the biggest page be bigger than
         * a single pageblock.
         */
        MIGRATE_CMA,
#endif
#ifdef CONFIG_MEMORY_ISOLATION
        MIGRATE_ISOLATE,        /* can't allocate from here */
#endif
        MIGRATE_TYPES
};

#ifdef CONFIG_CMA
# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
#else
# define is_migrate_cma(migratetype) false
#endif

#define for_each_migratetype_order(order, type) \
        for (order = 0; order < MAX_ORDER; order++) \
                for (type = 0; type < MIGRATE_TYPES; type++)

extern int page_group_by_mobility_disabled;

static inline int get_pageblock_migratetype(struct page *page)
{
        return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
}

struct free_area {
        struct list_head free_list[MIGRATE_TYPES];
        unsigned long nr_free;
};

struct pglist_data;

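/*
 * Illustrative sketch, not part of the original header: the buddy allocator
 * keeps one struct free_area per order, so the number of free base pages
 * held in a zone's free_area[] array is the sum of nr_free << order over
 * all orders.  The helper name is made up for the example; the kernel
 * maintains this count incrementally (NR_FREE_PAGES) instead of walking
 * the arrays.
 */
static inline unsigned long example_free_base_pages(struct free_area *areas)
{
        unsigned int order;
        unsigned long nr_pages = 0;

        for (order = 0; order < MAX_ORDER; order++)
                nr_pages += areas[order].nr_free << order;

        return nr_pages;
}
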
/*
 * zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
 * So add a wild amount of padding here to ensure that they fall into separate
 * cachelines.  There are very few zone structures in the machine, so space
 * consumption is not a concern here.
 */
#if defined(CONFIG_SMP)
struct zone_padding {
        char x[0];
} ____cacheline_internodealigned_in_smp;
#define ZONE_PADDING(name)      struct zone_padding name;
#else
#define ZONE_PADDING(name)
#endif

enum zone_stat_item {
        /* First 128 byte cacheline (assuming 64 bit words) */
        NR_FREE_PAGES,
        NR_ALLOC_BATCH,
        NR_LRU_BASE,
        NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
        NR_ACTIVE_ANON,         /*  "     "     "   "       "         */
        NR_INACTIVE_FILE,       /*  "     "     "   "       "         */
        NR_ACTIVE_FILE,         /*  "     "     "   "       "         */
        NR_UNEVICTABLE,         /*  "     "     "   "       "         */
        NR_MLOCK,               /* mlock()ed pages found and moved off LRU */
        NR_ANON_PAGES,          /* Mapped anonymous pages */
        NR_FILE_MAPPED,         /* pagecache pages mapped into pagetables.
                                   only modified from process context */
        NR_FILE_PAGES,
        NR_FILE_DIRTY,
        NR_WRITEBACK,
        NR_SLAB_RECLAIMABLE,
        NR_SLAB_UNRECLAIMABLE,
        NR_PAGETABLE,           /* used for pagetables */
        NR_KERNEL_STACK,
        /* Second 128 byte cacheline */
        NR_UNSTABLE_NFS,        /* NFS unstable pages */
        NR_BOUNCE,
        NR_VMSCAN_WRITE,
        NR_VMSCAN_IMMEDIATE,    /* Prioritise for reclaim when writeback ends */
        NR_WRITEBACK_TEMP,      /* Writeback using temporary buffers */
        NR_ISOLATED_ANON,       /* Temporary isolated pages from anon lru */
        NR_ISOLATED_FILE,       /* Temporary isolated pages from file lru */
        NR_SHMEM,               /* shmem pages (includes tmpfs/GEM pages) */
        NR_DIRTIED,             /* page dirtyings since bootup */
        NR_WRITTEN,             /* page writings since bootup */
#ifdef CONFIG_NUMA
        NUMA_HIT,               /* allocated in intended node */
        NUMA_MISS,              /* allocated in non intended node */
        NUMA_FOREIGN,           /* was intended here, hit elsewhere */
        NUMA_INTERLEAVE_HIT,    /* interleaver preferred this zone */
        NUMA_LOCAL,             /* allocation from local node */
        NUMA_OTHER,             /* allocation from other node */
#endif
        NR_ANON_TRANSPARENT_HUGEPAGES,
        NR_FREE_CMA_PAGES,
        NR_VM_ZONE_STAT_ITEMS };

/*
 * We do arithmetic on the LRU lists in various places in the code,
 * so it is important to keep the active lists LRU_ACTIVE higher in
 * the array than the corresponding inactive lists, and to keep
 * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
 *
 * This has to be kept in sync with the statistics in zone_stat_item
 * above and the descriptions in vmstat_text in mm/vmstat.c
 */
#define LRU_BASE 0
#define LRU_ACTIVE 1
#define LRU_FILE 2

enum lru_list {
        LRU_INACTIVE_ANON = LRU_BASE,
        LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
        LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
        LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
        LRU_UNEVICTABLE,
        NR_LRU_LISTS
};

#define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)

#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)

static inline int is_file_lru(enum lru_list lru)
{
        return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
}

static inline int is_active_lru(enum lru_list lru)
{
        return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}

static inline int is_unevictable_lru(enum lru_list lru)
{
        return (lru == LRU_UNEVICTABLE);
}

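/*
 * Illustrative sketch, not part of the original header: the LRU arithmetic
 * described above means the list index for a page is just LRU_BASE plus the
 * LRU_FILE and LRU_ACTIVE offsets.  The real helpers doing this are
 * page_lru_base_type() and page_lru() in linux/mm_inline.h; this standalone
 * version is only for illustration.
 */
static inline enum lru_list example_lru_index(int file, int active)
{
        return LRU_BASE + (file ? LRU_FILE : 0) + (active ? LRU_ACTIVE : 0);
}
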
struct zone_reclaim_stat {
        /*
         * The pageout code in vmscan.c keeps track of how many of the
         * mem/swap backed and file backed pages are referenced.
         * The higher the rotated/scanned ratio, the more valuable
         * that cache is.
         *
         * The anon LRU stats live in [0], file LRU stats in [1]
         */
        unsigned long recent_rotated[2];
        unsigned long recent_scanned[2];
};

struct lruvec {
        struct list_head lists[NR_LRU_LISTS];
        struct zone_reclaim_stat reclaim_stat;
#ifdef CONFIG_MEMCG
        struct zone *zone;
#endif
};

/* Mask used when gathering information at once (see memcontrol.c) */
#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
#define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
#define LRU_ALL      ((1 << NR_LRU_LISTS) - 1)

/* Isolate clean file */
#define ISOLATE_CLEAN           ((__force isolate_mode_t)0x1)
/* Isolate unmapped file */
#define ISOLATE_UNMAPPED        ((__force isolate_mode_t)0x2)
/* Isolate for asynchronous migration */
#define ISOLATE_ASYNC_MIGRATE   ((__force isolate_mode_t)0x4)
/* Isolate unevictable pages */
#define ISOLATE_UNEVICTABLE     ((__force isolate_mode_t)0x8)

/* LRU Isolation modes. */
typedef unsigned __bitwise__ isolate_mode_t;

enum zone_watermarks {
        WMARK_MIN,
        WMARK_LOW,
        WMARK_HIGH,
        NR_WMARK
};

#define min_wmark_pages(z) (z->watermark[WMARK_MIN])
#define low_wmark_pages(z) (z->watermark[WMARK_LOW])
#define high_wmark_pages(z) (z->watermark[WMARK_HIGH])

struct per_cpu_pages {
        int count;              /* number of pages in the list */
        int high;               /* high watermark, emptying needed */
        int batch;              /* chunk size for buddy add/remove */

        /* Lists of pages, one per migrate type stored on the pcp-lists */
        struct list_head lists[MIGRATE_PCPTYPES];
};

struct per_cpu_pageset {
        struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
        s8 expire;
#endif
#ifdef CONFIG_SMP
        s8 stat_threshold;
        s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif
};

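/*
 * Illustrative sketch, not part of the original header: the per-CPU lists
 * are drained back into the buddy lists once count reaches the high
 * watermark.  This standalone predicate only mirrors the check made by the
 * page-freeing fast path in mm/page_alloc.c (which then frees pcp->batch
 * pages at a time); the helper name is made up for the example.
 */
static inline bool example_pcp_needs_drain(struct per_cpu_pages *pcp)
{
        return pcp->count >= pcp->high;
}
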
#endif /* !__GENERATING_BOUNDS_H */

enum zone_type {
#ifdef CONFIG_ZONE_DMA
        /*
         * ZONE_DMA is used when there are devices that are not able
         * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
         * carve out the portion of memory that is needed for these devices.
         * The range is arch specific.
         *
         * Some examples
         *
         * Architecture         Limit
         * ---------------------------
         * parisc, ia64, sparc  <4G
         * s390                 <2G
         * arm                  Various
         * alpha                Unlimited or 0-16MB.
         *
         * i386, x86_64 and multiple other arches
         *                      <16M.
         */
        ZONE_DMA,
#endif
#ifdef CONFIG_ZONE_DMA32
        /*
         * x86_64 needs two ZONE_DMAs because it supports devices that are
         * only able to do DMA to the lower 16M but also 32 bit devices that
         * can only do DMA to areas below 4G.
         */
        ZONE_DMA32,
#endif
        /*
         * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
         * performed on pages in ZONE_NORMAL if the DMA devices support
         * transfers to all addressable memory.
         */
        ZONE_NORMAL,
#ifdef CONFIG_HIGHMEM
        /*
         * A memory area that is only addressable by the kernel through
         * mapping portions into its own address space. This is for example
         * used by i386 to allow the kernel to address the memory beyond
         * 900MB. The kernel will set up special mappings (page
         * table entries on i386) for each page that the kernel needs to
         * access.
         */
        ZONE_HIGHMEM,
#endif
        ZONE_MOVABLE,
        __MAX_NR_ZONES
};

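/*
 * Illustrative note, not part of the original header: which of the zones
 * above an allocation may use is derived from the GFP zone modifiers by
 * gfp_zone() in linux/gfp.h, roughly:
 *
 *      (no modifier)                   -> ZONE_NORMAL
 *      __GFP_DMA                       -> ZONE_DMA
 *      __GFP_DMA32                     -> ZONE_DMA32
 *      __GFP_HIGHMEM                   -> ZONE_HIGHMEM
 *      __GFP_HIGHMEM | __GFP_MOVABLE   -> ZONE_MOVABLE
 *
 * Lower zones remain usable as fallback via the zonelists described later
 * in this file.
 */
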
#ifndef __GENERATING_BOUNDS_H

struct zone {
        /* Fields commonly accessed by the page allocator */

        /* zone watermarks, access with *_wmark_pages(zone) macros */
        unsigned long watermark[NR_WMARK];

        /*
         * When free pages are below this point, additional steps are taken
         * when reading the number of free pages to avoid per-cpu counter
         * drift allowing watermarks to be breached
         */
        unsigned long percpu_drift_mark;

        /*
         * We don't know if the memory that we're going to allocate will be freeable
         * or/and it will be released eventually, so to avoid totally wasting several
         * GB of ram we must reserve some of the lower zone memory (otherwise we risk
         * to run OOM on the lower zones despite there's tons of freeable ram
         * on the higher zones). This array is recalculated at runtime if the
         * sysctl_lowmem_reserve_ratio sysctl changes.
         */
        unsigned long lowmem_reserve[MAX_NR_ZONES];

        /*
         * This is a per-zone reserve of pages that should not be
         * considered dirtyable memory.
         */
        unsigned long dirty_balance_reserve;

#ifdef CONFIG_NUMA
        int node;
        /*
         * zone reclaim becomes active if more unmapped pages exist.
         */
        unsigned long min_unmapped_pages;
        unsigned long min_slab_pages;
#endif
        struct per_cpu_pageset __percpu *pageset;
        /*
         * free areas of different sizes
         */
        spinlock_t lock;
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
        /* Set to true when the PG_migrate_skip bits should be cleared */
        bool compact_blockskip_flush;

        /* pfns where compaction scanners should start */
        unsigned long compact_cached_free_pfn;
        unsigned long compact_cached_migrate_pfn;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
        /* see spanned/present_pages for more description */
        seqlock_t span_seqlock;
#endif
        struct free_area free_area[MAX_ORDER];

#ifndef CONFIG_SPARSEMEM
        /*
         * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
         * In SPARSEMEM, this map is stored in struct mem_section
         */
        unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_COMPACTION
        /*
         * On compaction failure, 1<<compact_defer_shift compactions
         * are skipped before trying again. The number attempted since
         * last failure is tracked with compact_considered.
         */
        unsigned int compact_considered;
        unsigned int compact_defer_shift;
        int compact_order_failed;
#endif

        ZONE_PADDING(_pad1_)

        /* Fields commonly accessed by the page reclaim scanner */
        spinlock_t lru_lock;
        struct lruvec lruvec;

        unsigned long pages_scanned;    /* since last reclaim */
        unsigned long flags;            /* zone flags, see below */

        /* Zone statistics */
        atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];

        /*
         * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
         * this zone's LRU.  Maintained by the pageout code.
         */
        unsigned int inactive_ratio;


        ZONE_PADDING(_pad2_)
        /* Rarely used or read-mostly fields */

        /*
         * wait_table           -- the array holding the hash table
         * wait_table_hash_nr_entries  -- the size of the hash table array
         * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
         *
         * The purpose of all these is to keep track of the people
         * waiting for a page to become available and make them
         * runnable again when possible. The trouble is that this
         * consumes a lot of space, especially when so few things
         * wait on pages at a given time. So instead of using
         * per-page waitqueues, we use a waitqueue hash table.
         *
         * The bucket discipline is to sleep on the same queue when
         * colliding and wake all in that wait queue when removing.
         * When something wakes, it must check to be sure its page is
         * truly available, a la thundering herd. The cost of a
         * collision is great, but given the expected load of the
         * table, they should be so rare as to be outweighed by the
         * benefits from the saved space.
         *
         * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
         * primary users of these fields, and in mm/page_alloc.c
         * free_area_init_core() performs the initialization of them.
         */
        wait_queue_head_t *wait_table;
        unsigned long wait_table_hash_nr_entries;
        unsigned long wait_table_bits;

        /*
         * Discontig memory support fields.
         */
        struct pglist_data *zone_pgdat;
        /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
        unsigned long zone_start_pfn;

        /*
         * spanned_pages is the total pages spanned by the zone, including
         * holes, which is calculated as:
         *      spanned_pages = zone_end_pfn - zone_start_pfn;
         *
         * present_pages is physical pages existing within the zone, which
         * is calculated as:
         *      present_pages = spanned_pages - absent_pages(pages in holes);
         *
         * managed_pages is present pages managed by the buddy system, which
         * is calculated as (reserved_pages includes pages allocated by the
         * bootmem allocator):
         *      managed_pages = present_pages - reserved_pages;
         *
         * So present_pages may be used by memory hotplug or memory power
         * management logic to figure out unmanaged pages by checking
         * (present_pages - managed_pages). And managed_pages should be used
         * by page allocator and vm scanner to calculate all kinds of watermarks
         * and thresholds.
         *
         * Locking rules:
         *
         * zone_start_pfn and spanned_pages are protected by span_seqlock.
         * It is a seqlock because it has to be read outside of zone->lock,
         * and it is done in the main allocator path.  But, it is written
         * quite infrequently.
         *
         * The span_seq lock is declared along with zone->lock because it is
         * frequently read in proximity to zone->lock.  It's good to
         * give them a chance of being in the same cacheline.
         *
         * Write access to present_pages at runtime should be protected by
         * lock_memory_hotplug()/unlock_memory_hotplug().  Any reader who can't
         * tolerate drift of present_pages should hold the memory hotplug lock
         * to get a stable value.
         *
         * Read access to managed_pages should be safe because it's unsigned
         * long. Write access to zone->managed_pages and totalram_pages is
         * protected by managed_page_count_lock at runtime. Ideally only
         * adjust_managed_page_count() should be used instead of directly
         * touching zone->managed_pages and totalram_pages.
         */
        unsigned long spanned_pages;
        unsigned long present_pages;
        unsigned long managed_pages;

        /*
         * rarely used fields:
         */
        const char *name;
} ____cacheline_internodealigned_in_smp;

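/*
 * Illustrative sketch, not part of the original header: the relationships
 * spelled out in the spanned/present/managed comment above, expressed as
 * code.  Only the three counters are stored; hole pages and boot-time
 * reservations are derived values.  The helper names are made up for the
 * example.
 */
static inline unsigned long example_zone_absent_pages(struct zone *z)
{
        return z->spanned_pages - z->present_pages;     /* pages in holes */
}

static inline unsigned long example_zone_reserved_pages(struct zone *z)
{
        return z->present_pages - z->managed_pages;     /* e.g. bootmem pages */
}
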
typedef enum {
        ZONE_RECLAIM_LOCKED,            /* prevents concurrent reclaim */
        ZONE_OOM_LOCKED,                /* zone is in OOM killer zonelist */
        ZONE_CONGESTED,                 /* zone has many dirty pages backed by
                                         * a congested BDI
                                         */
        ZONE_TAIL_LRU_DIRTY,            /* reclaim scanning has recently found
                                         * many dirty file pages at the tail
                                         * of the LRU.
                                         */
        ZONE_WRITEBACK,                 /* reclaim scanning has recently found
                                         * many pages under writeback
                                         */
} zone_flags_t;

static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
{
        set_bit(flag, &zone->flags);
}

static inline int zone_test_and_set_flag(struct zone *zone, zone_flags_t flag)
{
        return test_and_set_bit(flag, &zone->flags);
}

static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
{
        clear_bit(flag, &zone->flags);
}

static inline int zone_is_reclaim_congested(const struct zone *zone)
{
        return test_bit(ZONE_CONGESTED, &zone->flags);
}

static inline int zone_is_reclaim_dirty(const struct zone *zone)
{
        return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags);
}

static inline int zone_is_reclaim_writeback(const struct zone *zone)
{
        return test_bit(ZONE_WRITEBACK, &zone->flags);
}

static inline int zone_is_reclaim_locked(const struct zone *zone)
{
        return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
}

static inline int zone_is_oom_locked(const struct zone *zone)
{
        return test_bit(ZONE_OOM_LOCKED, &zone->flags);
}

static inline unsigned long zone_end_pfn(const struct zone *zone)
{
        return zone->zone_start_pfn + zone->spanned_pages;
}

static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
{
        return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
}

static inline bool zone_is_initialized(struct zone *zone)
{
        return !!zone->wait_table;
}

static inline bool zone_is_empty(struct zone *zone)
{
        return zone->spanned_pages == 0;
}

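/*
 * Illustrative sketch, not part of the original header: combining the two
 * helpers above to ask whether an entire pfn range lies within the span of
 * a zone (holes included, since only the spanned range is checked).  The
 * helper name is made up; callers are assumed to pass nr_pages > 0.
 */
static inline bool example_zone_spans_range(const struct zone *zone,
                                            unsigned long start_pfn,
                                            unsigned long nr_pages)
{
        return zone_spans_pfn(zone, start_pfn) &&
               zone_spans_pfn(zone, start_pfn + nr_pages - 1);
}
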
/*
 * The "priority" of VM scanning is how much of the queues we will scan in one
 * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
 * queues ("queue_length >> 12") during an aging round.
 */
#define DEF_PRIORITY 12

/* Maximum number of zones on a zonelist */
#define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES)

#ifdef CONFIG_NUMA

/*
 * The NUMA zonelists are doubled because we need zonelists that restrict the
 * allocations to a single node for GFP_THISNODE.
 *
 * [0]  : Zonelist with fallback
 * [1]  : No fallback (GFP_THISNODE)
 */
#define MAX_ZONELISTS 2


/*
 * We cache key information from each zonelist for smaller cache
 * footprint when scanning for free pages in get_page_from_freelist().
 *
 * 1) The BITMAP fullzones tracks which zones in a zonelist have come
 *    up short of free memory since the last time (last_fullzone_zap)
 *    we zero'd fullzones.
 * 2) The array z_to_n[] maps each zone in the zonelist to its node
 *    id, so that we can efficiently evaluate whether that node is
 *    set in the current task's mems_allowed.
 *
 * Both fullzones and z_to_n[] are one-to-one with the zonelist,
 * indexed by a zone's offset in the zonelist zones[] array.
 *
 * The get_page_from_freelist() routine does two scans.  During the
 * first scan, we skip zones whose corresponding bit in 'fullzones'
 * is set or whose corresponding node in current->mems_allowed (which
 * comes from cpusets) is not set.  During the second scan, we bypass
 * this zonelist_cache, to ensure we look methodically at each zone.
 *
 * Once per second, we zero out (zap) fullzones, forcing us to
 * reconsider nodes that might have regained more free memory.
 * The field last_full_zap is the time we last zapped fullzones.
 *
 * This mechanism reduces the amount of time we waste repeatedly
 * reexamining zones for free memory when they just came up low on
 * memory momentarily ago.
 *
 * The zonelist_cache struct members logically belong in struct
 * zonelist.  However, the mempolicy zonelists constructed for
 * MPOL_BIND are intentionally variable length (and usually much
 * shorter).  A general purpose mechanism for handling structs with
 * multiple variable length members is more mechanism than we want
 * here.  We resort to some special case hackery instead.
 *
 * The MPOL_BIND zonelists don't need this zonelist_cache (in good
 * part because they are shorter), so we put the fixed length stuff
 * at the front of the zonelist struct, ending in a variable length
 * zones[], as is needed by MPOL_BIND.
 *
 * Then we put the optional zonelist cache on the end of the zonelist
 * struct.  This optional stuff is found by a 'zlcache_ptr' pointer in
 * the fixed length portion at the front of the struct.  This pointer
 * both enables us to find the zonelist cache, and in the case of
 * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL)
 * to know that the zonelist cache is not there.
 *
 * The end result is that struct zonelists come in two flavors:
 *  1) The full, fixed length version, shown below, and
 *  2) The custom zonelists for MPOL_BIND.
 * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache.
 *
 * Even though there may be multiple CPU cores on a node modifying
 * fullzones or last_full_zap in the same zonelist_cache at the same
 * time, we don't lock it.  This is just hint data - if it is wrong now
 * and then, the allocator will still function, perhaps a bit slower.
 */


struct zonelist_cache {
        unsigned short z_to_n[MAX_ZONES_PER_ZONELIST];          /* zone->nid */
        DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST);      /* zone full? */
        unsigned long last_full_zap;            /* when last zap'd (jiffies) */
};
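
/*
 * Illustrative sketch, not part of the original header: roughly the check
 * the zonelist cache enables during the first allocator scan described
 * above (the real code lives in mm/page_alloc.c); the helper name and
 * parameters here are made up for the example.
 */
static inline bool example_zlc_zone_worth_trying(struct zonelist_cache *zlc,
                                                 int zoneref_idx,
                                                 nodemask_t *allowednodes)
{
        /* recently found full: skip it until the next zap */
        if (test_bit(zoneref_idx, zlc->fullzones))
                return false;

        /* only worth trying if the zone's node is allowed by the cpuset */
        return node_isset(zlc->z_to_n[zoneref_idx], *allowednodes);
}
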
#else
#define MAX_ZONELISTS 1
struct zonelist_cache;
#endif

/*
 * This struct contains information about a zone in a zonelist. It is stored
 * here to avoid dereferences into large structures and lookups of tables
 */
struct zoneref {
        struct zone *zone;      /* Pointer to actual zone */
        int zone_idx;           /* zone_idx(zoneref->zone) */
};

/*
 * One allocation request operates on a zonelist. A zonelist
 * is a list of zones, the first one is the 'goal' of the
 * allocation, the other zones are fallback zones, in decreasing
 * priority.
 *
 * If zlcache_ptr is not NULL, then it is just the address of zlcache,
 * as explained above.  If zlcache_ptr is NULL, there is no zlcache.
 *
 * To speed the reading of the zonelist, the zonerefs contain the zone index
 * of the entry being read. Helper functions to access information given
 * a struct zoneref are
 *
 * zonelist_zone()      - Return the struct zone * for an entry in _zonerefs
 * zonelist_zone_idx()  - Return the index of the zone for an entry
 * zonelist_node_idx()  - Return the index of the node for an entry
 */
struct zonelist {
        struct zonelist_cache *zlcache_ptr;                  // NULL or &zlcache
        struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
#ifdef CONFIG_NUMA
        struct zonelist_cache zlcache;                       // optional ...
#endif
};

#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
struct node_active_region {
        unsigned long start_pfn;
        unsigned long end_pfn;
        int nid;
};
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */

#ifndef CONFIG_DISCONTIGMEM
/* The array of struct pages - for discontigmem use pgdat->lmem_map */
extern struct page *mem_map;
#endif

/*
 * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
 * (mostly NUMA machines?) to denote a higher-level memory zone than the
 * zone denotes.
 *
 * On NUMA machines, each NUMA node would have a pg_data_t to describe
 * its memory layout.
 *
 * Memory statistics and page replacement data structures are maintained on a
 * per-zone basis.
 */
struct bootmem_data;
typedef struct pglist_data {
        struct zone node_zones[MAX_NR_ZONES];
        struct zonelist node_zonelists[MAX_ZONELISTS];
        int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
        struct page *node_mem_map;
#ifdef CONFIG_MEMCG
        struct page_cgroup *node_page_cgroup;
#endif
#endif
#ifndef CONFIG_NO_BOOTMEM
        struct bootmem_data *bdata;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
        /*
         * Must be held any time you expect node_start_pfn, node_present_pages
         * or node_spanned_pages stay constant.  Holding this will also
         * guarantee that any pfn_valid() stays that way.
         *
         * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
         * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG.
         *
         * Nests above zone->lock and zone->span_seqlock
         */
        spinlock_t node_size_lock;
#endif
        unsigned long node_start_pfn;
        unsigned long node_present_pages; /* total number of physical pages */
        unsigned long node_spanned_pages; /* total size of physical page
                                             range, including holes */
        int node_id;
        nodemask_t reclaim_nodes;       /* Nodes allowed to reclaim from */
        wait_queue_head_t kswapd_wait;
        wait_queue_head_t pfmemalloc_wait;
        struct task_struct *kswapd;     /* Protected by lock_memory_hotplug() */
        int kswapd_max_order;
        enum zone_type classzone_idx;
#ifdef CONFIG_NUMA_BALANCING
        /*
         * Lock serializing the per destination node AutoNUMA memory
         * migration rate limiting data.
         */
        spinlock_t numabalancing_migrate_lock;

        /* Rate limiting time interval */
        unsigned long numabalancing_migrate_next_window;

        /* Number of pages migrated during the rate limiting time interval */
        unsigned long numabalancing_migrate_nr_pages;
#endif
} pg_data_t;

#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
#ifdef CONFIG_FLAT_NODE_MEM_MAP
#define pgdat_page_nr(pgdat, pagenr)    ((pgdat)->node_mem_map + (pagenr))
#else
#define pgdat_page_nr(pgdat, pagenr)    pfn_to_page((pgdat)->node_start_pfn + (pagenr))
#endif
#define nid_page_nr(nid, pagenr)        pgdat_page_nr(NODE_DATA(nid),(pagenr))

#define node_start_pfn(nid)     (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))

static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
        return pgdat->node_start_pfn + pgdat->node_spanned_pages;
}

static inline bool pgdat_is_empty(pg_data_t *pgdat)
{
        return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
}

#include <linux/memory_hotplug.h>

extern struct mutex zonelists_mutex;
void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
                int classzone_idx, int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
                int classzone_idx, int alloc_flags);
enum memmap_context {
        MEMMAP_EARLY,
        MEMMAP_HOTPLUG,
};
extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
                                     unsigned long size,
                                     enum memmap_context context);

extern void lruvec_init(struct lruvec *lruvec);

static inline struct zone *lruvec_zone(struct lruvec *lruvec)
{
#ifdef CONFIG_MEMCG
        return lruvec->zone;
#else
        return container_of(lruvec, struct zone, lruvec);
#endif
}

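/*
 * Illustrative sketch, not part of the original header: tying together
 * struct zone, its embedded lruvec and enum lru_list.  With CONFIG_MEMCG the
 * lruvec actually used for a page may belong to a memory cgroup instead;
 * this helper, whose name is made up, only shows the global per-zone case.
 */
static inline struct list_head *example_zone_lru_list(struct zone *zone,
                                                      enum lru_list lru)
{
        return &zone->lruvec.lists[lru];
}
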
#ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end);
#else
static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
#endif

#ifdef CONFIG_HAVE_MEMORYLESS_NODES
int local_memory_node(int node_id);
#else
static inline int local_memory_node(int node_id) { return node_id; };
#endif

#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
#endif

/*
 * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
 */
#define zone_idx(zone)          ((zone) - (zone)->zone_pgdat->node_zones)

static inline int populated_zone(struct zone *zone)
{
        return (!!zone->present_pages);
}

extern int movable_zone;

static inline int zone_movable_is_highmem(void)
{
#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
        return movable_zone == ZONE_HIGHMEM;
#else
        return 0;
#endif
}

static inline int is_highmem_idx(enum zone_type idx)
{
#ifdef CONFIG_HIGHMEM
        return (idx == ZONE_HIGHMEM ||
                (idx == ZONE_MOVABLE && zone_movable_is_highmem()));
#else
        return 0;
#endif
}

/**
 * is_highmem - helper function to quickly check if a struct zone is a
 *              highmem zone or not.  This is an attempt to keep references
 *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
 * @zone - pointer to struct zone variable
 */
static inline int is_highmem(struct zone *zone)
{
#ifdef CONFIG_HIGHMEM
        int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones;
        return zone_off == ZONE_HIGHMEM * sizeof(*zone) ||
               (zone_off == ZONE_MOVABLE * sizeof(*zone) &&
                zone_movable_is_highmem());
#else
        return 0;
#endif
}

/* These two functions are used to setup the per zone pages min values */
struct ctl_table;
int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);

extern int numa_zonelist_order_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
extern char numa_zonelist_order[];
#define NUMA_ZONELIST_ORDER_LEN 16      /* string buffer size */

#ifndef CONFIG_NEED_MULTIPLE_NODES

extern struct pglist_data contig_page_data;
#define NODE_DATA(nid)          (&contig_page_data)
#define NODE_MEM_MAP(nid)       mem_map

#else /* CONFIG_NEED_MULTIPLE_NODES */

#include <asm/mmzone.h>

#endif /* !CONFIG_NEED_MULTIPLE_NODES */

extern struct pglist_data *first_online_pgdat(void);
extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
extern struct zone *next_zone(struct zone *zone);

/**
 * for_each_online_pgdat - helper macro to iterate over all online nodes
 * @pgdat - pointer to a pg_data_t variable
 */
#define for_each_online_pgdat(pgdat)                    \
        for (pgdat = first_online_pgdat();              \
             pgdat;                                     \
             pgdat = next_online_pgdat(pgdat))
/**
 * for_each_zone - helper macro to iterate over all memory zones
 * @zone - pointer to struct zone variable
 *
 * The user only needs to declare the zone variable, for_each_zone
 * fills it in.
 */
#define for_each_zone(zone)                             \
        for (zone = (first_online_pgdat())->node_zones; \
             zone;                                      \
             zone = next_zone(zone))

#define for_each_populated_zone(zone)                   \
        for (zone = (first_online_pgdat())->node_zones; \
             zone;                                      \
             zone = next_zone(zone))                    \
                if (!populated_zone(zone))              \
                        ; /* do nothing */              \
                else

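/*
 * Illustrative sketch, not part of the original header: a typical use of the
 * iterator above, totalling the pages under buddy-allocator control across
 * every populated zone in the system.  The helper name is made up; the
 * kernel keeps an equivalent running total in totalram_pages.
 */
static inline unsigned long example_total_managed_pages(void)
{
        struct zone *zone;
        unsigned long total = 0;

        for_each_populated_zone(zone)
                total += zone->managed_pages;

        return total;
}
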
static inline struct zone *zonelist_zone(struct zoneref *zoneref)
{
        return zoneref->zone;
}

static inline int zonelist_zone_idx(struct zoneref *zoneref)
{
        return zoneref->zone_idx;
}

static inline int zonelist_node_idx(struct zoneref *zoneref)
{
#ifdef CONFIG_NUMA
        /* zone_to_nid not available in this context */
        return zoneref->zone->node;
#else
        return 0;
#endif /* CONFIG_NUMA */
}

/**
 * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
 * @z - The cursor used as a starting point for the search
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
 * @zone - The first suitable zone found is returned via this parameter
 *
 * This function returns the next zone at or below a given zone index that is
 * within the allowed nodemask using a cursor as the starting point for the
 * search. The zoneref returned is a cursor that represents the current zone
 * being examined. It should be advanced by one before calling
 * next_zones_zonelist again.
 */
struct zoneref *next_zones_zonelist(struct zoneref *z,
                                        enum zone_type highest_zoneidx,
                                        nodemask_t *nodes,
                                        struct zone **zone);

/**
 * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
 * @zonelist - The zonelist to search for a suitable zone
 * @highest_zoneidx - The zone index of the highest zone to return
 * @nodes - An optional nodemask to filter the zonelist with
 * @zone - The first suitable zone found is returned via this parameter
 *
 * This function returns the first zone at or below a given zone index that is
 * within the allowed nodemask. The zoneref returned is a cursor that can be
 * used to iterate the zonelist with next_zones_zonelist by advancing it by
 * one before calling.
 */
static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
                                        enum zone_type highest_zoneidx,
                                        nodemask_t *nodes,
                                        struct zone **zone)
{
        return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes,
                                                                zone);
}

/**
 * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
 * @zone - The current zone in the iterator
 * @z - The current pointer within zonelist->zones being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 * @nodemask - Nodemask allowed by the allocator
 *
 * This iterator iterates through all zones at or below a given zone index and
 * within a given nodemask
 */
#define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
        for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \
                zone;                                                   \
                z = next_zones_zonelist(++z, highidx, nodemask, &zone))

/**
 * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
 * @zone - The current zone in the iterator
 * @z - The current pointer within zonelist->zones being iterated
 * @zlist - The zonelist being iterated
 * @highidx - The zone index of the highest zone to return
 *
 * This iterator iterates through all zones at or below a given zone index.
 */
#define for_each_zone_zonelist(zone, z, zlist, highidx) \
        for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)

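/*
 * Illustrative sketch, not part of the original header: the classic consumer
 * of these iterators is get_page_from_freelist() in mm/page_alloc.c.  The
 * helper below is a simplified stand-in: it walks the zonelist and returns
 * the first zone passing a watermark check.  Which watermark applies depends
 * on alloc_flags in the real code; low_wmark_pages() and the helper name are
 * assumptions made for the example.
 */
static inline struct zone *example_first_suitable_zone(struct zonelist *zonelist,
                                        enum zone_type high_zoneidx,
                                        nodemask_t *nodemask,
                                        int order, int alloc_flags)
{
        struct zoneref *z;
        struct zone *zone;

        for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
                                                                nodemask) {
                if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
                                      zonelist_zone_idx(z), alloc_flags))
                        return zone;
        }

        return NULL;
}
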
#ifdef CONFIG_SPARSEMEM
#include <asm/sparsemem.h>
#endif

#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
        !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
static inline unsigned long early_pfn_to_nid(unsigned long pfn)
{
        return 0;
}
#endif

#ifdef CONFIG_FLATMEM
#define pfn_to_nid(pfn)         (0)
#endif

#ifdef CONFIG_SPARSEMEM

/*
 * SECTION_SHIFT                #bits space required to store a section #
 *
 * PA_SECTION_SHIFT             physical address to/from section number
 * PFN_SECTION_SHIFT            pfn to/from section number
 */
#define PA_SECTION_SHIFT        (SECTION_SIZE_BITS)
#define PFN_SECTION_SHIFT       (SECTION_SIZE_BITS - PAGE_SHIFT)

#define NR_MEM_SECTIONS         (1UL << SECTIONS_SHIFT)

#define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
#define PAGE_SECTION_MASK       (~(PAGES_PER_SECTION-1))

#define SECTION_BLOCKFLAGS_BITS \
        ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)

#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
#error Allocator MAX_ORDER exceeds SECTION_SIZE
#endif

#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)

#define SECTION_ALIGN_UP(pfn)   (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)

struct page;
struct page_cgroup;
struct mem_section {
        /*
         * This is, logically, a pointer to an array of struct
         * pages.  However, it is stored with some other magic.
         * (see sparse.c::sparse_init_one_section())
         *
         * Additionally during early boot we encode node id of
         * the location of the section here to guide allocation.
         * (see sparse.c::memory_present())
         *
         * Making it a UL at least makes someone do a cast
         * before using it wrong.
         */
        unsigned long section_mem_map;

        /* See declaration of similar field in struct zone */
        unsigned long *pageblock_flags;
#ifdef CONFIG_MEMCG
        /*
         * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
         * section. (see memcontrol.h/page_cgroup.h about this.)
         */
        struct page_cgroup *page_cgroup;
        unsigned long pad;
#endif
        /*
         * WARNING: mem_section must be a power-of-2 in size for the
         * calculation and use of SECTION_ROOT_MASK to make sense.
         */
};

#ifdef CONFIG_SPARSEMEM_EXTREME
#define SECTIONS_PER_ROOT       (PAGE_SIZE / sizeof (struct mem_section))
#else
#define SECTIONS_PER_ROOT       1
#endif

#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT)
#define NR_SECTION_ROOTS        DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT)
#define SECTION_ROOT_MASK       (SECTIONS_PER_ROOT - 1)

#ifdef CONFIG_SPARSEMEM_EXTREME
extern struct mem_section *mem_section[NR_SECTION_ROOTS];
#else
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif

static inline struct mem_section *__nr_to_section(unsigned long nr)
{
        if (!mem_section[SECTION_NR_TO_ROOT(nr)])
                return NULL;
        return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
extern int __section_nr(struct mem_section* ms);
extern unsigned long usemap_size(void);

/*
 * We use the lower bits of the mem_map pointer to store
 * a little bit of information.  There should be at least
 * 3 bits here due to 32-bit alignment.
 */
#define SECTION_MARKED_PRESENT  (1UL<<0)
#define SECTION_HAS_MEM_MAP     (1UL<<1)
#define SECTION_MAP_LAST_BIT    (1UL<<2)
#define SECTION_MAP_MASK        (~(SECTION_MAP_LAST_BIT-1))
#define SECTION_NID_SHIFT       2

static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
        unsigned long map = section->section_mem_map;
        map &= SECTION_MAP_MASK;
        return (struct page *)map;
}

static inline int present_section(struct mem_section *section)
{
        return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
}

static inline int present_section_nr(unsigned long nr)
{
        return present_section(__nr_to_section(nr));
}

static inline int valid_section(struct mem_section *section)
{
        return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
}

static inline int valid_section_nr(unsigned long nr)
{
        return valid_section(__nr_to_section(nr));
}

static inline struct mem_section *__pfn_to_section(unsigned long pfn)
{
        return __nr_to_section(pfn_to_section_nr(pfn));
}

#ifndef CONFIG_HAVE_ARCH_PFN_VALID
static inline int pfn_valid(unsigned long pfn)
{
        if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
                return 0;
        return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
}
#endif

static inline int pfn_present(unsigned long pfn)
{
        if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
                return 0;
        return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
}

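/*
 * Illustrative sketch, not part of the original header: with classic
 * SPARSEMEM (no VMEMMAP), section_mem_map stores the mem_map pointer
 * pre-biased by the section's start pfn (see mm/sparse.c), so translating a
 * pfn into its struct page is just an index into the section's map once the
 * flag bits above have been masked off.  The macro name is made up; the real
 * implementation is __pfn_to_page() in asm-generic/memory_model.h.
 */
#define example_sparse_pfn_to_page(pfn)                                 \
({      unsigned long __e_pfn = (pfn);                                  \
        __section_mem_map_addr(__pfn_to_section(__e_pfn)) + __e_pfn;   \
})
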
/*
 * These are _only_ used during initialisation, therefore they
 * can use __initdata ...  They could have names to indicate
 * this restriction.
 */
#ifdef CONFIG_NUMA
#define pfn_to_nid(pfn)                                                 \
({                                                                      \
        unsigned long __pfn_to_nid_pfn = (pfn);                         \
        page_to_nid(pfn_to_page(__pfn_to_nid_pfn));                     \
})
#else
#define pfn_to_nid(pfn)         (0)
#endif

#define early_pfn_valid(pfn)    pfn_valid(pfn)
void sparse_init(void);
#else
#define sparse_init()   do {} while (0)
#define sparse_index_init(_sec, _nid)  do {} while (0)
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_NODES_SPAN_OTHER_NODES
bool early_pfn_in_nid(unsigned long pfn, int nid);
#else
#define early_pfn_in_nid(pfn, nid)      (1)
#endif

#ifndef early_pfn_valid
#define early_pfn_valid(pfn)    (1)
#endif

void memory_present(int nid, unsigned long start, unsigned long end);
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);

/*
 * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
 * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
 * pfn_valid_within() should be used in this case; we optimise this away
 * when we have no holes within a MAX_ORDER_NR_PAGES block.
 */
#ifdef CONFIG_HOLES_IN_ZONE
#define pfn_valid_within(pfn) pfn_valid(pfn)
#else
#define pfn_valid_within(pfn) (1)
#endif

#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
/*
 * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
 * associated with it or not. In FLATMEM, it is expected that holes always
 * have valid memmap as long as there are valid PFNs on either side of the
 * hole. In SPARSEMEM, it is assumed that a valid section has a memmap for
 * the entire section.
 *
 * However, ARM, and maybe other embedded architectures in the future, may
 * free the memmap backing holes to save memory on the assumption the memmap
 * is never used. The page_zone linkages are then broken even though
 * pfn_valid() returns true. A walker of the full memmap must then do this
 * additional check to ensure the memmap they are looking at is sane by
 * making sure the zone and PFN linkages are still valid. This is expensive,
 * but walkers of the full memmap are extremely rare.
 */
int memmap_valid_within(unsigned long pfn,
                                        struct page *page, struct zone *zone);
#else
static inline int memmap_valid_within(unsigned long pfn,
                                        struct page *page, struct zone *zone)
{
        return 1;
}
#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */

#endif /* !__GENERATING_BOUNDS_H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */