Linux Core Kernel Commentary


include/linux/mm.h


15083 #ifndef _LINUX_MM_H
15084 #define _LINUX_MM_H
15085
15086 #include <linux/sched.h>
15087 #include <linux/errno.h>
15088
15089 #ifdef __KERNEL__
15090
15091 #include <linux/string.h>
15092
15093 extern unsigned long max_mapnr;
15094 extern unsigned long num_physpages;
15095 extern void * high_memory;
15096 extern int page_cluster;
15097
15098 #include <asm/page.h>
15099 #include <asm/atomic.h>
15100
15101 /* Linux kernel virtual memory manager primitives. The
15102  * idea being to have a "virtual" mm in the same way we
15103  * have a virtual fs - giving a cleaner interface to the
15104  * mm details, and allowing different kinds of memory
15105  * mappings (from shared memory to executable loading to
15106  * arbitrary mmap() functions). */
15107
15108 /* This struct defines a memory VMM memory area. There is
15109  * one of these per VM-area/task. A VM area is any part
15110  * of the process virtual memory space that has a special
15111  * rule for the page-fault handlers (ie a shared library,
15112  * the executable area etc). */
15113 struct vm_area_struct {
15114   struct mm_struct * vm_mm;       /* VM area parameters */
15115   unsigned long vm_start;
15116   unsigned long vm_end;
15117
15118   /* linked list of VM areas per task, sorted by addr */
15119   struct vm_area_struct *vm_next;
15120
15121   pgprot_t vm_page_prot;
15122   unsigned short vm_flags;
15123
15124   /* AVL tree of VM areas per task, sorted by address */
15125   short vm_avl_height;
15126   struct vm_area_struct * vm_avl_left;
15127   struct vm_area_struct * vm_avl_right;
15128
15129   /* For areas with inode, the list inode->i_mmap, for
15130    * shm areas, the list of attaches, else unused. */
15131   struct vm_area_struct *vm_next_share;
15132   struct vm_area_struct **vm_pprev_share;
15133
15134   struct vm_operations_struct * vm_ops;
15135   unsigned long vm_offset;
15136   struct file * vm_file;
15137   unsigned long vm_pte;             /* shared mem */
15138 };
15139
15140 /* vm_flags.. */
15141 #define VM_READ       0x0001  /* currently active flags */
15142 #define VM_WRITE      0x0002
15143 #define VM_EXEC       0x0004
15144 #define VM_SHARED     0x0008
15145
15146 #define VM_MAYREAD    0x0010  /* limits for mprotect() etc */
15147 #define VM_MAYWRITE   0x0020
15148 #define VM_MAYEXEC    0x0040
15149 #define VM_MAYSHARE   0x0080
15150
15151 #define VM_GROWSDOWN  0x0100  /* general info on segment */
15152 #define VM_GROWSUP    0x0200
15153 #define VM_SHM        0x0400  /* shared mem, don't swap out */
15154 #define VM_DENYWRITE  0x0800  /* ETXTBSY on write. */
15155
15156 #define VM_EXECUTABLE 0x1000
15157 #define VM_LOCKED     0x2000
15158 #define VM_IO         0x4000  /* Mem-mapped I/O or similar */
15159
15160 #define VM_STACK_FLAGS 0x0177
15161
15162 /* mapping from the currently active vm_flags protection
15163  * bits (the low four bits) to a page protection mask. */
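The low four vm_flags bits (VM_READ, VM_WRITE, VM_EXEC, VM_SHARED) index the protection_map table declared just below. A minimal sketch of the idiom, with the hypothetical helper set_area_prot() standing in for the equivalent inline code in do_mmap():

/* Sketch only: set_area_prot() is a hypothetical helper.
 * Only the low four protection bits select the hardware
 * protection; VM_MAYREAD and friends do not affect it. */
static void set_area_prot(struct vm_area_struct *vma,
  unsigned short vm_flags)
{
  vma->vm_flags = vm_flags;
  vma->vm_page_prot = protection_map[vm_flags & 0x0f];
}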
15164 extern pgprot_t protection_map[16];
15165
15166
15167 /* These are the virtual MM functions - opening of an
15168  * area, closing and unmapping it (needed to keep files
15169  * on disk up-to-date etc), pointer to the functions
15170  * called when a no-page or a wp-page exception occurs. */
15171 struct vm_operations_struct {
15172   void (*open)(struct vm_area_struct * area);
15173   void (*close)(struct vm_area_struct * area);
15174   void (*unmap)(struct vm_area_struct *area,
15175     unsigned long, size_t);
15176   void (*protect)(struct vm_area_struct *area,
15177     unsigned long, size_t, unsigned int newprot);
15178   int (*sync)(struct vm_area_struct *area,
15179     unsigned long, size_t, unsigned int flags);
15180   void (*advise)(struct vm_area_struct *area,
15181     unsigned long, size_t, unsigned int advise);
15182   unsigned long (*nopage)(struct vm_area_struct * area,
15183     unsigned long address, int write_access);
15184   unsigned long (*wppage)(struct vm_area_struct * area,
15185     unsigned long address, unsigned long page);
15186   int (*swapout)(struct vm_area_struct *, struct page *);
15187   pte_t (*swapin)(struct vm_area_struct *, unsigned long,
15188     unsigned long);
15189 };
15190
15191 /* Try to keep the most commonly accessed fields in
15192  * single cache lines here (16 bytes or greater). This
15193  * ordering should be particularly beneficial on 32-bit
15194  * processors.
15195  *
15196  * The first line is data used in page cache lookup, the
15197  * second line is used for linear searches (eg. clock
15198  * algorithm scans). */
15199 typedef struct page {
15200   /* these must be first (free area handling) */
15201   struct page *next;
15202   struct page *prev;
15203   struct inode *inode;
15204   unsigned long offset;
15205   struct page *next_hash;
15206   atomic_t count;
15207   /* atomic flags, some possibly updated asynchronously */
15208   unsigned long flags;
15209   struct wait_queue *wait;
15210   struct page **pprev_hash;
15211   struct buffer_head * buffers;
15212 } mem_map_t;
15213
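For illustration, here is how a driver of this era might populate a vm_operations_struct. Only nopage is provided, so the remaining operations fall back to the kernel's defaults; the mydev_* names are hypothetical, and on this kernel the nopage hook returns the kernel virtual address of a page to back the faulting address (0 on failure):

/* Hypothetical driver sketch, not part of mm.h: back every
 * fault in the area with a freshly cleared page. */
static unsigned long mydev_nopage(struct vm_area_struct *area,
  unsigned long address, int write_access)
{
  unsigned long page = __get_free_page(GFP_KERNEL);

  if (page)
    clear_page(page);
  return page;    /* 0 tells the fault handler we failed */
}

static struct vm_operations_struct mydev_vm_ops = {
  NULL,           /* open */
  NULL,           /* close */
  NULL,           /* unmap */
  NULL,           /* protect */
  NULL,           /* sync */
  NULL,           /* advise */
  mydev_nopage,   /* nopage */
  NULL,           /* wppage */
  NULL,           /* swapout */
  NULL,           /* swapin */
};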
15214 /* Page flag bit values */
15215 #define PG_locked             0
15216 #define PG_error              1
15217 #define PG_referenced         2
15218 #define PG_dirty              3
15219 #define PG_uptodate           4
15220 #define PG_free_after         5
15221 #define PG_decr_after         6
15222 #define PG_swap_unlock_after  7
15223 #define PG_DMA                8
15224 #define PG_Slab               9
15225 #define PG_swap_cache        10
15226 #define PG_skip              11
15227 #define PG_reserved          31
15228
15229 /* Make it prettier to test the above... */
15230 #define PageLocked(page) \
15231   (test_bit(PG_locked, &(page)->flags))
15232 #define PageError(page) \
15233   (test_bit(PG_error, &(page)->flags))
15234 #define PageReferenced(page) \
15235   (test_bit(PG_referenced, &(page)->flags))
15236 #define PageDirty(page) \
15237   (test_bit(PG_dirty, &(page)->flags))
15238 #define PageUptodate(page) \
15239   (test_bit(PG_uptodate, &(page)->flags))
15240 #define PageFreeAfter(page) \
15241   (test_bit(PG_free_after, &(page)->flags))
15242 #define PageDecrAfter(page) \
15243   (test_bit(PG_decr_after, &(page)->flags))
15244 #define PageSwapUnlockAfter(page) \
15245   (test_bit(PG_swap_unlock_after, &(page)->flags))
15246 #define PageDMA(page) \
15247   (test_bit(PG_DMA, &(page)->flags))
15248 #define PageSlab(page) \
15249   (test_bit(PG_Slab, &(page)->flags))
15250 #define PageSwapCache(page) \
15251   (test_bit(PG_swap_cache, &(page)->flags))
15252 #define PageReserved(page) \
15253   (test_bit(PG_reserved, &(page)->flags))
15254
15255 #define PageSetSlab(page) \
15256   (set_bit(PG_Slab, &(page)->flags))
15257 #define PageSetSwapCache(page) \
15258   (set_bit(PG_swap_cache, &(page)->flags))
15259
15260 #define PageTestandSetDirty(page) \
15261   (test_and_set_bit(PG_dirty, &(page)->flags))
15262 #define PageTestandSetSwapCache(page) \
15263   (test_and_set_bit(PG_swap_cache, &(page)->flags))
15264
15265 #define PageClearSlab(page) \
15266   (clear_bit(PG_Slab, &(page)->flags))
15267 #define PageClearSwapCache(page) \
15268   (clear_bit(PG_swap_cache, &(page)->flags))
15269
15270 #define PageTestandClearDirty(page) \
15271   (test_and_clear_bit(PG_dirty, &(page)->flags))
15272 #define PageTestandClearSwapCache(page) \
15273   (test_and_clear_bit(PG_swap_cache, &(page)->flags))
15274
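All of these wrappers reduce to the atomic bit operations from asm/bitops.h; the test-and-set forms return the previous value of the bit, which is what makes them usable as tiny locks. A sketch of the PG_locked convention (the real lock/wait helpers live in the page cache code, not here):

/* Sketch only: try to take the I/O lock on a page.  A zero
 * return means we just set PG_locked ourselves; nonzero
 * means another task holds it, and a caller would sleep on
 * page->wait until the bit is cleared. */
static inline int try_lock_page(struct page *page)
{
  return test_and_set_bit(PG_locked, &page->flags);
}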
15275 /* Various page->flags bits:
15276  *
15277  * PG_reserved is set for a page which must never be
15278  * accessed (which may not even be present).
15279  *
15280  * PG_DMA is set for those pages which lie in the range
15281  * of physical addresses capable of carrying DMA
15282  * transfers.
15283  *
15284  * Multiple processes may "see" the same page. E.g. for
15285  * untouched mappings of /dev/null, all processes see the
15286  * same page full of zeroes, and text pages of
15287  * executables and shared libraries have only one copy in
15288  * memory, at most, normally.
15289  *
15290  * For the non-reserved pages, page->count denotes a
15291  * reference count.
15292  *   page->count == 0 means the page is free.
15293  *   page->count == 1 means the page is used for exactly
15294  *   one purpose
15295  *   (e.g. a private data page of one process).
15296  *
15297  * A page may be used for kmalloc() or anyone else who
15298  * does a get_free_page(). In this case the page->count
15299  * is at least 1, and all other fields are unused but
15300  * should be 0 or NULL. The management of this page is
15301  * the responsibility of the one who uses it.
15302  *
15303  * The other pages (we may call them "process pages") are
15304  * completely managed by the Linux memory manager: I/O,
15305  * buffers, swapping etc. The following discussion
15306  * applies only to them.
15307  *
15308  * A page may belong to an inode's memory mapping. In
15309  * this case, page->inode is the pointer to the inode,
15310  * and page->offset is the file offset of the page (not
15311  * necessarily a multiple of PAGE_SIZE).
15312  *
15313  * A page may have buffers allocated to it. In this case,
15314  * page->buffers is a circular list of these buffer
15315  * heads. Else, page->buffers == NULL.
15316  *
15317  * For pages belonging to inodes, the page->count is the
15318  * number of attaches, plus 1 if buffers are allocated to
15319  * the page.
15320  *
15321  * All pages belonging to an inode make up a doubly
15322  * linked list inode->i_pages, using the fields
15323  * page->next and page->prev. (These fields are also used
15324  * for freelist management when page->count==0.) There
15325  * is also a hash table mapping (inode,offset) to the
15326  * page in memory if present. The lists for this hash
15327  * table use the fields page->next_hash and
15328  * page->pprev_hash.
15329  *
15330  * All process pages can do I/O:
15331  * - inode pages may need to be read from disk,
15332  * - inode pages which have been modified and are
15333  *   MAP_SHARED may need to be written to disk,
15334  * - private pages which have been modified may need to
15335  *   be swapped out to swap space and (later) to be read
15336  *   back into memory.
15337  * During disk I/O, PG_locked is used. This bit is set
15338  * before I/O and reset when I/O completes. page->wait is
15339  * a wait queue of all tasks waiting for the I/O on this
15340  * page to complete.
15341  * PG_uptodate tells whether the page's contents is
15342  * valid. When a read completes, the page becomes
15343  * uptodate, unless a disk I/O error happened.
15344  * When a write completes, and PG_free_after is set, the
15345  * page is freed without any further delay.
15346  *
15347  * For choosing which pages to swap out, inode pages
15348  * carry a PG_referenced bit, which is set any time the
15349  * system accesses that page through the (inode,offset)
15350  * hash table.
15351  *
15352  * PG_skip is used on sparc/sparc64 architectures to
15353  * "skip" certain parts of the address space.
15354  *
15355  * PG_error is set to indicate that an I/O error occurred
15356  * on this page. */
15357
15358 extern mem_map_t * mem_map;
15359
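Since mem_map is a flat array with one mem_map_t per physical page frame (max_mapnr entries on this kernel), the rules above can be checked with a simple scan. A sketch in the spirit of show_mem(), with a hypothetical helper name:

/* Hypothetical helper: count free and reserved page frames
 * by the definitions in the comment above (count == 0 for a
 * free page, PG_reserved for untouchable ones). */
static void count_page_states(unsigned long *free_ret,
  unsigned long *reserved_ret)
{
  unsigned long i, free = 0, reserved = 0;

  for (i = 0; i < max_mapnr; i++) {
    mem_map_t *p = mem_map + i;

    if (PageReserved(p))
      reserved++;
    else if (!atomic_read(&p->count))
      free++;
  }
  *free_ret = free;
  *reserved_ret = reserved;
}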
15360 /* This is timing-critical - most of the time in getting
15361  * a new page goes to clearing the page. If you want a
15362  * page without the clearing overhead, just use
15363  * __get_free_page() directly.. */
15364 #define __get_free_page(gfp_mask) \
15365   __get_free_pages((gfp_mask),0)
15366 #define __get_dma_pages(gfp_mask, order) \
15367   __get_free_pages((gfp_mask) | GFP_DMA,(order))
15368 extern unsigned long
15369 FASTCALL(__get_free_pages(int gfp_mask,
15370   unsigned long gfp_order));
15371
15372 extern inline unsigned long get_free_page(int gfp_mask)
15373 {
15374   unsigned long page;
15375
15376   page = __get_free_page(gfp_mask);
15377   if (page)
15378     clear_page(page);
15379   return page;
15380 }
15381
15382 extern int low_on_memory;
15383
15384 /* memory.c & swap.c */
15385
15386 #define free_page(addr) free_pages((addr),0)
15387 extern void FASTCALL(free_pages(unsigned long addr,
15388   unsigned long order));
15389 extern void FASTCALL(__free_page(struct page *));
15390
15391 extern void show_free_areas(void);
15392 extern unsigned long put_dirty_page(
15393   struct task_struct * tsk, unsigned long page,
15394   unsigned long address);
15395
15396 extern void free_page_tables(struct mm_struct * mm);
15397 extern void clear_page_tables(struct mm_struct *,
15398   unsigned long, int);
15399 extern int new_page_tables(struct task_struct * tsk);
15400
15401 extern void zap_page_range(struct mm_struct *mm,
15402   unsigned long address, unsigned long size);
15403 extern int copy_page_range(struct mm_struct *dst,
15404   struct mm_struct *src, struct vm_area_struct *vma);
15405 extern int remap_page_range(unsigned long from,
15406   unsigned long to, unsigned long size, pgprot_t prot);
15407 extern int zeromap_page_range(unsigned long from,
15408   unsigned long size, pgprot_t prot);
15409
15410 extern void vmtruncate(struct inode * inode,
15411   unsigned long offset);
15412 extern int handle_mm_fault(struct task_struct *tsk,
15413   struct vm_area_struct *vma, unsigned long address,
15414   int write_access);
15415 extern void make_pages_present(unsigned long addr,
15416   unsigned long end);
15417
15418 extern int pgt_cache_water[2];
15419 extern int check_pgt_cache(void);
15420
15421 extern unsigned long paging_init(unsigned long start_mem,
15422   unsigned long end_mem);
15423 extern void mem_init(unsigned long start_mem,
15424   unsigned long end_mem);
15425 extern void show_mem(void);
15426 extern void oom(struct task_struct * tsk);
15427 extern void si_meminfo(struct sysinfo * val);
15428
15429 /* mmap.c */
15430 extern void vma_init(void);
15431 extern void merge_segments(struct mm_struct *,
15432   unsigned long, unsigned long);
15433 extern void insert_vm_struct(struct mm_struct *,
15434   struct vm_area_struct *);
15435 extern void build_mmap_avl(struct mm_struct *);
15436 extern void exit_mmap(struct mm_struct *);
15437 extern unsigned long get_unmapped_area(unsigned long,
15438   unsigned long);
15439
15440 extern unsigned long do_mmap(struct file *,
15441   unsigned long, unsigned long, unsigned long,
15442   unsigned long, unsigned long);
15443 extern int do_munmap(unsigned long, size_t);
15444
15445 /* filemap.c */
15446 extern void remove_inode_page(struct page *);
15447 extern unsigned long page_unuse(struct page *);
15448 extern int shrink_mmap(int, int);
15449 extern void truncate_inode_pages(struct inode *,
15450   unsigned long);
15451 extern unsigned long get_cached_page(struct inode *,
15452   unsigned long, int);
15453 extern void put_cached_page(unsigned long);
15454
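Taken together, the allocation and release primitives above give the usual lifetime pattern for a kernel scratch page. A sketch assuming a context that may sleep (GFP_KERNEL is defined just below); the function name is hypothetical:

/* Sketch only: allocate one zeroed page, use it, free it.
 * get_free_page() clears the page; __get_free_page() would
 * skip the clearing, as the comment above notes. */
static int use_scratch_page(void)
{
  unsigned long addr = get_free_page(GFP_KERNEL);

  if (!addr)
    return -ENOMEM;
  strcpy((char *) addr, "scratch");  /* any private use */
  free_page(addr);
  return 0;
}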
15455 /* GFP bitmasks.. */
15456 #define __GFP_WAIT 0x01
15457 #define __GFP_LOW  0x02
15458 #define __GFP_MED  0x04
15459 #define __GFP_HIGH 0x08
15460 #define __GFP_IO   0x10
15461 #define __GFP_SWAP 0x20
15462
15463 #define __GFP_DMA  0x80
15464
15465 #define GFP_BUFFER (__GFP_LOW | __GFP_WAIT)
15466 #define GFP_ATOMIC (__GFP_HIGH)
15467 #define GFP_USER   (__GFP_LOW | __GFP_WAIT | __GFP_IO)
15468 #define GFP_KERNEL (__GFP_MED | __GFP_WAIT | __GFP_IO)
15469 #define GFP_NFS    (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
15470 #define GFP_KSWAPD (__GFP_IO | __GFP_SWAP)
15471
15472 /* Flag - indicates that the buffer will be suitable for
15473    DMA. Ignored on some platforms, used as appropriate
15474    on others */
15475
15476 #define GFP_DMA __GFP_DMA
15477
15478 /* vma is the first one with address < vma->vm_end, and
15479  * even address < vma->vm_start. Have to extend vma. */
15480 static inline int expand_stack(
15481   struct vm_area_struct * vma, unsigned long address)
15482 {
15483   unsigned long grow;
15484
15485   address &= PAGE_MASK;
15486   grow = vma->vm_start - address;
15487   if (vma->vm_end - address
15488     > (unsigned long)current->rlim[RLIMIT_STACK].rlim_cur ||
15489     (vma->vm_mm->total_vm << PAGE_SHIFT) + grow
15490     > (unsigned long)current->rlim[RLIMIT_AS].rlim_cur)
15491     return -ENOMEM;
15492   vma->vm_start = address;
15493   vma->vm_offset -= grow;
15494   vma->vm_mm->total_vm += grow >> PAGE_SHIFT;
15495   if (vma->vm_flags & VM_LOCKED)
15496     vma->vm_mm->locked_vm += grow >> PAGE_SHIFT;
15497   return 0;
15498 }
15499
15500 /* Look up the first VMA which satisfies addr < vm_end,
15501    NULL if none. */
15502 extern struct vm_area_struct * find_vma(
15503   struct mm_struct * mm, unsigned long addr);
15504
15505 /* Look up the first VMA which intersects the interval
15506    start_addr..end_addr-1, NULL if none. Assume
15507    start_addr < end_addr. */
15508 static inline struct vm_area_struct *
15509 find_vma_intersection(struct mm_struct * mm,
15510   unsigned long start_addr, unsigned long end_addr)
15511 {
15512   struct vm_area_struct * vma = find_vma(mm,start_addr);
15513
15514   if (vma && end_addr <= vma->vm_start)
15515     vma = NULL;
15516   return vma;
15517 }
15518
15519 #define buffer_under_min() \
15520   ((buffermem >> PAGE_SHIFT) * 100 < \
15521    buffer_mem.min_percent * num_physpages)
15522 #define pgcache_under_min() \
15523   (page_cache_size * 100 < \
15524    page_cache.min_percent * num_physpages)
15525
15526 #endif /* __KERNEL__ */
15527
15528 #endif
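find_vma(), expand_stack() and the VM_GROWSDOWN flag combine in the architecture page-fault handlers. A condensed sketch of that lookup (cf. do_page_fault() in arch/i386/mm/fault.c); the helper name is hypothetical:

/* Hypothetical helper: return the VMA that should handle a
 * fault at `address', growing a stack area downwards when
 * the address lands in the gap just below it. */
static struct vm_area_struct *
vma_for_fault(struct mm_struct *mm, unsigned long address)
{
  struct vm_area_struct *vma = find_vma(mm, address);

  if (!vma)
    return NULL;                  /* above every area */
  if (vma->vm_start <= address)
    return vma;                   /* ordinary hit */
  if (!(vma->vm_flags & VM_GROWSDOWN))
    return NULL;                  /* gap, not a stack */
  if (expand_stack(vma, address))
    return NULL;                  /* rlimit exceeded */
  return vma;
}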


