Linux Core Kernel Commentary


mm/mmap.c


/*
 * linux/mm/mmap.c
 *
 * Written by obz.
 */
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapctl.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/file.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>

/* description of effects of mapping type and prot in
 * current implementation. this is due to the limited
 * x86 page protection hardware.  The expected behavior
 * is in parens (Y = yes, N = no, C = copy):
 *
 * map_type     prot
 *              PROT_NONE  PROT_READ  PROT_WRITE  PROT_EXEC
 * MAP_SHARED   r: (N) N   r: (Y) Y   r: (N) Y    r: (N) Y
 *              w: (N) N   w: (N) N   w: (Y) Y    w: (N) N
 *              x: (N) N   x: (N) Y   x: (N) Y    x: (Y) Y
 *
 * MAP_PRIVATE  r: (N) N   r: (Y) Y   r: (N) Y    r: (N) Y
 *              w: (N) N   w: (N) N   w: (C) C    w: (N) N
 *              x: (N) N   x: (N) Y   x: (N) Y    x: (Y) Y
 */
pgprot_t protection_map[16] = {
  __P000, __P001, __P010, __P011,
  __P100, __P101, __P110, __P111,
  __S000, __S001, __S010, __S011,
  __S100, __S101, __S110, __S111
};

/* SLAB cache for vm_area_struct's. */
kmem_cache_t *vm_area_cachep;

int sysctl_overcommit_memory;

/* Check that a process has enough memory to allocate a
 * new virtual mapping.
 */
int vm_enough_memory(long pages)
{
  /* Stupid algorithm to decide if we have enough memory:
   * while simple, it hopefully works in most obvious
   * cases.. Easy to fool it, but this should catch most
   * mistakes. */
  /* 23/11/98 NJC: Somewhat less stupid version of
   * algorithm, which tries to do "TheRightThing".
   * Instead of using half of (buffers+cache), use the
   * minimum values.  Allow an extra 2% of num_physpages
   * for safety margin. */

  long free;

  /* Sometimes we want to use more memory than we
   * have. */
  if (sysctl_overcommit_memory)
    return 1;

  free = buffermem >> PAGE_SHIFT;
  free += page_cache_size;
  free += nr_free_pages;
  free += nr_swap_pages;
  free -= (page_cache.min_percent +
           buffer_mem.min_percent + 2)*num_physpages/100;
  return free > pages;
}

/* Remove one vm structure from the inode's i_mmap
 * ring. */
static inline void remove_shared_vm_struct(
  struct vm_area_struct *vma)
{
  struct file * file = vma->vm_file;

  if (file) {
    if (vma->vm_flags & VM_DENYWRITE)
      file->f_dentry->d_inode->i_writecount++;
    if(vma->vm_next_share)
      vma->vm_next_share->vm_pprev_share =
        vma->vm_pprev_share;
    *vma->vm_pprev_share = vma->vm_next_share;
  }
}
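
The protection_map table above is indexed later, in do_mmap(), by the low four bits of vm_flags. The following stand-alone sketch is an editorial illustration, not part of the listing; it assumes the 2.2 x86 values VM_READ=0x1, VM_WRITE=0x2, VM_EXEC=0x4, VM_SHARED=0x8 and only demonstrates the indexing arithmetic.

/* Illustration only, not from the listing: how do_mmap() picks a page
 * protection with protection_map[vma->vm_flags & 0x0f].  The VM_* values
 * below are assumed to match the 2.2 headers. */
#include <stdio.h>

#define VM_READ   0x0001UL
#define VM_WRITE  0x0002UL
#define VM_EXEC   0x0004UL
#define VM_SHARED 0x0008UL

static const char *entry_name[16] = {
  "__P000", "__P001", "__P010", "__P011",
  "__P100", "__P101", "__P110", "__P111",
  "__S000", "__S001", "__S010", "__S011",
  "__S100", "__S101", "__S110", "__S111"
};

int main(void)
{
  unsigned long vm_flags = VM_READ | VM_WRITE;   /* private read-write */
  printf("index %lu -> %s\n", vm_flags & 0x0f,
         entry_name[vm_flags & 0x0f]);           /* 3 -> __P011 */
  vm_flags |= VM_SHARED;                         /* shared read-write */
  printf("index %lu -> %s\n", vm_flags & 0x0f,
         entry_name[vm_flags & 0x0f]);           /* 11 -> __S011 */
  return 0;
}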


asmlinkage unsigned long sys_brk(unsigned long brk)
{
  unsigned long rlim, retval;
  unsigned long newbrk, oldbrk;
  struct mm_struct *mm = current->mm;

  down(&mm->mmap_sem);

  /* This lock-kernel is one of the main contention
   * points for certain normal loads.  And it really
   * should not be here: almost everything in
   * brk()/mmap()/munmap() is protected sufficiently by
   * the mmap semaphore that we got above.
   *
   * We should move this into the few things that really
   * want the lock, namely anything that actually touches
   * a file descriptor etc.  We can do all the normal
   * anonymous mapping cases without ever getting the
   * lock at all - the actual memory management code is
   * already completely thread-safe. */
  lock_kernel();

  if (brk < mm->end_code)
    goto out;
  newbrk = PAGE_ALIGN(brk);
  oldbrk = PAGE_ALIGN(mm->brk);
  if (oldbrk == newbrk)
    goto set_brk;

  /* Always allow shrinking brk. */
  if (brk <= mm->brk) {
    if (!do_munmap(newbrk, oldbrk-newbrk))
      goto set_brk;
    goto out;
  }

  /* Check against rlimit and stack.. */
  rlim = current->rlim[RLIMIT_DATA].rlim_cur;
  if (rlim < RLIM_INFINITY && brk - mm->end_code > rlim)
    goto out;

  /* Check against existing mmap mappings. */
  if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
    goto out;

  /* Check if we have enough memory.. */
  if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
    goto out;

  /* Ok, looks good - let it rip. */
  if (do_mmap(NULL, oldbrk, newbrk-oldbrk,
              PROT_READ|PROT_WRITE|PROT_EXEC,
              MAP_FIXED|MAP_PRIVATE, 0) != oldbrk)
    goto out;
set_brk:
  mm->brk = brk;
out:
  retval = mm->brk;
  unlock_kernel();
  up(&mm->mmap_sem);
  return retval;
}

/* Combine the mmap "prot" and "flags" argument into one
 * "vm_flags" used internally.  Essentially, translate the
 * "PROT_xxx" and "MAP_xxx" bits into "VM_xxx". */
static inline unsigned long vm_flags(unsigned long prot,
  unsigned long flags)
{
#define _trans(x,bit1,bit2) \
  ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)

  unsigned long prot_bits, flag_bits;
  prot_bits =
    _trans(prot, PROT_READ, VM_READ) |
    _trans(prot, PROT_WRITE, VM_WRITE) |
    _trans(prot, PROT_EXEC, VM_EXEC);
  flag_bits =
    _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
    _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
    _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
  return prot_bits | flag_bits;
#undef _trans
}
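
sys_brk() above is what ultimately services the brk()/sbrk() heap interface. A minimal user-space sketch, added here for illustration and not part of the listing:

/* Illustration only: moving the program break from user space ends up in
 * sys_brk(), where the new break is page-aligned and checked against
 * RLIMIT_DATA and existing mappings before do_mmap()/do_munmap() grows or
 * shrinks the heap. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
  void *before = sbrk(0);            /* current program break */
  if (sbrk(4096) == (void *) -1) {   /* ask for one more page of heap */
    perror("sbrk");
    return 1;
  }
  printf("break: %p -> %p\n", before, sbrk(0));
  return 0;
}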



unsigned long do_mmap(struct file * file,
  unsigned long addr, unsigned long len,
  unsigned long prot, unsigned long flags,
  unsigned long off)
{
  struct mm_struct * mm = current->mm;
  struct vm_area_struct * vma;
  int error;

  if ((len = PAGE_ALIGN(len)) == 0)
    return addr;

  if (len > TASK_SIZE || addr > TASK_SIZE-len)
    return -EINVAL;

  /* offset overflow? */
  if (off + len < off)
    return -EINVAL;

  /* Too many mappings? */
  if (mm->map_count > MAX_MAP_COUNT)
    return -ENOMEM;

  /* mlock MCL_FUTURE? */
  if (mm->def_flags & VM_LOCKED) {
    unsigned long locked = mm->locked_vm << PAGE_SHIFT;
    locked += len;
    if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
      return -EAGAIN;
  }

  /* Do simple checking here so the lower-level routines
   * won't have to.  we assume access permissions have
   * been handled by the open of the memory object, so we
   * don't do any here. */
  if (file != NULL) {
    switch (flags & MAP_TYPE) {
    case MAP_SHARED:
      if ((prot & PROT_WRITE) && !(file->f_mode & 2))
        return -EACCES;

      /* Make sure we don't allow writing to an
       * append-only file.. */
      if (IS_APPEND(file->f_dentry->d_inode) &&
          (file->f_mode & 2))
        return -EACCES;

      /* make sure there are no mandatory locks on the
       * file. */
      if (locks_verify_locked(file->f_dentry->d_inode))
        return -EAGAIN;

      /* fall through */
    case MAP_PRIVATE:
      if (!(file->f_mode & 1))
        return -EACCES;
      break;

    default:
      return -EINVAL;
    }
  } else if ((flags & MAP_TYPE) != MAP_PRIVATE)
    return -EINVAL;

  /* Obtain the address to map to. we verify (or select)
   * it and ensure that it represents a valid section of
   * the address space. */
  if (flags & MAP_FIXED) {
    if (addr & ~PAGE_MASK)
      return -EINVAL;
  } else {
    addr = get_unmapped_area(addr, len);
    if (!addr)
      return -ENOMEM;
  }

  /* Determine the object being mapped and call the
   * appropriate specific mapper. the address has already
   * been validated, but not unmapped, but the maps are
   * removed from the list. */
  if (file && (!file->f_op || !file->f_op->mmap))
    return -ENODEV;

  vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
  if (!vma)
    return -ENOMEM;

  vma->vm_mm = mm;
  vma->vm_start = addr;
  vma->vm_end = addr + len;
  vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;

  if (file) {
    if (file->f_mode & 1)
      vma->vm_flags |= VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
    if (flags & MAP_SHARED) {
      vma->vm_flags |= VM_SHARED | VM_MAYSHARE;

      /* This looks strange, but when we don't have the
       * file open for writing, we can demote the shared
       * mapping to a simpler private mapping.  That also
       * takes care of a security hole with ptrace()
       * writing to a shared mapping without write
       * permissions.
       *
       * We leave the VM_MAYSHARE bit on, just to get
       * correct output from /proc/xxx/maps.. */
      if (!(file->f_mode & 2))
        vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
    }
  } else
    vma->vm_flags |= VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
  vma->vm_page_prot =
    protection_map[vma->vm_flags & 0x0f];
  vma->vm_ops = NULL;
  vma->vm_offset = off;
  vma->vm_file = NULL;
  vma->vm_pte = 0;

  /* Clear old maps */
  error = -ENOMEM;
  if (do_munmap(addr, len))
    goto free_vma;

  /* Check against address space limit. */
  if ((mm->total_vm << PAGE_SHIFT) + len
      > current->rlim[RLIMIT_AS].rlim_cur)
    goto free_vma;

  /* Private writable mapping? Check memory
   * availability.. */
  if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) ==
        VM_WRITE &&
      !(flags & MAP_NORESERVE) &&
      !vm_enough_memory(len >> PAGE_SHIFT))
    goto free_vma;

  if (file) {
    int correct_wcount = 0;
    if (vma->vm_flags & VM_DENYWRITE) {
      if (file->f_dentry->d_inode->i_writecount > 0) {
        error = -ETXTBSY;
        goto free_vma;
      }
      /* f_op->mmap might possibly sleep
       * (generic_file_mmap doesn't, but other code
       * might). In any case, this takes care of any
       * race that this might cause.
       */
      file->f_dentry->d_inode->i_writecount--;
      correct_wcount = 1;
    }
    error = file->f_op->mmap(file, vma);
    /* Fix up the count if necessary, then check for an
     * error */
    if (correct_wcount)
      file->f_dentry->d_inode->i_writecount++;
    if (error)
      goto unmap_and_free_vma;
    vma->vm_file = file;
    file->f_count++;
  }

  /* merge_segments may merge our vma, so we can't refer
   * to it after the call.  Save the values we need now
   * ... */
  flags = vma->vm_flags;
  addr = vma->vm_start;  /* can addr have changed?? */
  insert_vm_struct(mm, vma);
  merge_segments(mm, vma->vm_start, vma->vm_end);

  mm->total_vm += len >> PAGE_SHIFT;
  if (flags & VM_LOCKED) {
    mm->locked_vm += len >> PAGE_SHIFT;
    make_pages_present(addr, addr + len);
  }
  return addr;

unmap_and_free_vma:
  /* Undo any partial mapping done by a device driver. */
  flush_cache_range(mm, vma->vm_start, vma->vm_end);
  zap_page_range(mm, vma->vm_start,
                 vma->vm_end - vma->vm_start);
  flush_tlb_range(mm, vma->vm_start, vma->vm_end);
free_vma:
  kmem_cache_free(vm_area_cachep, vma);
  return error;
}

/* Get an address range which is currently unmapped. For
 * mmap() without MAP_FIXED and shmat() with addr=0.
 * Return value 0 means ENOMEM. */
unsigned long get_unmapped_area(unsigned long addr,
  unsigned long len)
{
  struct vm_area_struct * vmm;

  if (len > TASK_SIZE)
    return 0;
  if (!addr)
    addr = TASK_UNMAPPED_BASE;
  addr = PAGE_ALIGN(addr);

  for (vmm = find_vma(current->mm, addr); ;
       vmm = vmm->vm_next) {
    /* At this point: (!vmm || addr < vmm->vm_end). */
    if (TASK_SIZE - len < addr)
      return 0;
    if (!vmm || addr + len <= vmm->vm_start)
      return addr;
    addr = vmm->vm_end;
  }
}

#define vm_avl_empty (struct vm_area_struct *) NULL

#include "mmap_avl.c"
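
The mmap() system call path in the architecture code funnels into do_mmap() above. A user-space sketch of the common anonymous MAP_PRIVATE case, added for illustration and not part of the listing: with file == NULL and no MAP_FIXED, do_mmap() validates the arguments and get_unmapped_area() chooses the address.

/* Illustration only: an anonymous private mapping as seen from user space. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
  size_t len = 8192;
  char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) {
    perror("mmap");
    return 1;
  }
  strcpy(p, "hello");                  /* pages are faulted in on first touch */
  printf("mapped %zu bytes at %p: %s\n", len, (void *) p, p);
  return munmap(p, len);
}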



/* Look up the first VMA which satisfies addr < vm_end,
 * NULL if none. */
struct vm_area_struct * find_vma(struct mm_struct * mm,
  unsigned long addr)
{
  struct vm_area_struct *vma = NULL;

  if (mm) {
    /* Check the cache first. */
    /* (Cache hit rate is typically around 35%.) */
    vma = mm->mmap_cache;
    if (!(vma && vma->vm_end > addr &&
          vma->vm_start <= addr)) {
      if (!mm->mmap_avl) {
        /* Go through the linear list. */
        vma = mm->mmap;
        while (vma && vma->vm_end <= addr)
          vma = vma->vm_next;
      } else {
        /* Then go through the AVL tree quickly. */
        struct vm_area_struct * tree = mm->mmap_avl;
        vma = NULL;
        for (;;) {
          if (tree == vm_avl_empty)
            break;
          if (tree->vm_end > addr) {
            vma = tree;
            if (tree->vm_start <= addr)
              break;
            tree = tree->vm_avl_left;
          } else
            tree = tree->vm_avl_right;
        }
      }
      if (vma)
        mm->mmap_cache = vma;
    }
  }
  return vma;
}

/* Same as find_vma, but also return a pointer to the
 * previous VMA in *pprev. */
struct vm_area_struct * find_vma_prev(
  struct mm_struct * mm, unsigned long addr,
  struct vm_area_struct **pprev)
{
  if (mm) {
    if (!mm->mmap_avl) {
      /* Go through the linear list. */
      struct vm_area_struct * prev = NULL;
      struct vm_area_struct * vma = mm->mmap;
      while (vma && vma->vm_end <= addr) {
        prev = vma;
        vma = vma->vm_next;
      }
      *pprev = prev;
      return vma;
    } else {
      /* Go through the AVL tree quickly. */
      struct vm_area_struct * vma = NULL;
      struct vm_area_struct * last_turn_right = NULL;
      struct vm_area_struct * prev = NULL;
      struct vm_area_struct * tree = mm->mmap_avl;
      for (;;) {
        if (tree == vm_avl_empty)
          break;
        if (tree->vm_end > addr) {
          vma = tree;
          prev = last_turn_right;
          if (tree->vm_start <= addr)
            break;
          tree = tree->vm_avl_left;
        } else {
          last_turn_right = tree;
          tree = tree->vm_avl_right;
        }
      }
      if (vma) {
        if (vma->vm_avl_left != vm_avl_empty) {
          prev = vma->vm_avl_left;
          while (prev->vm_avl_right != vm_avl_empty)
            prev = prev->vm_avl_right;
        }
        if ((prev ? prev->vm_next : mm->mmap) != vma)
          printk("find_vma_prev: tree inconsistent with "
                 "list\n");
        *pprev = prev;
        return vma;
      }
    }
  }
  *pprev = NULL;
  return NULL;
}
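
Both lookups above return the first area whose vm_end lies above addr, which is not necessarily an area that contains addr. A toy, self-contained sketch of that invariant over the linear list, added for illustration; the types and names are made up, not kernel code.

/* Illustration only: find_vma()'s contract -- the returned area may start
 * above addr, so callers still check vma->vm_start <= addr for a hit. */
#include <stdio.h>
#include <stddef.h>

struct toy_vma {
  unsigned long vm_start, vm_end;
  struct toy_vma *vm_next;            /* sorted by address */
};

static struct toy_vma *toy_find_vma(struct toy_vma *list, unsigned long addr)
{
  while (list && list->vm_end <= addr)
    list = list->vm_next;
  return list;                        /* NULL if addr is above every area */
}

int main(void)
{
  struct toy_vma b = { 0x8000, 0x9000, NULL };
  struct toy_vma a = { 0x4000, 0x5000, &b };
  struct toy_vma *hit = toy_find_vma(&a, 0x6000);
  printf("0x6000 -> [%#lx, %#lx)\n",
         hit->vm_start, hit->vm_end);  /* the hole at 0x6000 maps to b */
  return 0;
}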



/* Normal function to fix up a mapping
 * This function is the default for when an area has no
 * specific function.  This may be used as part of a more
 * specific routine.  This function works out what part
 * of an area is affected and adjusts the mapping
 * information.  Since the actual page manipulation is
 * done in do_mmap(), none need be done here, though it
 * would probably be more appropriate.
 *
 * By the time this function is called, the area struct
 * has been removed from the process mapping list, so it
 * needs to be reinserted if necessary.
 *
 * The 4 main cases are:
 *    Unmapping the whole area
 *    Unmapping from the start of the seg to a point in it
 *    Unmapping from an intermediate point to the end
 *    Unmapping between two intermediate points, making a
 *      hole.
 *
 * Case 4 involves the creation of 2 new areas, for each
 * side of the hole.  If possible, we reuse the existing
 * area rather than allocate a new one, and the return
 * indicates whether the old area was reused. */
static int unmap_fixup(struct vm_area_struct *area,
  unsigned long addr, size_t len,
  struct vm_area_struct **extra)
{
  struct vm_area_struct *mpnt;
  unsigned long end = addr + len;

  area->vm_mm->total_vm -= len >> PAGE_SHIFT;
  if (area->vm_flags & VM_LOCKED)
    area->vm_mm->locked_vm -= len >> PAGE_SHIFT;

  /* Unmapping the whole area. */
  if (addr == area->vm_start && end == area->vm_end) {
    if (area->vm_ops && area->vm_ops->close)
      area->vm_ops->close(area);
    if (area->vm_file)
      fput(area->vm_file);
    return 0;
  }

  /* Work out to one of the ends. */
  if (end == area->vm_end)
    area->vm_end = addr;
  else if (addr == area->vm_start) {
    area->vm_offset += (end - area->vm_start);
    area->vm_start = end;
  } else {
    /* Unmapping a hole:
     * area->vm_start < addr <= end < area->vm_end */
    /* Add end mapping -- leave beginning for below */
    mpnt = *extra;
    *extra = NULL;

    mpnt->vm_mm = area->vm_mm;
    mpnt->vm_start = end;
    mpnt->vm_end = area->vm_end;
    mpnt->vm_page_prot = area->vm_page_prot;
    mpnt->vm_flags = area->vm_flags;
    mpnt->vm_ops = area->vm_ops;
    mpnt->vm_offset =
      area->vm_offset + (end - area->vm_start);
    mpnt->vm_file = area->vm_file;
    mpnt->vm_pte = area->vm_pte;
    if (mpnt->vm_file)
      mpnt->vm_file->f_count++;
    if (mpnt->vm_ops && mpnt->vm_ops->open)
      mpnt->vm_ops->open(mpnt);
    area->vm_end = addr;  /* Truncate area */
    insert_vm_struct(current->mm, mpnt);
  }

  insert_vm_struct(current->mm, area);
  return 1;
}

/* Try to free as many page directory entries as we can,
 * without having to work very hard at actually scanning
 * the page tables themselves.
 *
 * Right now we try to free page tables if we have a nice
 * PGDIR-aligned area that got free'd up.  We could be
 * more granular if we want to, but this is fast and
 * simple, and covers the bad cases.
 *
 * "prev", if it exists, points to a vma before the one
 * we just free'd - but there's no telling how much
 * before. */
static void free_pgtables(struct mm_struct * mm,
  struct vm_area_struct *prev,
  unsigned long start, unsigned long end)
{
  unsigned long first = start & PGDIR_MASK;
  unsigned long last = (end + PGDIR_SIZE - 1) &
                       PGDIR_MASK;

  if (!prev) {
    prev = mm->mmap;
    if (!prev)
      goto no_mmaps;
    if (prev->vm_end > start) {
      if (last > prev->vm_end)
        last = prev->vm_end;
      goto no_mmaps;
    }
  }
  for (;;) {
    struct vm_area_struct *next = prev->vm_next;

    if (next) {
      if (next->vm_start < start) {
        prev = next;
        continue;
      }
      if (last > next->vm_start)
        last = next->vm_start;
    }
    if (prev->vm_end > first)
      first = prev->vm_end + PGDIR_SIZE - 1;
    break;
  }
no_mmaps:
  first = first >> PGDIR_SHIFT;
  last = last >> PGDIR_SHIFT;
  if (last > first)
    clear_page_tables(mm, first, last-first);
}
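
The fourth case in unmap_fixup() above, punching a hole in the middle of an area, is easy to trigger from user space. A minimal sketch, added for illustration and not part of the listing:

/* Illustration only: unmapping the middle page of a three-page anonymous
 * mapping leaves two areas, one on each side of the hole (visible as two
 * lines in /proc/self/maps). */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
  long page = sysconf(_SC_PAGESIZE);
  char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) {
    perror("mmap");
    return 1;
  }
  if (munmap(p + page, page)) {       /* punch a one-page hole in the middle */
    perror("munmap");
    return 1;
  }
  p[0] = 1;                           /* both ends remain usable */
  p[2 * page] = 2;
  printf("hole punched at %p\n", (void *) (p + page));
  return 0;
}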



/* Munmap is split into 2 main parts -- this part which
 * finds what needs doing, and the areas themselves,
 * which do the work.  This now handles partial
 * unmappings.  Jeremy Fitzhardinge <jeremy@sw.oz.au> */
int do_munmap(unsigned long addr, size_t len)
{
  struct mm_struct * mm;
  struct vm_area_struct *mpnt, *prev, **npp, *free,
                        *extra;

  if ((addr & ~PAGE_MASK) || addr > TASK_SIZE ||
      len > TASK_SIZE-addr)
    return -EINVAL;

  if ((len = PAGE_ALIGN(len)) == 0)
    return -EINVAL;

  /* Check if this memory area is ok - put it on the
   * temporary list if so..  The checks here are pretty
   * simple -- every area affected in some way (by any
   * overlap) is put on the list.  If nothing is put on,
   * nothing is affected. */
  mm = current->mm;
  mpnt = find_vma_prev(mm, addr, &prev);
  if (!mpnt)
    return 0;
  /* we have addr < mpnt->vm_end */

  if (mpnt->vm_start >= addr+len)
    return 0;

  /* If we'll make "hole", check the vm areas limit */
  if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
      && mm->map_count >= MAX_MAP_COUNT)
    return -ENOMEM;

  /* We may need one additional vma to fix up the
   * mappings ... and this is the last chance for an
   * easy error exit. */
  extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
  if (!extra)
    return -ENOMEM;

  npp = (prev ? &prev->vm_next : &mm->mmap);
  free = NULL;
  for (; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
    *npp = mpnt->vm_next;
    mpnt->vm_next = free;
    free = mpnt;
    if (mm->mmap_avl)
      avl_remove(mpnt, &mm->mmap_avl);
  }

  /* Ok - we have the memory areas we should free on the
   * 'free' list, so release them, and unmap the page
   * range..  If the one of the segments is only being
   * partially unmapped, it will put new
   * vm_area_struct(s) into the address space. */
  while ((mpnt = free) != NULL) {
    unsigned long st, end, size;

    free = free->vm_next;

    st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
    end = addr+len;
    end = end > mpnt->vm_end ? mpnt->vm_end : end;
    size = end - st;

    if (mpnt->vm_ops && mpnt->vm_ops->unmap)
      mpnt->vm_ops->unmap(mpnt, st, size);

    remove_shared_vm_struct(mpnt);
    mm->map_count--;

    flush_cache_range(mm, st, end);
    zap_page_range(mm, st, size);
    flush_tlb_range(mm, st, end);

    /* Fix the mapping, and free the old area if it
     * wasn't reused. */
    if (!unmap_fixup(mpnt, st, size, &extra))
      kmem_cache_free(vm_area_cachep, mpnt);
  }

  /* Release the extra vma struct if it wasn't used */
  if (extra)
    kmem_cache_free(vm_area_cachep, extra);

  free_pgtables(mm, prev, addr, addr+len);

  mm->mmap_cache = NULL;  /* Kill the cache. */
  return 0;
}

asmlinkage int sys_munmap(unsigned long addr, size_t len)
{
  int ret;

  down(&current->mm->mmap_sem);
  lock_kernel();
  ret = do_munmap(addr, len);
  unlock_kernel();
  up(&current->mm->mmap_sem);
  return ret;
}

/* Build the AVL tree corresponding to the VMA list. */
void build_mmap_avl(struct mm_struct * mm)
{
  struct vm_area_struct * vma;

  mm->mmap_avl = NULL;
  for (vma = mm->mmap; vma; vma = vma->vm_next)
    avl_insert(vma, &mm->mmap_avl);
}

/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
{
  struct vm_area_struct * mpnt;

  mpnt = mm->mmap;
  mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
  mm->rss = 0;
  mm->total_vm = 0;
  mm->locked_vm = 0;
  while (mpnt) {
    struct vm_area_struct * next = mpnt->vm_next;
    unsigned long start = mpnt->vm_start;
    unsigned long end = mpnt->vm_end;
    unsigned long size = end - start;

    if (mpnt->vm_ops) {
      if (mpnt->vm_ops->unmap)
        mpnt->vm_ops->unmap(mpnt, start, size);
      if (mpnt->vm_ops->close)
        mpnt->vm_ops->close(mpnt);
    }
    mm->map_count--;
    remove_shared_vm_struct(mpnt);
    zap_page_range(mm, start, size);
    if (mpnt->vm_file)
      fput(mpnt->vm_file);
    kmem_cache_free(vm_area_cachep, mpnt);
    mpnt = next;
  }

  /* This is just debugging */
  if (mm->map_count)
    printk("exit_mmap: map count is %d\n",
           mm->map_count);

  clear_page_tables(mm, 0, USER_PTRS_PER_PGD);
}

/* Insert vm structure into process list sorted by
 * address and into the inode's i_mmap ring. */
void insert_vm_struct(struct mm_struct *mm,
  struct vm_area_struct *vmp)
{
  struct vm_area_struct **pprev;
  struct file * file;

  if (!mm->mmap_avl) {
    pprev = &mm->mmap;
    while (*pprev && (*pprev)->vm_start <= vmp->vm_start)
      pprev = &(*pprev)->vm_next;
  } else {
    struct vm_area_struct *prev, *next;
    avl_insert_neighbours(vmp, &mm->mmap_avl,
                          &prev, &next);
    pprev = (prev ? &prev->vm_next : &mm->mmap);
    if (*pprev != next)
      printk("insert_vm_struct: tree inconsistent with "
             "list\n");
  }
  vmp->vm_next = *pprev;
  *pprev = vmp;

  mm->map_count++;
  if (mm->map_count >= AVL_MIN_MAP_COUNT &&
      !mm->mmap_avl)
    build_mmap_avl(mm);

  file = vmp->vm_file;
  if (file) {
    struct inode * inode = file->f_dentry->d_inode;
    if (vmp->vm_flags & VM_DENYWRITE)
      inode->i_writecount--;

    /* insert vmp into inode's share list */
    if((vmp->vm_next_share = inode->i_mmap) != NULL)
      inode->i_mmap->vm_pprev_share =
        &vmp->vm_next_share;
    inode->i_mmap = vmp;
    vmp->vm_pprev_share = &inode->i_mmap;
  }
}
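
The i_writecount bookkeeping seen in do_mmap(), insert_vm_struct() and remove_shared_vm_struct() is what makes writing to a running executable fail. A user-space sketch of the visible effect, added for illustration; it assumes /proc/self/exe is available.

/* Illustration only: while the program image is mapped with VM_DENYWRITE
 * (the normal case), opening it for writing is refused with ETXTBSY. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
  int fd = open("/proc/self/exe", O_WRONLY);   /* our own running image */
  if (fd < 0) {
    printf("open for write failed: %s\n",
           strerror(errno));                   /* expect ETXTBSY */
  } else {
    printf("unexpectedly writable\n");
    close(fd);
  }
  return 0;
}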



/* Merge the list of memory segments if possible.
 * Redundant vm_area_structs are freed.  This assumes
 * that the list is ordered by address.  We don't need to
 * traverse the entire list, only those segments which
 * intersect or are adjacent to a given interval.
 *
 * We must already hold the mm semaphore when we get
 * here.. */
void merge_segments (struct mm_struct * mm,
  unsigned long start_addr, unsigned long end_addr)
{
  struct vm_area_struct *prev, *mpnt, *next, *prev1;

  mpnt = find_vma_prev(mm, start_addr, &prev1);
  if (!mpnt)
    return;

  if (prev1) {
    prev = prev1;
  } else {
    prev = mpnt;
    mpnt = mpnt->vm_next;
  }

  /* prev and mpnt cycle through the list, as long as
   * start_addr < mpnt->vm_end &&
   * prev->vm_start < end_addr */
  for ( ; mpnt && prev->vm_start < end_addr;
       prev = mpnt, mpnt = next) {
    next = mpnt->vm_next;

    /* To share, we must have the same file,
     * operations.. */
    if ((mpnt->vm_file != prev->vm_file) ||
        (mpnt->vm_pte != prev->vm_pte) ||
        (mpnt->vm_ops != prev->vm_ops) ||
        (mpnt->vm_flags != prev->vm_flags) ||
        (prev->vm_end != mpnt->vm_start))
      continue;

    /* If we have a file or it's a shared memory area the
     * offsets must be contiguous.. */
    if ((mpnt->vm_file != NULL) ||
        (mpnt->vm_flags & VM_SHM)) {
      unsigned long off =
        prev->vm_offset+prev->vm_end-prev->vm_start;
      if (off != mpnt->vm_offset)
        continue;
    }

    /* merge prev with mpnt and set up pointers so the
     * new big segment can possibly merge with the next
     * one.  The old unused mpnt is freed. */
    if (mm->mmap_avl)
      avl_remove(mpnt, &mm->mmap_avl);
    prev->vm_end = mpnt->vm_end;
    prev->vm_next = mpnt->vm_next;
    if (mpnt->vm_ops && mpnt->vm_ops->close) {
      mpnt->vm_offset += mpnt->vm_end - mpnt->vm_start;
      mpnt->vm_start = mpnt->vm_end;
      mpnt->vm_ops->close(mpnt);
    }
    mm->map_count--;
    remove_shared_vm_struct(mpnt);
    if (mpnt->vm_file)
      fput(mpnt->vm_file);
    kmem_cache_free(vm_area_cachep, mpnt);
    mpnt = prev;
  }
  mm->mmap_cache = NULL;  /* Kill the cache. */
}

void __init vma_init(void)
{
  vm_area_cachep = kmem_cache_create("vm_area_struct",
                     sizeof(struct vm_area_struct),
                     0, SLAB_HWCACHE_ALIGN,
                     NULL, NULL);
  if(!vm_area_cachep)
    panic("vma_init: Cannot alloc vm_area_struct cache.");

  mm_cachep = kmem_cache_create("mm_struct",
                sizeof(struct mm_struct),
                0, SLAB_HWCACHE_ALIGN,
                NULL, NULL);
  if(!mm_cachep)
    panic("vma_init: Cannot alloc mm_struct cache.");
}
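
merge_segments() is why two back-to-back anonymous mappings with identical flags can end up as a single region. A user-space sketch of the effect, added for illustration and not part of the listing; whether the two regions actually coalesce depends on the kernel's merge rules, and the address range is first reserved and released so that MAP_FIXED does not clobber anything.

/* Illustration only: map two adjacent one-page regions with the same
 * protection and flags, then inspect /proc/<pid>/maps; kernels that merge
 * (merge_segments() above, or its successors) show one combined region. */
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
  long page = sysconf(_SC_PAGESIZE);

  /* Reserve two pages, release them, and reuse the now-free range. */
  char *base = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (base == MAP_FAILED) { perror("mmap"); return 1; }
  if (munmap(base, 2 * page)) { perror("munmap"); return 1; }

  if (mmap(base, page, PROT_READ | PROT_WRITE,
           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == MAP_FAILED ||
      mmap(base + page, page, PROT_READ | PROT_WRITE,
           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == MAP_FAILED) {
    perror("mmap");
    return 1;
  }
  printf("two adjacent maps at %p -- check /proc/%d/maps\n",
         (void *) base, getpid());
  getchar();                          /* keep the process alive for inspection */
  return 0;
}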

