Linux Core Kernel Commentary


arch/i386/kernel/process.c


/*
 *  linux/arch/i386/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

/* This file handles the architecture-dependent parts of
 * process handling.. */

#define __KERNEL_SYSCALLS__
#include <stdarg.h>

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/malloc.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/config.h>
#include <linux/unistd.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/init.h>
#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
#include <linux/apm_bios.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif

#include "irq.h"

spinlock_t semaphore_wake_lock = SPIN_LOCK_UNLOCKED;

asmlinkage void ret_from_fork(void)
        __asm__("ret_from_fork");

#ifdef CONFIG_APM
extern int apm_do_idle(void);
extern void apm_do_busy(void);
#endif

static int hlt_counter=0;

#define HARD_IDLE_TIMEOUT (HZ / 3)

void disable_hlt(void)
{
        hlt_counter++;
}

void enable_hlt(void)
{
        hlt_counter--;
}

#ifndef __SMP__

static void hard_idle(void)
{
        while (!current->need_resched) {
                if (boot_cpu_data.hlt_works_ok && !hlt_counter) {
#ifdef CONFIG_APM
                        /* If the APM BIOS is not enabled, or there
                           is an error calling the idle routine, we
                           should hlt if possible.  We need to check
                           need_resched again because an interrupt
                           may have occurred in apm_do_idle(). */
                        start_bh_atomic();
                        if (!apm_do_idle() && !current->need_resched)
                                __asm__("hlt");
                        end_bh_atomic();
#else
                        __asm__("hlt");
#endif
                }
                if (current->need_resched)
                        break;
                schedule();
        }
#ifdef CONFIG_APM
        apm_do_busy();
#endif
}

/* The idle loop on a uniprocessor i386.. */
static int cpu_idle(void *unused)
{
        int work = 1;
        unsigned long start_idle = 0;

        /* endless idle loop with no priority at all */
        current->priority = 0;
        current->counter = -100;
        for (;;) {
                if (work)
                        start_idle = jiffies;

                if (jiffies - start_idle > HARD_IDLE_TIMEOUT)
                        hard_idle();
                else {
                        if (boot_cpu_data.hlt_works_ok &&
                            !hlt_counter && !current->need_resched)
                                __asm__("hlt");
                }

                work = current->need_resched;
                schedule();
                check_pgt_cache();
        }
}

#else

/* This is being executed in task 0 'user space'. */

int cpu_idle(void *unused)
{
        /* endless idle loop with no priority at all */
        current->priority = 0;
        current->counter = -100;
        while(1) {
                if (current_cpu_data.hlt_works_ok && !hlt_counter &&
                    !current->need_resched)
                        __asm__("hlt");
                /* although we are an idle CPU, we do not want to
                 * get into the scheduler unnecessarily. */
                if (current->need_resched) {
                        schedule();
                        check_pgt_cache();
                }
        }
}

#endif
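
HARD_IDLE_TIMEOUT above is measured in jiffies. Assuming the usual i386 value of HZ=100 for kernels of this era, the threshold works out to roughly a third of a second of continuous idleness before cpu_idle() escalates to hard_idle(). A quick user-space sketch of the arithmetic:

#include <stdio.h>

#define HZ 100                          /* assumed i386 default */
#define HARD_IDLE_TIMEOUT (HZ / 3)

int main(void)
{
        /* 33 jiffies * 10 ms per jiffy = 330 ms of idleness
         * before the idle loop tries the APM hard-idle path */
        printf("%d jiffies = %d ms\n", HARD_IDLE_TIMEOUT,
               HARD_IDLE_TIMEOUT * (1000 / HZ));
        return 0;
}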

asmlinkage int sys_idle(void)
{
        if (current->pid != 0)
                return -EPERM;
        cpu_idle(NULL);
        return 0;
}

/* This routine reboots the machine by asking the
 * keyboard controller to pulse the reset-line low.  We
 * try that for a while, and if it doesn't work, we do
 * some other stupid things. */

static long no_idt[2] = {0, 0};
static int reboot_mode = 0;
static int reboot_thru_bios = 0;

__initfunc(void reboot_setup(char *str, int *ints))
{
        while(1) {
                switch (*str) {
                case 'w': /* "warm" reboot (no memory testing etc) */
                        reboot_mode = 0x1234;
                        break;
                case 'c': /* "cold" reboot (w/ memory testing etc) */
                        reboot_mode = 0x0;
                        break;
                case 'b': /* "bios" reboot by jumping thru the BIOS */
                        reboot_thru_bios = 1;
                        break;
                case 'h': /* "hard" reboot by toggling RESET
                           * and/or crashing the CPU */
                        reboot_thru_bios = 0;
                        break;
                }
                if((str = strchr(str,',')) != NULL)
                        str++;
                else
                        break;
        }
}
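
reboot_setup() receives the value of a boot-time option (in kernels of this era the reboot= parameter is the likely hook), with individual flags separated by commas. As an illustration only, a boot line such as the following would request a warm reboot performed through the BIOS:

        boot: linux reboot=w,b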

/* The following code and data reboots the machine by
 * switching to real mode and jumping to the BIOS reset
 * entry point, as if the CPU has really been reset.  The
 * previous version asked the keyboard controller to
 * pulse the CPU reset line, which is more thorough, but
 * doesn't work with at least one type of 486
 * motherboard.  It is easy to stop this code working;
 * hence the copious comments. */
static unsigned long long
real_mode_gdt_entries [3] =
{
        0x0000000000000000ULL,  /* Null descriptor */
        /* 16-bit real-mode 64k code at 0x00000000 */
        0x00009a000000ffffULL,
        /* 16-bit real-mode 64k data at 0x00000100 */
        0x000092000100ffffULL
};

static struct
{
        unsigned short size __attribute__ ((packed));
        unsigned long long * base __attribute__ ((packed));
}
real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1,
                  real_mode_gdt_entries },
real_mode_idt = { 0x3ff, 0 };
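
Each 64-bit GDT entry above packs a base, a limit and an access byte into the scattered field layout the 386 inherited for 286 compatibility. A small stand-alone decoder, written for this commentary rather than taken from the kernel, confirms that the code descriptor has base 0 and the data descriptor base 0x100, matching the comments:

#include <stdio.h>

static void decode(unsigned long long d)
{
        unsigned long limit  =  (d & 0xffff) |         /* limit 15..0  */
                               ((d >> 32) & 0xf0000);  /* limit 19..16 */
        unsigned long base   = ((d >> 16) & 0xffffff) |/* base 23..0   */
                               ((d >> 32) & 0xff000000);/* base 31..24 */
        unsigned int  access =  (d >> 40) & 0xff;      /* access byte  */

        printf("base=%08lx limit=%05lx access=%02x\n",
               base, limit, access);
}

int main(void)
{
        decode(0x00009a000000ffffULL); /* code: base 0, 64k limit */
        decode(0x000092000100ffffULL); /* data: base 0x100        */
        return 0;
}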

/* This is 16-bit protected mode code to disable paging
   and the cache, switch to real mode and jump to the
   BIOS reset code.

   The instruction that switches to real mode by writing
   to CR0 must be followed immediately by a far jump
   instruction, which sets CS to a valid value for real
   mode, and flushes the prefetch queue to avoid running
   instructions that have already been decoded in
   protected mode.

   Clears all the flags except ET, especially PG
   (paging), PE (protected-mode enable) and TS (task
   switch for coprocessor state save).  Flushes the TLB
   after paging has been disabled.  Sets CD and NW, to
   disable the cache on a 486, and invalidates the cache.
   This is more like the state of a 486 after reset.  I
   don't know if something else should be done for other
   chips.

   More could be done here to set up the registers as if
   a CPU reset had occurred; hopefully real BIOSs don't
   assume much. */

static unsigned char real_mode_switch [] =
{
        0x66, 0x0f, 0x20, 0xc0,                   /* movl %cr0,%eax */
        0x66, 0x83, 0xe0, 0x11,                   /* andl $0x00000011,%eax */
        0x66, 0x0d, 0x00, 0x00, 0x00, 0x60,       /* orl $0x60000000,%eax */
        0x66, 0x0f, 0x22, 0xc0,                   /* movl %eax,%cr0 */
        0x66, 0x0f, 0x22, 0xd8,                   /* movl %eax,%cr3 */
        0x66, 0x0f, 0x20, 0xc3,                   /* movl %cr0,%ebx */
        0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */
        0x74, 0x02,                               /* jz f */
        0x0f, 0x08,                               /* invd */
        0x24, 0x10,                               /* f: andb $0x10,al */
        0x66, 0x0f, 0x22, 0xc0,                   /* movl %eax,%cr0 */
        0xea, 0x00, 0x00, 0xff, 0xff              /* ljmp $0xffff,$0x0000 */
};

static inline void kb_wait(void)
{
        int i;

        for (i=0; i<0x10000; i++)
                if ((inb_p(0x64) & 0x02) == 0)
                        break;
}

void machine_restart(char * __unused)
{
#if __SMP__
        /* turn off the IO-APIC, so we can do a clean reboot */
        init_pic_mode();
#endif

        if(!reboot_thru_bios) {
                /* rebooting needs to touch the page at abs addr 0 */
                *((unsigned short *)__va(0x472)) = reboot_mode;
                for (;;) {
                        int i;
                        for (i=0; i<100; i++) {
                                kb_wait();
                                udelay(50);
                                outb(0xfe,0x64); /* pulse reset low */
                                udelay(50);
                        }
                        /* That didn't work - force a triple fault.. */
                        __asm__ __volatile__("lidt %0": :"m" (no_idt));
                        __asm__ __volatile__("int3");
                }
        }

        cli();

        /* Write zero to CMOS register number 0x0f, which the
           BIOS POST routine will recognize as telling it to do
           a proper reboot.  (Well that's what this book in
           front of me says -- it may only apply to the Phoenix
           BIOS though, it's not clear.)  At the same time,
           disable NMIs by setting the top bit in the CMOS
           address register, as we're about to do peculiar
           things to the CPU.  I'm not sure if `outb_p' is
           needed instead of just `outb'.  Use it to be on the
           safe side. */

        outb_p (0x8f, 0x70);
        outb_p (0x00, 0x71);

        /* Remap the kernel at virtual address zero, as well as
           offset zero from the kernel segment.  This assumes
           the kernel segment starts at virtual address
           PAGE_OFFSET. */

        memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
                sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS);

        /* Make sure the first page is mapped to the start of
           physical memory.  It is normally not mapped, to trap
           kernel NULL pointer dereferences. */

        pg0[0] = _PAGE_RW | _PAGE_PRESENT;

        /* Use `swapper_pg_dir' as our page directory.  We
           bother with `SET_PAGE_DIR' because although we might
           be rebooting, if we change the way we set the root
           page dir in the future, we won't break a seldom used
           feature ;) */

        SET_PAGE_DIR(current,swapper_pg_dir);

        /* Write 0x1234 to absolute memory location 0x472.  The
           BIOS reads this on booting to tell it to "Bypass
           memory test (also warm boot)".  This seems like a
           fairly standard thing that gets set by REBOOT.COM
           programs, and the previous reset routine did this
           too. */

        *((unsigned short *)0x472) = reboot_mode;

        /* For the switch to real mode, copy some code to low
           memory.  It has to be in the first 64k because it is
           running in 16-bit mode, and it has to have the same
           physical and virtual address, because it turns off
           paging.  Copy it near the end of the first page, out
           of the way of BIOS variables. */

        memcpy ((void *) (0x1000 - sizeof (real_mode_switch)),
                real_mode_switch, sizeof (real_mode_switch));

        /* Set up the IDT for real mode. */

        __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));

        /* Set up a GDT from which we can load segment
           descriptors for real mode.  The GDT is not used in
           real mode; it is just needed here to prepare the
           descriptors. */

        __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));

        /* Load the data segment registers, and thus the
           descriptors ready for real mode.  The base address
           of each segment is 0x100, 16 times the selector
           value being loaded here.  This is so that the
           segment registers don't have to be reloaded after
           switching to real mode: the values are consistent
           for real mode operation already. */

        __asm__ __volatile__ ("movl $0x0010,%%eax\n"
                              "\tmovl %%ax,%%ds\n"
                              "\tmovl %%ax,%%es\n"
                              "\tmovl %%ax,%%fs\n"
                              "\tmovl %%ax,%%gs\n"
                              "\tmovl %%ax,%%ss" : : : "eax");

        /* Jump to the 16-bit code that we copied earlier.  It
           disables paging and the cache, switches to real
           mode, and jumps to the BIOS reset entry point. */

        __asm__ __volatile__ ("ljmp $0x0008,%0"
                              :
                              : "i" ((void *) (0x1000 -
                                     sizeof (real_mode_switch))));
}

void machine_halt(void)
{
}

void machine_power_off(void)
{
#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
        apm_power_off();
#endif
}

void show_regs(struct pt_regs * regs)
{
        long cr0 = 0L, cr2 = 0L, cr3 = 0L;

        printk("\n");
        printk("EIP: %04x:[<%08lx>]",
               0xffff & regs->xcs, regs->eip);
        if (regs->xcs & 3)
                printk(" ESP: %04x:%08lx",
                       0xffff & regs->xss, regs->esp);
        printk(" EFLAGS: %08lx\n", regs->eflags);
        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
               regs->eax, regs->ebx, regs->ecx, regs->edx);
        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
               regs->esi, regs->edi, regs->ebp);
        printk(" DS: %04x ES: %04x\n",
               0xffff & regs->xds, 0xffff & regs->xes);
        __asm__("movl %%cr0, %0": "=r" (cr0));
        __asm__("movl %%cr2, %0": "=r" (cr2));
        __asm__("movl %%cr3, %0": "=r" (cr3));
        printk("CR0: %08lx CR2: %08lx CR3: %08lx\n",
               cr0, cr2, cr3);
}

/* Allocation and freeing of basic task resources.
 *
 * NOTE! The task struct and the stack go together
 *
 * The task structure is a two-page thing, and as such
 * not reliable to allocate using the basic page alloc
 * functions.  We have a small cache of structures for
 * when the allocations fail..
 *
 * This extra buffer essentially acts to make for less
 * "jitter" in the allocations..
 *
 * On SMP we don't do this right now because:
 * - we aren't holding any locks when called, and we
 *   might as well just depend on the generic memory
 *   management to do proper locking for us instead of
 *   complicating it here.
 * - if you use SMP you have a beefy enough machine that
 *   this shouldn't matter.. */
#ifndef __SMP__
#define EXTRA_TASK_STRUCT 16
static struct task_struct *
        task_struct_stack[EXTRA_TASK_STRUCT];
static int task_struct_stack_ptr = -1;
#endif

struct task_struct * alloc_task_struct(void)
{
#ifndef EXTRA_TASK_STRUCT
        return (struct task_struct *)
                __get_free_pages(GFP_KERNEL,1);
#else
        int index;
        struct task_struct *ret;

        index = task_struct_stack_ptr;
        if (index >= EXTRA_TASK_STRUCT/2)
                goto use_cache;
        ret = (struct task_struct *)
                __get_free_pages(GFP_KERNEL,1);
        if (!ret) {
                index = task_struct_stack_ptr;
                if (index >= 0) {
use_cache:
                        ret = task_struct_stack[index];
                        task_struct_stack_ptr = index-1;
                }
        }
        return ret;
#endif
}

void free_task_struct(struct task_struct *p)
{
#ifdef EXTRA_TASK_STRUCT
        int index = task_struct_stack_ptr+1;

        if (index < EXTRA_TASK_STRUCT) {
                task_struct_stack[index] = p;
                task_struct_stack_ptr = index;
        } else
#endif
                free_pages((unsigned long) p, 1);
}

void release_segments(struct mm_struct *mm)
{
        if (mm->segments) {
                void * ldt = mm->segments;
                mm->segments = NULL;
                vfree(ldt);
        }
}

void forget_segments(void)
{
        /* forget local segments */
        __asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs"
                : /* no outputs */
                : "r" (0));

        /* Get the LDT entry from init_task. */
        current->tss.ldt = _LDT(0);
        load_ldt(0);
}

/* Create a kernel thread */
int kernel_thread(int (*fn)(void *), void * arg,
                  unsigned long flags)
{
        long retval, d0;

        __asm__ __volatile__(
                "movl %%esp,%%esi\n\t"
                "int $0x80\n\t"        /* Linux/i386 system call */
                "cmpl %%esp,%%esi\n\t" /* child or parent? */
                "je 1f\n\t"            /* parent - jump */
                /* Load the argument into eax, and push it.  That
                 * way, it does not matter whether the called
                 * function is compiled with -mregparm or not. */
                "movl %4,%%eax\n\t"
                "pushl %%eax\n\t"
                "call *%5\n\t"         /* call fn */
                "movl %3,%0\n\t"       /* exit */
                "int $0x80\n"
                "1:\t"
                :"=&a" (retval), "=&S" (d0)
                :"0" (__NR_clone), "i" (__NR_exit),
                 "r" (arg), "r" (fn),
                 "b" (flags | CLONE_VM)
                : "memory");
        return retval;
}
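
kernel_thread() issues the clone system call directly via int $0x80, so it needs no user-mode stack frame, and it ORs CLONE_VM into the flags so the new thread shares the kernel address space. A caller would use it roughly as follows; this is a sketch written for this commentary, and my_daemon and start_my_daemon are hypothetical names, not part of process.c:

/* hypothetical caller of kernel_thread() */
extern int kernel_thread(int (*fn)(void *), void *arg,
                         unsigned long flags);

static int my_daemon(void *unused)
{
        /* ... do the thread's work here; when this function
         * returns, kernel_thread's stub invokes exit for us */
        return 0;
}

static void start_my_daemon(void)
{
        /* returns the new thread's pid, or a negative errno */
        int pid = kernel_thread(my_daemon, NULL, 0);

        if (pid < 0)
                printk(KERN_WARNING "my_daemon: fork failed\n");
}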

/* Free current thread data structures etc.. */
void exit_thread(void)
{
        /* nothing to do ... */
}

void flush_thread(void)
{
        int i;
        struct task_struct *tsk = current;

        for (i=0 ; i<8 ; i++)
                tsk->tss.debugreg[i] = 0;

        /* Forget coprocessor state.. */
        clear_fpu(tsk);
        tsk->used_math = 0;
}

void release_thread(struct task_struct *dead_task)
{
}

/* If new_mm is NULL, we're being called to set up the
 * LDT descriptor for a clone task.  Each clone must have
 * a separate entry in the GDT. */
void copy_segments(int nr, struct task_struct *p,
                   struct mm_struct *new_mm)
{
        struct mm_struct * old_mm = current->mm;
        void * old_ldt = old_mm->segments, * ldt = old_ldt;

        /* default LDT - use the one from init_task */
        p->tss.ldt = _LDT(0);
        if (old_ldt) {
                if (new_mm) {
                        ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
                        new_mm->segments = ldt;
                        if (!ldt) {
                                printk(KERN_WARNING
                                       "ldt allocation failed\n");
                                return;
                        }
                        memcpy(ldt, old_ldt,
                               LDT_ENTRIES*LDT_ENTRY_SIZE);
                }
                p->tss.ldt = _LDT(nr);
                set_ldt_desc(nr, ldt, LDT_ENTRIES);
                return;
        }
}

/* Save a segment. */
#define savesegment(seg,value) \
        asm volatile("movl %%" #seg ",%0":"=m" \
                (*(int *)&(value)))

int copy_thread(int nr, unsigned long clone_flags,
                unsigned long esp, struct task_struct * p,
                struct pt_regs * regs)
{
        struct pt_regs * childregs;

        childregs = ((struct pt_regs *)
                     (2*PAGE_SIZE + (unsigned long) p)) - 1;
        *childregs = *regs;
        childregs->eax = 0;
        childregs->esp = esp;

        p->tss.esp = (unsigned long) childregs;
        p->tss.esp0 = (unsigned long) (childregs+1);
        p->tss.ss0 = __KERNEL_DS;

        p->tss.tr = _TSS(nr);
        set_tss_desc(nr,&(p->tss));
        p->tss.eip = (unsigned long) ret_from_fork;

        savesegment(fs,p->tss.fs);
        savesegment(gs,p->tss.gs);

        /* a bitmap offset pointing outside of the TSS limit
         * causes a nicely controllable SIGSEGV.  The first
         * sys_ioperm() call sets up the bitmap properly. */
        p->tss.bitmap = sizeof(struct thread_struct);

        unlazy_fpu(current);
        p->tss.i387 = current->tss.i387;

        return 0;
}
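
The childregs computation places the child's register frame at the very top of the two pages that hold both the task struct and the kernel stack; subtracting one struct from the two-page boundary leaves exactly room for it. The pointer arithmetic is easier to see with concrete numbers; a user-space sketch with an illustrative address and a stand-in struct:

#include <stdio.h>

#define PAGE_SIZE 4096UL                /* i386 page size */

/* stand-in for struct pt_regs; the real layout differs */
struct pt_regs_demo { long r[15]; };

int main(void)
{
        unsigned long p = 0xc0100000UL; /* pretend task_struct addr */

        /* pt_regs sits at the very end of the two-page block */
        struct pt_regs_demo *childregs =
                ((struct pt_regs_demo *)(2*PAGE_SIZE + p)) - 1;

        printf("stack top : %#lx\n", p + 2*PAGE_SIZE);
        printf("childregs : %p\n", (void *)childregs);
        return 0;
}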

/* fill in the FPU structure for a core dump. */
int dump_fpu(struct pt_regs * regs,
             struct user_i387_struct * fpu)
{
        int fpvalid;
        struct task_struct *tsk = current;

        fpvalid = tsk->used_math;
        if (fpvalid) {
                unlazy_fpu(tsk);
                memcpy(fpu,&tsk->tss.i387.hard,sizeof(*fpu));
        }

        return fpvalid;
}

/* fill in the user structure for a core dump.. */
void dump_thread(struct pt_regs * regs,
                 struct user * dump)
{
        int i;

        /* changed the size calculations - should hopefully
           work better.  lbt */
        dump->magic = CMAGIC;
        dump->start_code = 0;
        dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
        dump->u_tsize =
                ((unsigned long) current->mm->end_code)
                >> PAGE_SHIFT;
        dump->u_dsize =
                ((unsigned long) (current->mm->brk + (PAGE_SIZE-1)))
                >> PAGE_SHIFT;
        dump->u_dsize -= dump->u_tsize;
        dump->u_ssize = 0;
        for (i = 0; i < 8; i++)
                dump->u_debugreg[i] = current->tss.debugreg[i];

        if (dump->start_stack < TASK_SIZE)
                dump->u_ssize =
                        ((unsigned long)
                         (TASK_SIZE - dump->start_stack))
                        >> PAGE_SHIFT;

        dump->regs.ebx = regs->ebx;
        dump->regs.ecx = regs->ecx;
        dump->regs.edx = regs->edx;
        dump->regs.esi = regs->esi;
        dump->regs.edi = regs->edi;
        dump->regs.ebp = regs->ebp;
        dump->regs.eax = regs->eax;
        dump->regs.ds = regs->xds;
        dump->regs.es = regs->xes;
        savesegment(fs,dump->regs.fs);
        savesegment(gs,dump->regs.gs);
        dump->regs.orig_eax = regs->orig_eax;
        dump->regs.eip = regs->eip;
        dump->regs.cs = regs->xcs;
        dump->regs.eflags = regs->eflags;
        dump->regs.esp = regs->esp;
        dump->regs.ss = regs->xss;

        dump->u_fpvalid = dump_fpu (regs, &dump->i387);
}

/* This special macro can be used to load a debugging
 * register */
#define loaddebug(tsk,register) \
        __asm__("movl %0,%%db" #register \
                : /* no output */ \
                :"r" (tsk->tss.debugreg[register]))


/* switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the
 * right time (as a call from the fsave or fwait in
 * effect) rather than to the wrong process.  Lazy FP
 * saving no longer makes any sense with modern CPU's,
 * and this simplifies a lot of things (SMP and UP become
 * the same).
 *
 * NOTE! We used to use the x86 hardware context
 * switching.  The reason for not using it any more
 * becomes apparent when you try to recover gracefully
 * from saved state that is no longer valid (stale
 * segment register values in particular).  With the
 * hardware task-switch, there is no way to fix up bad
 * state in a reasonable manner.
 *
 * The fact that Intel documents the hardware
 * task-switching to be slow is a fairly red herring -
 * this code is not noticeably faster.  However, there
 * _is_ some room for improvement here, so the
 * performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows
 * us much more flexibility. */
void __switch_to(struct task_struct *prev,
                 struct task_struct *next)
{
        /* Save FPU and set TS if it wasn't set before.. */
        unlazy_fpu(prev);

        /* Reload TR, LDT and the page table pointers..
         *
         * We need TR for the IO permission bitmask (and the
         * vm86 bitmasks in case we ever use enhanced v86 mode
         * properly).
         *
         * We may want to get rid of the TR register some day,
         * and copy the bitmaps around by hand.  Oh, well.  In
         * the meantime we have to clear the busy bit in the
         * TSS entry, ugh. */
        gdt_table[next->tss.tr >> 3].b &= 0xfffffdff;
        asm volatile("ltr %0": :"g"
                (*(unsigned short *)&next->tss.tr));

        /* Save away %fs and %gs.  No need to save %es and %ds,
         * as those are always kernel segments while inside the
         * kernel. */
        asm volatile("movl %%fs,%0":"=m"
                (*(int *)&prev->tss.fs));
        asm volatile("movl %%gs,%0":"=m"
                (*(int *)&prev->tss.gs));

        /* Re-load LDT if necessary */
        if (next->mm->segments != prev->mm->segments)
                asm volatile("lldt %0": :"g"
                        (*(unsigned short *)&next->tss.ldt));

        /* Re-load page tables */
        {
                unsigned long new_cr3 = next->tss.cr3;
                if (new_cr3 != prev->tss.cr3)
                        asm volatile("movl %0,%%cr3": :"r" (new_cr3));
        }

        /* Restore %fs and %gs. */
        loadsegment(fs,next->tss.fs);
        loadsegment(gs,next->tss.gs);

        /* Now maybe reload the debug registers */
        if (next->tss.debugreg[7]) {
                loaddebug(next,0);
                loaddebug(next,1);
                loaddebug(next,2);
                loaddebug(next,3);
                loaddebug(next,6);
                loaddebug(next,7);
        }
}
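
The line gdt_table[next->tss.tr >> 3].b &= 0xfffffdff deserves unpacking: a selector divided by 8 gives the GDT index, and bit 9 of the descriptor's high dword is bit 1 of the type field, which distinguishes a busy 32-bit TSS (type 0xB) from an available one (type 0x9). Clearing it first keeps the subsequent ltr from faulting on a descriptor already marked busy. A stand-alone sketch with made-up values:

#include <stdio.h>

int main(void)
{
        unsigned int tr = 0x80;        /* hypothetical TSS selector */
        unsigned int hi = 0x00008b00;  /* high dword: type 0xB, busy */

        printf("GDT index   : %u\n", tr >> 3);   /* selector >> 3    */
        printf("cleared busy: %#x\n",            /* type 0xB -> 0x9  */
               hi & 0xfffffdff);
        return 0;
}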

asmlinkage int sys_fork(struct pt_regs regs)
{
        return do_fork(SIGCHLD, regs.esp, &regs);
}

asmlinkage int sys_clone(struct pt_regs regs)
{
        unsigned long clone_flags;
        unsigned long newsp;

        clone_flags = regs.ebx;
        newsp = regs.ecx;
        if (!newsp)
                newsp = regs.esp;
        return do_fork(clone_flags, newsp, &regs);
}

/* This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register
 * pressure.  In user mode vfork() cannot have a stack
 * frame, and if done by calling the "clone()" system
 * call directly, you do not have enough call-clobbered
 * registers to hold all the information you need. */
asmlinkage int sys_vfork(struct pt_regs regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
                       regs.esp, &regs);
}

/* sys_execve() executes a new program. */
asmlinkage int sys_execve(struct pt_regs regs)
{
        int error;
        char * filename;

        lock_kernel();
        filename = getname((char *) regs.ebx);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                goto out;
        error = do_execve(filename, (char **) regs.ecx,
                          (char **) regs.edx, &regs);
        if (error == 0)
                current->flags &= ~PF_DTRACE;
        putname(filename);
out:
        unlock_kernel();
        return error;
}


