ARM_linux启动后挂死在__arch_clear_user

qazwsx123

ARM_linux启动后挂死在__arch_clear_user [复制链接]

ARM+linux系统，将目标文件烧入板子后，出现
Kernel panic - not syncing: No init found.  Try passing init= option to kernel.
的问题，并不是所有的目标板都是这样，开始基本上都是ok的，有的在工地用过几个月后再
开机就进不了系统了，现在检测后发现出现上述问题，不知道是什么原因引起，下面是
内核启动时的打印信息：
Linux version 2.6.13.4 (root@lenovo) (gcc version 3.4.2) #161 Thu Aug 27 19:16:40 CST 2009
CPU: ARM720T [41807202] revision 2 (ARMv4T)
Machine: Hynix HMS30C7202
Warning: bad configuration page, trying to continue
Memory policy: ECC disabled, Data cache writeback
Built 1 zonelists
Kernel command line: console=ttyS3,9600 mem=16M rootfstype=jffs2  root=/dev/mtdblock2 rw
Trying to install chained handler for IRQ16
PID hash table entries: 128 (order: 7, 2048 bytes)
Console: colour dummy device 80x30
Dentry cache hash table entries: 4096 (order: 2, 16384 bytes)
Inode-cache hash table entries: 2048 (order: 1, 8192 bytes)
Memory: 16MB = 16MB total
Memory: 14732KB available (1145K code, 231K data, 60K init)
Mount-cache hash table entries: 512
CPU: Testing write buffer coherency: ok
Linux NoNET1.0 for Linux 2.6
NetWinder Floating Point Emulator V0.97 (double precision)
JFFS2 version 2.2. (NAND) (C) 2001-2003 Red Hat, Inc.
Initializing Cryptographic API
h720x Framebuffer driver
7202io: Copyright (C) 2005, forsafe weigang
h7202 PS/2 port driver
Serial: 8250/16550 driver $Revision: 1.90 $ 4 ports, IRQ sharing disabled
ttyS0 at MMIO 0x80020000 (irq = 8) is a 16550A
ttyS1 at MMIO 0x80021000 (irq = 9) is a 16550A
ttyS2 at MMIO 0x8002d000 (irq = 10) is a 16550A
ttyS3 at MMIO 0x8002e000 (irq = 11) is a 16550A
io scheduler noop registered
io scheduler anticipatory registered
io scheduler deadline registered
io scheduler cfq registered
H720x-MTD probing 16bit FLASH
H720X: Found 1 x16 devices at 0x0 in 16-bit bank
Amd/Fujitsu Extended Query Table at 0x0040
number of CFI chips: 1
cfi_cmdset_0002: Disabling erase-suspend-program due to code brokenness.
cmdlinepart partition parsing not available
H720x-MTD probing 16bit FLASH
H720X1: Found 1 x16 devices at 0x0 in 16-bit bank
Amd/Fujitsu Extended Query Table at 0x0040
number of CFI chips: 1
cfi_cmdset_0002: Disabling erase-suspend-program due to code brokenness.
cmdlinepart partition parsing not available
Using builtin partition table
Creating 3 MTD partitions on "H720X":
0x00000000-0x00040000 : "ArMon"
0x00040000-0x001c0000 : "Kernel"
0x001c0000-0x00400000 : "jffs2"
Using builtin partition table
Creating 3 MTD partitions on "H720X1":
0x00000000-0x00380000 : "user"
0x00380000-0x003c0000 : "firecord"
0x003c0000-0x00400000 : "applycation"
h7202_udc: Hynix HMS30C7202 USB Device Controller (2004-05-21)
mice: PS/2 mouse device common for all mice
root delay
Waiting 1sec before mounting root device...
input: AT Raw Set 2 keyboard on h7202ps2
VFS: Mounted root (jffs2 filesystem).
Freeing init memory: 60K
/sbin/init by zhou test
/etc/init by zhou test
/bin/init by zhou test
/bin/sh by zhou test
Kernel panic - not syncing: No init found.  Try passing init= option to kernel.
C/C++ code
跟踪后发现，系统挂在了search_binary_handler函数中
在init中调用run_init_process("/sbin/init");时，挂死在fn处，返回值是-14，不知道为什么
/*
 * search_binary_handler - try each registered binary format on @bprm.
 * @bprm: description of the program being exec'ed (file, header buffer, mm)
 * @regs: register set handed through to the format's load_binary hook
 *
 * After the security check, walks the `formats` list (at most two passes)
 * and calls every handler's ->load_binary().  The first handler returning
 * a value >= 0 wins: the file reference is dropped and that value is
 * returned.  A handler error other than -ENOEXEC (or a bprm whose mm has
 * already been cleared) ends the search and is returned as-is.  With
 * CONFIG_KMOD, a first pass that ends in -ENOEXEC may request a
 * "binfmt-XXXX" module and retry once.
 *
 * The printk() calls are debug tracing added while chasing a boot failure
 * where this function returns -14 for /sbin/init.
 */
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
      int try,retval;
      struct linux_binfmt *fmt;
#ifdef __alpha__
      /* handle /sbin/loader.. */
      {
         struct exec * eh = (struct exec *) bprm->buf;

         if (!bprm->loader && eh->fh.f_magic == 0x183 &&
            (eh->fh.f_flags & 0x3000) == 0x3000)
         {
            struct file * file;
            unsigned long loader;

            /* Drop the original executable; /sbin/loader replaces it. */
            allow_write_access(bprm->file);
            fput(bprm->file);
            bprm->file = NULL;

            loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);

            file = open_exec("/sbin/loader");
            retval = PTR_ERR(file);
            printk("search binary handler retval=%d\n", retval);
            if (IS_ERR(file))
                     return retval;

            /* Remember if the application is TASO.  */
            bprm->sh_bang = eh->ah.entry < 0x100000000UL;

            bprm->file = file;
            bprm->loader = loader;
            retval = prepare_binprm(bprm);
            printk("SBH prepare bin retval=%d\n", retval);
            if (retval<0)
                     return retval;
            /* should call search_binary_handler recursively here,
               but it does not matter */
         }
      }
#endif
      retval = security_bprm_check(bprm);
      printk("SBH sevur rtval=%d\n", retval);
      if (retval)
            return retval;

      /* kernel module loader fixup */
      /* so we don't try to load run modprobe in kernel space. */
      set_fs(USER_DS);
      retval = -ENOENT;
      for (try=0; try<2; try++) {
            read_lock(&binfmt_lock);
            for (fmt = formats ; fmt ; fmt = fmt->next) {
                     int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
                     if (!fn)
                              continue;
                     if (!try_module_get(fmt->module))
                              continue;
                     /* The list lock is not held while the handler runs. */
                     read_unlock(&binfmt_lock);
                     retval = fn(bprm, regs);
                     /*
                      * On the failing boards the exec of /sbin/init dies
                      * here with retval == -14 (-EFAULT).  fmt is a
                      * pointer, so it is printed with %p rather than %d.
                      */
                     printk("fn[%p] retval=%d\n",fmt, retval);
                     if (retval >= 0) {
                              /* Handler accepted the binary: release the file and finish. */
                              put_binfmt(fmt);
                              allow_write_access(bprm->file);
                              if (bprm->file)
                                    fput(bprm->file);
                              bprm->file = NULL;
                              current->did_exec = 1;
                              return retval;
                     }
                     /* Re-take the lock before touching the list again. */
                     read_lock(&binfmt_lock);
                     put_binfmt(fmt);
                     /* Only -ENOEXEC with an intact mm lets the next format try. */
                     if (retval != -ENOEXEC || bprm->mm == NULL)
                              break;
                     if (!bprm->file) {
                              read_unlock(&binfmt_lock);
                              return retval;
                     }
            }
            read_unlock(&binfmt_lock);
            if (retval != -ENOEXEC || bprm->mm == NULL) {
                     break;
#ifdef CONFIG_KMOD
            }else{
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
                     /*
                      * If the first four header bytes are all printable,
                      * give up instead of asking for a module.
                      */
                     if (printable(bprm->buf[0]) &&
                        printable(bprm->buf[1]) &&
                        printable(bprm->buf[2]) &&
                        printable(bprm->buf[3]))
                              break; /* -ENOEXEC */
                     /* Module name is built from the 16-bit value at buf[2]. */
                     request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
#endif
            }
      }
      printk("SBH end of retval=%d\n", retval);
      return retval;
}
继续跟踪后，发现是在fn的load_elf_binary中的padzero(elf_bss)出错
/*
 * padzero - zero the user memory from @elf_bss up to an ELF_MIN_ALIGN
 * boundary.
 * @elf_bss: user-space address where zeroing starts
 *
 * ELF_PAGEOFFSET(elf_bss) is presumably the offset of elf_bss inside an
 * ELF_MIN_ALIGN-sized page (confirm against the macro definition).  When
 * that offset is non-zero, the remaining ELF_MIN_ALIGN - offset bytes are
 * cleared with clear_user().
 *
 * Returns 0 on success or when nothing needs clearing, -EFAULT when
 * clear_user() reports a failure.
 *
 * The printk() calls are debug tracing; both values are unsigned long and
 * are therefore printed with %lu.
 */
static int padzero(unsigned long elf_bss)
{
      unsigned long nbyte;

      nbyte = ELF_PAGEOFFSET(elf_bss);
      printk("nbyte =%lu elf bsst=%lu\n", nbyte, elf_bss);
      if (nbyte) {
            /* Turn the offset into the count of bytes left to clear. */
            nbyte = ELF_MIN_ALIGN - nbyte;
            if (clear_user((void __user *) elf_bss, nbyte)) {
                     /* Boards that boot normally never reach this branch. */
                     printk("return -14\n");
                     return -EFAULT;
            }
      }
      return 0;
}

不知道是什么原因造成的，怀疑是MMU没有初始化好，导致的不稳定？？郁闷

beyondaymk

因问题紧急，现有偿寻求能解决该问题的高手，
我司在深圳科技园
联系电话：0755-86315039 周工

ARM_linux启动后挂死在__arch_clear_user [复制链接]

最新回复

浏览过的版块