/dev/mem
/dev/memは、以下の条件のもとでカーネル空間のメモリーをページ単位で参照するデバイスです。参照範囲の終端がhigh_memory(この環境では0x20000000:アーキテクチャ依存)を超えるなら参照不可エラー(EFAULT)となります。それ以下の範囲でも、参照範囲がiomem配下でIORESOURCE_BUSY && IORESOURCE_EXCLUSIVEのリソースに含まれるならエラー(EPERM)となります。
また、IORESOURCE_BUSY && IORESOURCE_EXCLUSIVEでないiomemリソースでも、System RAM配下のリソースならエラー(EPERM)となります。ただし、pagenr <= 256(先頭1M:BIOS領域)なら無条件に参照可能です。従って00010000-0009f7ffの範囲は、System RAMですが参照可能となります。
上記機能はlinux 2.6では実装されておらず、linux3.3でもCONFIG_STRICT_DEVMEMを有効にして構築したカーネルでのみ機能します(無効の場合、range_is_allowed()は常に1を返します)。
なお、IORESOURCE_BUSY && IORESOURCE_EXCLUSIVEでないiomemリソースでも、System RAM配下のリソースならエラー(EPERM)となります。なお、pagenr <= 256(1M:BIOS領域)なら参照OKです。従って00010000-0009f7ff範囲は、System RAMですが参照可能となります。
上記機能はlinux 2.6では実装されておらず、linux3.3でもCONFIG_STRICT_DEVMEMで構築したシステム下で実装されます。
/*
 * Excerpt of the kernel code paths that decide whether a read of /dev/mem
 * is permitted.  The functions are quoted in call order, not in the order
 * required to compile them.
 */

/* File operations table for /dev/mem; .read is served by read_mem(). */
static const struct file_operations mem_fops = {
	.llseek = memory_lseek,
	.read = read_mem,
	.write = write_mem,
	.mmap = mmap_mem,
	.open = open_mem,
	.get_unmapped_area = get_unmapped_area_mem,
};

/*
 * Read handler for /dev/mem.
 *
 * Copies up to 'count' bytes, starting at the offset held in *ppos, to the
 * user buffer 'buf', in chunks of 'sz' bytes.
 *
 * Returns the number of bytes copied and advances *ppos by that amount.
 * Returns -EFAULT when valid_phys_addr_range() rejects the range, when
 * xlate_dev_mem_ptr() yields NULL, or when copy_to_user() leaves bytes
 * uncopied.  Returns -EPERM when range_is_allowed() rejects the range.
 * On any error return *ppos is left unchanged.
 */
static ssize_t read_mem(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read, sz;
	char *ptr;

	/* Whole request must end at or below high_memory. */
	if (!valid_phys_addr_range(p, count))
		return -EFAULT;
	read = 0;
	while (count > 0) {
		unsigned long remaining;

		/* presumably clamps the chunk to the current page - confirm in size_inside_page() */
		sz = size_inside_page(p, count);
		/* Checked with the remaining 'count', not the chunk size 'sz'. */
		if (!range_is_allowed(p >> PAGE_SHIFT, count))
			return -EPERM;
		/* NOTE(review): looks like this maps the address for kernel access - confirm */
		ptr = xlate_dev_mem_ptr(p);
		if (!ptr)
			return -EFAULT;
		remaining = copy_to_user(buf, ptr, sz);
		unxlate_dev_mem_ptr(p, ptr);
		if (remaining)
			return -EFAULT;
		buf += sz;
		p += sz;
		count -= sz;
		read += sz;
	}
	*ppos += read;
	return read;
}

/* In this excerpt __pa() is a plain cast to unsigned long. */
#define __pa(x) ((unsigned long) (x))

/* Non-zero when [addr, addr + count) ends at or below __pa(high_memory). */
static inline int valid_phys_addr_range(unsigned long addr, size_t count)
{
	return addr + count <= __pa(high_memory);
}

#ifdef CONFIG_STRICT_DEVMEM
/*
 * Checks every page of the 'size'-byte range starting at page frame 'pfn'
 * with devmem_is_allowed().  Returns 1 when all pages are allowed; on the
 * first refused page logs the offending program and range, and returns 0.
 */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	u64 from = ((u64)pfn) << PAGE_SHIFT;
	u64 to = from + size;
	u64 cursor = from;

	while (cursor < to) {
		if (!devmem_is_allowed(pfn)) {
			printk(KERN_INFO "Program %s tried to access /dev/mem between %Lx->%Lx.\n", current->comm, from, to);
			return 0;
		}
		cursor += PAGE_SIZE;
		pfn++;
	}
	return 1;
}
#else
/* Without CONFIG_STRICT_DEVMEM every range is allowed. */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	return 1;
}
#endif

/*
 * Per-page policy for /dev/mem.  Returns 1 (allowed) or 0 (refused):
 *  - page numbers up to and including 256 are always allowed;
 *  - pages for which iomem_is_exclusive() is true are refused;
 *  - remaining pages are allowed only when page_is_ram() is false.
 */
int devmem_is_allowed(unsigned long pagenr)
{
	if (pagenr <= 256)
		return 1;
	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
		return 0;
	if (!page_is_ram(pagenr))
		return 1;
	return 0;
}

/*
 * Returns 1 when the page containing 'addr' overlaps an iomem resource that
 * has both IORESOURCE_BUSY and IORESOURCE_EXCLUSIVE set, otherwise 0.
 * Always returns 0 when strict_iomem_checks is not set.
 */
int iomem_is_exclusive(u64 addr)
{
	struct resource *p = &iomem_resource;
	int err = 0;
	loff_t l;
	int size = PAGE_SIZE;

	if (!strict_iomem_checks)
		return 0;
	/* Work on the page that contains addr. */
	addr = addr & PAGE_MASK;
	read_lock(&resource_lock);
	for (p = p->child; p ; p = r_next(NULL, p, &l)) {
		/* Resource starts past the page: stop searching. */
		if (p->start >= addr + size)
			break;
		/* Resource ends before the page: keep looking. */
		if (p->end < addr)
			continue;
		if (p->flags & IORESOURCE_BUSY && p->flags & IORESOURCE_EXCLUSIVE) {
			err = 1;
			break;
		}
	}
	read_unlock(&resource_lock);
	return err;
}

/*
 * True when walk_system_ram_range() over the single page 'pfn' returns 1,
 * i.e. when the __is_ram callback was invoked for it.
 */
int __weak page_is_ram(unsigned long pfn)
{
	return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
}

/*
 * Calls 'func' for each piece of the page range [start_pfn, start_pfn +
 * nr_pages) that find_next_system_ram() reports for the name "System RAM".
 *
 * 'func' receives the first whole page of the piece, the number of whole
 * pages, and 'arg'.  The walk stops as soon as 'ret' is non-zero; because
 * 'ret' starts at -1, a piece that contains no whole page also ends the
 * walk.  Returns the last value of 'ret' (-1 if 'func' was never called).
 */
int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *))
{
	struct resource res;
	unsigned long pfn, end_pfn;
	u64 orig_end;
	int ret = -1;

	res.start = (u64) start_pfn << PAGE_SHIFT;
	res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
	orig_end = res.end;
	while ((res.start < res.end) && (find_next_system_ram(&res, "System RAM") >= 0)) {
		/* Round the start up and the end down to whole pages. */
		pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
		end_pfn = (res.end + 1) >> PAGE_SHIFT;
		if (end_pfn > pfn)
			ret = (*func)(pfn, end_pfn - pfn, arg);
		if (ret)
			break;
		/* Continue after the piece just handled, up to the original end. */
		res.start = res.end + 1;
		res.end = orig_end;
	}
	return ret;
}
検証サンプルiomemより
[root@localhost c]# cat iomem_reso.txt child :00000000-ffffffff : PCI mem: 1.0 [BUSY:0] [EXCLU:0] [MEM:1] child :00000000-0000ffff : reserved: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:00010000-0009f7ff : System RAM: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:0009f800-0009ffff : reserved: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:000a0000-000bffff : PCI Bus 0000:00: PCI mem 1.1 [BUSY:0] [EXCLU:0] [MEM:1] child :000a0000-000bffff : Video RAM area: PCI Bus 0000:00 0.0 [BUSY:1] [EXCLU:0] [MEM:1] sibling:000c0000-000c7fff : Video ROM: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:000ca000-000cafff : Adapter ROM: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:000cb000-000cbfff : Adapter ROM: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:000cc000-000cffff : PCI Bus 0000:00: PCI mem 0.1 [BUSY:0] [EXCLU:0] [MEM:1] sibling:000d0000-000d3fff : PCI Bus 0000:00: PCI mem 0.1 [BUSY:0] [EXCLU:0] [MEM:1] sibling:000d4000-000d7fff : PCI Bus 0000:00: PCI mem 0.1 [BUSY:0] [EXCLU:0] [MEM:1] sibling:000d8000-000dbfff : PCI Bus 0000:00: PCI mem 0.1 [BUSY:0] [EXCLU:0] [MEM:1] sibling:000dc000-000fffff : reserved: PCI mem 1.1 [BUSY:1] [EXCLU:0] [MEM:1] child :000f0000-000fffff : System ROM: reserved 0.0 [BUSY:1] [EXCLU:0] [MEM:1] sibling:00100000-1feeffff : System RAM: PCI mem 1.1 [BUSY:1] [EXCLU:0] [MEM:1] child :00400000-0093411e : Kernel code: System RAM 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:0093411f-00bb6e3f : Kernel data: System RAM 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:00c57000-00d4efff : Kernel bss: System RAM 0.0 [BUSY:1] [EXCLU:0] [MEM:1] sibling:1fef0000-1fefefff : ACPI Tables: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:1feff000-1fefffff : ACPI Non-volatile Storage: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:1ff00000-1fffffff : System RAM: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] sibling:20000000-febfffff : PCI Bus 0000:00: PCI mem 1.1 [BUSY:0] [EXCLU:0] [MEM:1] child :20000000-20007fff : 0000:00:0f.0: PCI Bus 0000:00 0.1 [BUSY:0] [EXCLU:0] [MEM:1]
全iomemリソースの参照チェック
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> int read_devmem(int fd, unsigned int addr); char *deverr_name(int err, char *buf); void get_adress(char *p, unsigned int *start, unsigned int *end) { sscanf(p + 8, "%x-%x", start, end); } int main(int argc,char **argv) { char buff[500], errname[10]; int fd, err; unsigned int start, end; FILE *fp; char *endp; long value; fd=open("/dev/mem",0); fp = fopen("iomem_reso.txt", "r"); while (fgets(buff, sizeof(buff), fp)) { get_adress(buff, &start, &end); err = read_devmem(fd, start + 0xfff); if (err) { printf("%s:%s", deverr_name(err, errname), buff); } if (start >= 0x20000000) { break; } } fclose(fp); } int read_devmem(int fd, unsigned int addr) { int r, err = 0; char buf[1]; lseek(fd,addr, SEEK_SET); r=read(fd,buf,1); if(r<1) { err = errno; } return err; } char *deverr_name(int err, char *buf) { switch (err) { case EPERM: strcpy(buf, " EPERM"); break; case EFAULT: strcpy(buf, "EFAULT"); break; default: strcpy(buf, " OTHER"); break; } return buf; }dmesgはrange_is_allowed()で、EPERMのみ出力されます。
[root@localhost c]# ./a.out EPERM:child :00400000-0093411e : Kernel code: System RAM 0.1 [BUSY:1] [EXCLU:0] [MEM:1] EPERM:sibling:0093411f-00bb6e3f : Kernel data: System RAM 0.1 [BUSY:1] [EXCLU:0] [MEM:1] EPERM:sibling:00c57000-00d4efff : Kernel bss: System RAM 0.0 [BUSY:1] [EXCLU:0] [MEM:1] EPERM:sibling:1ff00000-1fffffff : System RAM: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] EFAULT:sibling:20000000-febfffff : PCI Bus 0000:00: PCI mem 1.1 [BUSY:0] [EXCLU:0] [MEM:1] [root@localhost c]# dmesg [ 918.089364] Program a.out tried to access /dev/mem between 400000->400001. [ 918.094529] Program a.out tried to access /dev/mem between 935000->935001. [ 918.094622] Program a.out tried to access /dev/mem between c57000->c57001. [ 918.095541] Program a.out tried to access /dev/mem between 1ff00000->1ff00001.
System RAMエリアチェックサンプル(fopen/freadsはlkm下のfreadsより)
#include <linux/module.h> #include <linux/kernel.h> struct file *fopen(char *file, int mode); int freads(char *buff, int size, struct file *fp); int fclose(struct file *fp); static int find_next_system_ram(struct resource *res, char *name); static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg) { return 1; } int __weak page_is_ram(unsigned long pfn) { return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; } int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { struct resource res; unsigned long pfn, end_pfn; u64 orig_end; int ret = -1; res.start = (u64) start_pfn << PAGE_SHIFT; res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1; res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && (find_next_system_ram(&res, "System RAM") >= 0)) { pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = (res.end + 1) >> PAGE_SHIFT; if (end_pfn > pfn) ret = (*func)(pfn, end_pfn - pfn, arg); if (ret) break; res.start = res.end + 1; res.end = orig_end; } return ret; } static int find_next_system_ram(struct resource *res, char *name) { resource_size_t start, end; struct resource *p; start = res->start; end = res->end; for (p = iomem_resource.child; p ; p = p->sibling) { if (p->flags != res->flags) continue; if (name && strcmp(p->name, name)) continue; if (p->start > end) { p = NULL; break; } if ((p->end >= start) && (p->start < end)) break; } if (!p) return -1; if (res->start < p->start) res->start = p->start; if (res->end > p->end) res->end = p->end; return 0; } void get_adress(char *p, unsigned int *start, unsigned int *end) { sscanf(p + 8, "%x-%x", start, end); } static int __init babakaka_init( void ) { int ret; struct file *fp; char buff[200]; unsigned int start, end; fp = fopen("iomem_reso.txt", 0); if (fp) { printk("System ram\n"); while (freads(buff, sizeof(buff), fp)) { get_adress(buff, &start, &end); start += 0xfff; 
ret = page_is_ram(start >> PAGE_SHIFT); if (ret) { printk("%s\n", buff); } } fclose(fp); } return -1; } static void __exit babakaka_exit( void ) { } module_init(babakaka_init); module_exit(babakaka_exit);
[root@localhost lkm]# insmod babakaka.ko [root@localhost lkm]# dmesg [ 1861.432741] System ram [ 1861.434028] sibling:00010000-0009f7ff : System RAM: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1] [ 1861.436773] sibling:00100000-1feeffff : System RAM: PCI mem 1.1 [BUSY:1] [EXCLU:0] [MEM:1] [ 1861.436954] child :00400000-0093411e : Kernel code: System RAM 0.1 [BUSY:1] [EXCLU:0] [MEM:1] [ 1861.437131] sibling:0093411f-00bb6e3f : Kernel data: System RAM 0.1 [BUSY:1] [EXCLU:0] [MEM:1] [ 1861.437307] sibling:00c57000-00d4efff : Kernel bss: System RAM 0.0 [BUSY:1] [EXCLU:0] [MEM:1] [ 1861.438594] sibling:1ff00000-1fffffff : System RAM: PCI mem 0.1 [BUSY:1] [EXCLU:0] [MEM:1]
追記
low_memoryはup to 20000000で、high_memory=20000000という事です。[root@localhost ~]# dmesg | grep "kernel direct mapping" [ 0.000000] kernel direct mapping tables up to 20000000 @ ffb000-1000000