From: Juha Yrjola
Date: Mon, 12 Mar 2007 17:20:00 +0000 (+0200)
Subject: Add the lowmem module
X-Git-Tag: v2.6.21-omap1~87
X-Git-Url: http://pilppa.com/gitweb/?a=commitdiff_plain;h=92173f9a32f0ceefd511c617c69dda6c03a20ec8;p=linux-2.6-omap-h63xx.git

Add the lowmem module

lowmem is an LSM module for notifying user space about low memory
situations, and for tweaking the vm_enough_memory() implementation
to be better suited for embedded devices.

Signed-off-by: Juha Yrjola
---

diff --git a/security/Kconfig b/security/Kconfig
index 460e5c9cf49..873adfc84de 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -93,6 +93,14 @@ config SECURITY_ROOTPLUG
 
 	  If you are unsure how to answer this question, answer N.
 
+config SECURITY_LOWMEM
+	tristate "Low memory watermark support"
+	depends on SECURITY
+	help
+	  Implements low memory watermark support.
+
+	  If you are unsure how to answer this question, answer N.
+
 source security/selinux/Kconfig
 
 endmenu
diff --git a/security/Makefile b/security/Makefile
index ef87df2f50a..9d5b0aade3d 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_SECURITY) += security.o dummy.o inode.o
 obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o
 obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o
 obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o
+obj-$(CONFIG_SECURITY_LOWMEM) += commoncap.o lowmem.o
diff --git a/security/lowmem.c b/security/lowmem.c
new file mode 100644
index 00000000000..a627d511f8e
--- /dev/null
+++ b/security/lowmem.c
@@ -0,0 +1,334 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mman.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <linux/sysctl.h>
+#include <linux/swap.h>
+#include <linux/kobject.h>
+#include <linux/pagemap.h>
+#include <linux/hugetlb.h>
+#include <linux/sysfs.h>
+
+#define MY_NAME "lowmem"
+
+#define LOWMEM_MAX_UIDS 8
+
+enum {
+	VM_LOWMEM_DENY = 1,
+	VM_LOWMEM_LEVEL1_NOTIFY,
+	VM_LOWMEM_LEVEL2_NOTIFY,
+	VM_LOWMEM_NR_DECAY_PAGES,
+	VM_LOWMEM_ALLOWED_UIDS,
+	VM_LOWMEM_ALLOWED_PAGES,
+	VM_LOWMEM_USED_PAGES,
+};
+
+static unsigned int deny_percentage;
+static unsigned int l1_notify, l2_notify;
+static unsigned int nr_decay_pages;
+/*
+ * Snapshot of the figures computed by the most recent run of
+ * low_vm_enough_memory(); exported read-only through sysctl.
+ */
+static unsigned long allowed_pages;
+static long used_pages;
+/*
+ * Zero-terminated list of uids that may allocate even above the deny
+ * watermark.  Static storage starts out zeroed, so the list is empty
+ * until user space fills it in through sysctl.
+ */
+static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
+static unsigned int minuid = 1;
+static unsigned int maxuid = 65535;
+
+/* Tunables and status values published below the "vm" sysctl directory. */
+static ctl_table lowmem_table[] = {
+	{
+		.ctl_name = VM_LOWMEM_DENY,
+		.procname = "lowmem_deny_watermark",
+		.data = &deny_percentage,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.child = NULL,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	}, {
+		.ctl_name = VM_LOWMEM_LEVEL1_NOTIFY,
+		.procname = "lowmem_notify_low",
+		.data = &l1_notify,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.child = NULL,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	}, {
+		.ctl_name = VM_LOWMEM_LEVEL2_NOTIFY,
+		.procname = "lowmem_notify_high",
+		.data = &l2_notify,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.child = NULL,
+		.proc_handler = &proc_dointvec,
+		.strategy = &sysctl_intvec,
+	}, {
+		.ctl_name = VM_LOWMEM_NR_DECAY_PAGES,
+		.procname = "lowmem_nr_decay_pages",
+		.data = &nr_decay_pages,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.child = NULL,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+	}, {
+		.ctl_name = VM_LOWMEM_ALLOWED_UIDS,
+		.procname = "lowmem_allowed_uids",
+		.data = &allowed_uids,
+		.maxlen = LOWMEM_MAX_UIDS * sizeof(unsigned int),
+		.mode = 0644,
+		.child = NULL,
+		.proc_handler = &proc_dointvec_minmax,
+		.strategy = &sysctl_intvec,
+		/* uid 0 is excluded: a zero entry terminates the list */
+		.extra1 = &minuid,
+		.extra2 = &maxuid,
+	}, {
+		.ctl_name = VM_LOWMEM_ALLOWED_PAGES,
+		.procname = "lowmem_allowed_pages",
+		.data = &allowed_pages,
+		.maxlen = sizeof(unsigned long),
+		.mode = 0444,
+		.child = NULL,
+		/*
+		 * The backing variable is long-sized, so it needs the
+		 * long handler; the int handler misreads it wherever
+		 * long is wider than int.
+		 */
+		.proc_handler = &proc_doulongvec_minmax,
+		.strategy = &sysctl_intvec,
+	}, {
+		.ctl_name = VM_LOWMEM_USED_PAGES,
+		.procname = "lowmem_used_pages",
+		.data = &used_pages,
+		.maxlen = sizeof(long),
+		.mode = 0444,
+		.child = NULL,
+		/* long-sized and never stored negative, see above */
+		.proc_handler = &proc_doulongvec_minmax,
+		.strategy = &sysctl_intvec,
+	}, {
+		.ctl_name = 0
+	}
+};
+
+static ctl_table lowmem_root_table[] = {
+	{
+		.ctl_name = CTL_VM,
+		.procname = "vm",
+		.mode = 0555,
+		.child = lowmem_table,
+	}, {
+		.ctl_name = 0
+	}
+};
+
+#define KERNEL_ATTR_RO(_name) \
+static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
+
+/* Current notification state, 0 or 1, as last reported to user space. */
+static int low_watermark_reached, high_watermark_reached;
+
+/* sysfs read handler: prints the low watermark flag followed by a newline. */
+static ssize_t low_watermark_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%d\n", low_watermark_reached);
+}
+
+/* sysfs read handler: prints the high watermark flag followed by a newline. */
+static ssize_t high_watermark_show(struct subsystem *subsys, char *page)
+{
+	return sprintf(page, "%d\n", high_watermark_reached);
+}
+
+KERNEL_ATTR_RO(low_watermark);
+KERNEL_ATTR_RO(high_watermark);
+
+/*
+ * Record a new low watermark state and call sysfs_notify() on the
+ * "low_watermark" attribute, but only when the state actually changes.
+ */
+static void low_watermark_state(int new_state)
+{
+	if (low_watermark_reached == new_state)
+		return;
+
+	low_watermark_reached = new_state;
+	sysfs_notify(&kernel_subsys.kset.kobj, NULL, "low_watermark");
+}
+
+/*
+ * Record a new high watermark state and call sysfs_notify() on the
+ * "high_watermark" attribute, but only when the state actually changes.
+ */
+static void high_watermark_state(int new_state)
+{
+	if (high_watermark_reached == new_state)
+		return;
+
+	high_watermark_reached = new_state;
+	sysfs_notify(&kernel_subsys.kset.kobj, NULL, "high_watermark");
+}
+
+/*
+ * vm_enough_memory hook.
+ *
+ * Returns 0 when the request for @pages may proceed and -ENOMEM when
+ * it is denied.  While any of the three watermarks is unconfigured
+ * the decision is delegated to __vm_enough_memory().
+ *
+ * Side effects: accounts @pages with vm_acct_memory() (undone again on
+ * denial), updates the allowed_pages/used_pages snapshot and may send
+ * low/high watermark notifications.
+ */
+static int low_vm_enough_memory(long pages)
+{
+	unsigned long free, allowed;
+	long deny_threshold, level1, level2, used;
+	int cap_sys_admin = 0, notify;
+
+	if (cap_capable(current, CAP_SYS_ADMIN) == 0)
+		cap_sys_admin = 1;
+
+	/*
+	 * We activate ourselves only after all three watermarks have
+	 * been configured.
+	 */
+	if (deny_percentage == 0 || l1_notify == 0 || l2_notify == 0)
+		return __vm_enough_memory(pages, cap_sys_admin);
+
+	/* The watermarks are percentages of RAM not used for hugetlb pages */
+	allowed = totalram_pages - hugetlb_total_pages();
+	deny_threshold = allowed * deny_percentage / 100;
+	level1 = allowed * l1_notify / 100;
+	level2 = allowed * l2_notify / 100;
+
+	vm_acct_memory(pages);
+
+	/* Easily freed pages when under VM pressure or direct reclaim */
+	free = global_page_state(NR_FILE_PAGES);
+	free += nr_swap_pages;
+	free += global_page_state(NR_SLAB_RECLAIMABLE);
+
+	used = allowed - free;
+	if (unlikely(used < 0))
+		used = 0;
+
+	/* The hot path, plenty of memory */
+	if (likely(used < level1))
+		goto enough_memory;
+
+	/* No luck, lets make it more expensive and try again.. */
+	used -= nr_free_pages();
+	/*
+	 * Clamp again: a negative value would be published through
+	 * used_pages and, where long and unsigned int have the same
+	 * width, would turn the hysteresis test below into an unsigned
+	 * comparison that never clears the high watermark.
+	 */
+	if (unlikely(used < 0))
+		used = 0;
+
+	if (used >= deny_threshold) {
+		int i;
+
+		allowed_pages = allowed;
+		used_pages = used;
+		low_watermark_state(1);
+		high_watermark_state(1);
+		/* Memory allocations by root are always allowed */
+		if (cap_sys_admin)
+			return 0;
+
+		/* uids from allowed_uids vector are also allowed no matter what */
+		for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
+			if (current->uid == allowed_uids[i])
+				return 0;
+
+		/* Denied: drop the accounting done above */
+		vm_unacct_memory(pages);
+		if (printk_ratelimit()) {
+			printk(MY_NAME ": denying memory allocation to process %d (%s)\n",
+			       current->pid, current->comm);
+		}
+		return -ENOMEM;
+	}
+
+enough_memory:
+	/* See if we need to notify level 1 */
+	low_watermark_state(used >= level1);
+
+	/*
+	 * In the level 2 notification case things are more complicated,
+	 * as the level that we drop the state and send a notification
+	 * should be lower than when it is first triggered. Having this
+	 * on the same watermark level ends up bouncing back and forth
+	 * when applications are being stupid.
+	 */
+	notify = used >= level2;
+	if (notify || used + nr_decay_pages < level2)
+		high_watermark_state(notify);
+
+	/* We have plenty of memory */
+	allowed_pages = allowed;
+	used_pages = used;
+	return 0;
+}
+
+static struct security_operations lowmem_security_ops = {
+	/* Use the capability functions for some of the hooks */
+	.ptrace = cap_ptrace,
+	.capget = cap_capget,
+	.capset_check = cap_capset_check,
+	.capset_set = cap_capset_set,
+	.capable = cap_capable,
+
+	.bprm_apply_creds = cap_bprm_apply_creds,
+	.bprm_set_security = cap_bprm_set_security,
+
+	.task_post_setuid = cap_task_post_setuid,
+	.task_reparent_to_init = cap_task_reparent_to_init,
+	.vm_enough_memory = low_vm_enough_memory,
+};
+
+static struct ctl_table_header *lowmem_table_header;
+/* flag to keep track of how we were registered */
+static int secondary;
+
+static struct attribute *lowmem_attrs[] = {
+	&low_watermark_attr.attr,
+	&high_watermark_attr.attr,
+	NULL,
+};
+
+static struct attribute_group lowmem_attr_group = {
+	.attrs = lowmem_attrs,
+};
+
+/*
+ * Remove ourselves from the security framework, using whichever
+ * interface lowmem_init() registered through.  Failures are only logged.
+ */
+static void lowmem_unregister_security(void)
+{
+	if (secondary) {
+		if (mod_unreg_security(MY_NAME, &lowmem_security_ops))
+			printk(KERN_ERR MY_NAME ": Failure unregistering "
+			       "with the primary security module.\n");
+	} else {
+		if (unregister_security(&lowmem_security_ops)) {
+			printk(KERN_ERR MY_NAME ": Failure unregistering "
+			       "with the kernel.\n");
+		}
+	}
+}
+
+/*
+ * Module entry point: register the security operations, the sysctl
+ * table and the sysfs attribute group.  Returns 0 on success or a
+ * negative errno; on failure every step already taken is undone.
+ */
+static int __init lowmem_init(void)
+{
+	int r;
+
+	/* register ourselves with the security framework */
+	if (register_security(&lowmem_security_ops)) {
+		printk(KERN_ERR MY_NAME ": Failure registering with the kernel\n");
+		/* try registering with primary module */
+		if (mod_reg_security(MY_NAME, &lowmem_security_ops)) {
+			printk(KERN_ERR MY_NAME ": Failure registering with the "
+			       "primary security module.\n");
+			return -EINVAL;
+		}
+		secondary = 1;
+	}
+
+	lowmem_table_header = register_sysctl_table(lowmem_root_table);
+	if (unlikely(!lowmem_table_header)) {
+		r = -ENOMEM;
+		goto err_security;
+	}
+
+	/*
+	 * NOTE(review): makes the kernel subsystem's kobject point at
+	 * its own kset; presumably required for the sysfs calls on
+	 * kernel_subsys to work on this kernel version - confirm.
+	 */
+	kernel_subsys.kset.kobj.kset = &kernel_subsys.kset;
+
+	r = sysfs_create_group(&kernel_subsys.kset.kobj,
+			       &lowmem_attr_group);
+	if (unlikely(r))
+		goto err_sysctl;
+
+	printk(KERN_INFO MY_NAME ": Module initialized.\n");
+
+	return 0;
+
+err_sysctl:
+	unregister_sysctl_table(lowmem_table_header);
+err_security:
+	lowmem_unregister_security();
+	return r;
+}
+
+/* Module exit point: tear down everything lowmem_init() set up. */
+static void __exit lowmem_exit(void)
+{
+	/* remove ourselves from the security framework */
+	lowmem_unregister_security();
+
+	unregister_sysctl_table(lowmem_table_header);
+
+	sysfs_remove_group(&kernel_subsys.kset.kobj, &lowmem_attr_group);
+
+	printk(KERN_INFO MY_NAME ": Module removed.\n");
+}
+
+module_init(lowmem_init);
+module_exit(lowmem_exit);
+
+MODULE_DESCRIPTION("Low watermark LSM module");
+MODULE_LICENSE("GPL");