From: Thomas Gleixner
Date: Mon, 20 Oct 2008 11:14:06 +0000 (+0200)
Subject: Merge branches 'timers/clocksource', 'timers/hrtimers', 'timers/nohz', 'timers/ntp...
X-Git-Tag: v2.6.28-rc1~82^2
X-Git-Url: http://pilppa.com/gitweb/?a=commitdiff_plain;h=c465a76af658b443075d6efee1c3131257643020;p=linux-2.6-omap-h63xx.git

Merge branches 'timers/clocksource', 'timers/hrtimers', 'timers/nohz', 'timers/ntp', 'timers/posixtimers' and 'timers/debug' into v28-timers-for-linus
---

c465a76af658b443075d6efee1c3131257643020
diff --cc drivers/clocksource/acpi_pm.c
index 3df33848100,5ca1d80de18,71d2ac4e3f4,5ca1d80de18,4eee533f3f4,71d2ac4e3f4..c20171078d1
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@@@@@@ -175,10 -175,10 -176,15 -175,10 -176,13 -176,15 +176,15 @@@@@@@ static int verify_pmtmr_rate(void
  #define verify_pmtmr_rate() (0)
  #endif

++ + /* Number of monotonicity checks to perform during initialization */
++ + #define ACPI_PM_MONOTONICITY_CHECKS 10
++ ++ /* Number of reads we try to get two different values */
++ ++ #define ACPI_PM_READ_CHECKS 10000
++ +
  static int __init init_acpi_pm_clocksource(void)
  {
-- - 	u32 value1, value2;
-- - 	unsigned int i;
++ + 	cycle_t value1, value2;
 - 	unsigned int i, j, good = 0;
++ ++ 	unsigned int i, j = 0;

  	if (!pmtmr_ioport)
  		return -ENODEV;
@@@@@@@ -187,24 -187,24 -193,29 -187,24 -191,32 -193,29 +193,29 @@@@@@@
  						clocksource_acpi_pm.shift);

  	/* "verify" this timing source: */
-- - 	value1 = read_pmtmr();
-- - 	for (i = 0; i < 10000; i++) {
-- - 		value2 = read_pmtmr();
-- - 		if (value2 == value1)
-- - 			continue;
-- - 		if (value2 > value1)
-- - 			goto pm_good;
-- - 		if ((value2 < value1) && ((value2) < 0xFFF))
-- - 			goto pm_good;
-- - 		printk(KERN_INFO "PM-Timer had inconsistent results:"
-- - 			" 0x%#x, 0x%#x - aborting.\n", value1, value2);
-- - 		return -EINVAL;
++ + 	for (j = 0; j < ACPI_PM_MONOTONICITY_CHECKS; j++) {
++ ++ 		udelay(100 * j);
++ + 		value1 = clocksource_acpi_pm.read();
 - 		for (i = 0; i < 10000; i++) {
++ ++ 		for (i = 0; i < ACPI_PM_READ_CHECKS; i++) {
++ + 			value2 = clocksource_acpi_pm.read();
++ + 			if (value2 == value1)
++ + 				continue;
++ + 			if (value2 > value1)
 - 				good++;
++ + 				break;
++ + 			if ((value2 < value1) && ((value2) < 0xFFF))
 - 				good++;
++ + 				break;
++ + 			printk(KERN_INFO "PM-Timer had inconsistent results:"
++ + 			       " 0x%#llx, 0x%#llx - aborting.\n",
++ + 			       value1, value2);
++ + 			return -EINVAL;
++ + 		}
 - 		udelay(300 * i);
 - 	}
 -
 - 	if (good != ACPI_PM_MONOTONICITY_CHECKS) {
 - 		printk(KERN_INFO "PM-Timer failed consistency check "
 - 		       " (0x%#llx) - aborting.\n", value1);
 - 		return -ENODEV;
++ ++ 		if (i == ACPI_PM_READ_CHECKS) {
++ ++ 			printk(KERN_INFO "PM-Timer failed consistency check "
++ ++ 			       " (0x%#llx) - aborting.\n", value1);
++ ++ 			return -ENODEV;
++ ++ 		}
  	}

-- - 	printk(KERN_INFO "PM-Timer had no reasonable result:"
-- - 		" 0x%#x - aborting.\n", value1);
-- - 	return -ENODEV;
-- - pm_good:
  	if (verify_pmtmr_rate() != 0)
  		return -ENODEV;
diff --cc kernel/exit.c
index 38ec4063014,38ec4063014,0ef4673e351,16395644a98,40036ac0427,0ef4673e351..059b38cae38
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@@@@@@ -112,9 -112,9 -112,9 -112,9 -112,7 -112,9 +112,7 @@@@@@@ static void __exit_signal(struct task_s
  	 * We won't ever get here for the group leader, since it
  	 * will have been the last reference on the signal_struct.
  	 */
-- 		sig->utime = cputime_add(sig->utime, tsk->utime);
-- 		sig->stime = cputime_add(sig->stime, tsk->stime);
-- 		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
-- - 		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-- - 		sig->stime = cputime_add(sig->stime, task_stime(tsk));
++ 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
  		sig->min_flt += tsk->min_flt;
  		sig->maj_flt += tsk->maj_flt;
  		sig->nvcsw += tsk->nvcsw;
diff --cc kernel/fork.c
index 7ce2ebe8479,7ce2ebe8479,30de644a40c,7ce2ebe8479,021ae012cc7,30de644a40c..44e64d7ba29
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@@@@@@ -795,15 -795,15 -795,16 -795,15 -824,10 -795,16 +824,11 @@@@@@@ static int copy_signal(unsigned long cl
  	sig->it_real_incr.tv64 = 0;
  	sig->real_timer.function = it_real_fn;

---- - 	sig->it_virt_expires = cputime_zero;
---- - 	sig->it_virt_incr = cputime_zero;
---- - 	sig->it_prof_expires = cputime_zero;
---- - 	sig->it_prof_incr = cputime_zero;
---- -
  	sig->leader = 0;	/* session leadership doesn't inherit */
  	sig->tty_old_pgrp = NULL;
++ ++ 	sig->tty = NULL;

---- - 	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
++++ + 	sig->cutime = sig->cstime = cputime_zero;
  	sig->gtime = cputime_zero;
  	sig->cgtime = cputime_zero;
  	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
@@@@@@@ -837,7 -837,7 -838,8 -837,7 -851,8 -838,8 +852,9 @@@@@@@

  void __cleanup_signal(struct signal_struct *sig)
  {
++++ + 	thread_group_cputime_free(sig);
  	exit_thread_group_keys(sig);
++ ++ 	tty_kref_put(sig->tty);
  	kmem_cache_free(signal_cachep, sig);
  }
diff --cc kernel/hrtimer.c
index b8e4dce80a7,4d761d50c52,cdec83e722f,b8e4dce80a7,b8e4dce80a7,cdec83e722f..95978f48e03
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@@@@@@ -1607,9 -1605,9 -1628,56 -1607,9 -1607,9 -1628,56 +1626,56 @@@@@@@ static int migrate_hrtimer_list(struct
  		 * Enqueue the timer. Allow reprogramming of the event device
  		 */
  		enqueue_hrtimer(timer, new_base, 1);
++ ++
++ ++ #ifdef CONFIG_HIGH_RES_TIMERS
++ ++ 		/*
++ ++ 		 * Happens with high res enabled when the timer was
++ ++ 		 * already expired and the callback mode is
++ ++ 		 * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
++ ++ 		 * enqueue code does not move them to the soft irq
++ ++ 		 * pending list for performance/latency reasons, but
++ ++ 		 * in the migration state, we need to do that
++ ++ 		 * otherwise we end up with a stale timer.
++ ++ 		 */
++ ++ 		if (timer->state == HRTIMER_STATE_MIGRATE) {
++ ++ 			timer->state = HRTIMER_STATE_PENDING;
++ ++ 			list_add_tail(&timer->cb_entry,
++ ++ 				      &new_base->cpu_base->cb_pending);
++ ++ 			raise = 1;
++ ++ 		}
++ ++ #endif
++ ++ 		/* Clear the migration state bit */
++ ++ 		timer->state &= ~HRTIMER_STATE_MIGRATE;
 + 	}
++ ++ 	return raise;
 + }
 +
++ ++ #ifdef CONFIG_HIGH_RES_TIMERS
++ ++ static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
++ ++ 				   struct hrtimer_cpu_base *new_base)
++ ++ {
++ ++ 	struct hrtimer *timer;
++ ++ 	int raise = 0;
++ ++
++ ++ 	while (!list_empty(&old_base->cb_pending)) {
++ ++ 		timer = list_entry(old_base->cb_pending.next,
++ ++ 				   struct hrtimer, cb_entry);
++ ++
++ ++ 		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
++ ++ 		timer->base = &new_base->clock_base[timer->base->index];
++ ++ 		list_add_tail(&timer->cb_entry, &new_base->cb_pending);
++ ++ 		raise = 1;
 + ++ 	}
++ ++ 	return raise;
++ ++ }
++ ++ #else
++ ++ static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
++ ++ 				   struct hrtimer_cpu_base *new_base)
++ ++ {
++ ++ 	return 0;
 + ++ }
++ ++ #endif
 + ++
  static void migrate_hrtimers(int cpu)
  {
  	struct hrtimer_cpu_base *old_base, *new_base;
@@@@@@@ -1626,14 -1626,13 -1694,21 -1626,14 -1626,14 -1694,21 +1694,20 @@@@@@@
  	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

  	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-- -- 		migrate_hrtimer_list(&old_base->clock_base[i],
-- -- 				     &new_base->clock_base[i]);
++ ++ 		if (migrate_hrtimer_list(&old_base->clock_base[i],
++ ++ 					 &new_base->clock_base[i], cpu))
++ ++ 			raise = 1;
  	}

++ ++ 	if (migrate_hrtimer_pending(old_base, new_base))
++ ++ 		raise = 1;
++ ++
  	spin_unlock(&old_base->lock);
- ---- 	spin_unlock(&new_base->lock);
- ---- 	local_irq_enable();
+ ++++ 	spin_unlock_irq(&new_base->lock);
  	put_cpu_var(hrtimer_bases);
++ ++
++ ++ 	if (raise)
++ ++ 		hrtimer_raise_softirq();
  }

  #endif /* CONFIG_HOTPLUG_CPU */
diff --cc kernel/sched.c
index 9a1ddb84e26,9a1ddb84e26,6f230596bd0,1a5f73c1fcd,ebb03def564,6f230596bd0..09a8c15748f
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@@@@@@ -1425,9 -1425,9 -1418,35 -1425,9 -1425,9 -1418,35 +1418,35 @@@@@@@ up
  	parent = parent->parent;
  	if (parent)
  		goto up;
++ ++ out_unlock:
  	rcu_read_unlock();
++ ++
++ ++ 	return ret;
++ + }
++ +
++ ++ static int tg_nop(struct task_group *tg, void *data)
++ ++ {
++ ++ 	return 0;
 + }
++ ++ #endif
++ ++
++ ++ #ifdef CONFIG_SMP
++ ++ static unsigned long source_load(int cpu, int type);
++ ++ static unsigned long target_load(int cpu, int type);
++ ++ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
++ ++
++ ++ static unsigned long cpu_avg_load_per_task(int cpu)
++ ++ {
++ ++ 	struct rq *rq = cpu_rq(cpu);
++ ++
++ ++ 	if (rq->nr_running)
++ ++ 		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
++ ++
++ ++ 	return rq->avg_load_per_task;
++ ++ }
++ ++
++ ++ #ifdef CONFIG_FAIR_GROUP_SCHED
 + static void __set_se_shares(struct sched_entity *se, unsigned long shares);

  /*
@@@@@@@ -8687,73 -8687,73 -8837,95 -8746,73 -8760,73 -8837,95 +8844,95 @@@@@@@ static DEFINE_MUTEX(rt_constraints_mute
  static unsigned long to_ratio(u64 period, u64 runtime)
  {
  	if (runtime == RUNTIME_INF)
-- -- 		return 1ULL << 16;
++ ++ 		return 1ULL << 20;

-- -- 	return div64_u64(runtime << 16, period);
++ ++ 	return div64_u64(runtime << 20, period);
  }

-- -- #ifdef CONFIG_CGROUP_SCHED
-- -- static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
++ ++ /* Must be called with tasklist_lock held */
++ ++ static inline int tg_has_rt_tasks(struct task_group *tg)
  {
-- -- 	struct task_group *tgi, *parent = tg->parent;
-- -- 	unsigned long total = 0;
++ ++ 	struct task_struct *g, *p;

-- -- 	if (!parent) {
-- -- 		if (global_rt_period() < period)
-- -- 			return 0;
++ ++ 	do_each_thread(g, p) {
++ ++ 		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
++ ++ 			return 1;
++ ++ 	} while_each_thread(g, p);

-- -- 		return to_ratio(period, runtime) <
-- -- 			to_ratio(global_rt_period(), global_rt_runtime());
-- -- 	}
++ ++ 	return 0;
++ ++ }

-- -- 	if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-- -- 		return 0;
++ ++ struct rt_schedulable_data {
++ ++ 	struct task_group *tg;
++ ++ 	u64 rt_period;
++ ++ 	u64 rt_runtime;
++ ++ };

-- -- 	rcu_read_lock();
-- -- 	list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-- -- 		if (tgi == tg)
-- -- 			continue;
++ ++ static int tg_schedulable(struct task_group *tg, void *data)
++ ++ {
++ ++ 	struct rt_schedulable_data *d = data;
++ ++ 	struct task_group *child;
++ ++ 	unsigned long total, sum = 0;
++ ++ 	u64 period, runtime;
++
-- 		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-- 				  tgi->rt_bandwidth.rt_runtime);
++ ++ 	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
++ ++ 	runtime = tg->rt_bandwidth.rt_runtime;
++
-- 		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-- 				  tgi->rt_bandwidth.rt_runtime);
++ ++ 	if (tg == d->tg) {
++ ++ 		period = d->rt_period;
++ ++ 		runtime = d->rt_runtime;
  	}

-- -- 	rcu_read_unlock();
-- -- 	return total + to_ratio(period, runtime) <=
-- -- 		to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-- -- 			 parent->rt_bandwidth.rt_runtime);
-- -- }
-- -- #elif defined CONFIG_USER_SCHED
-- -- static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-- -- {
-- -- 	struct task_group *tgi;
-- -- 	unsigned long total = 0;
-- -- 	unsigned long global_ratio =
-- -- 		to_ratio(global_rt_period(), global_rt_runtime());
++ ++ 	/*
++ ++ 	 * Cannot have more runtime than the period.
++ ++ 	 */
++ ++ 	if (runtime > period && runtime != RUNTIME_INF)
++ ++ 		return -EINVAL;

-- -- 	rcu_read_lock();
-- -- 	list_for_each_entry_rcu(tgi, &task_groups, list) {
-- -- 		if (tgi == tg)
-- -- 			continue;
++ ++ 	/*
++ ++ 	 * Ensure we don't starve existing RT tasks.
++ ++ 	 */
++ ++ 	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
++ ++ 		return -EBUSY;
++ +
- 		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
- 				  tgi->rt_bandwidth.rt_runtime);
++ ++ 	total = to_ratio(period, runtime);
++ ++
++ ++ 	/*
++ ++ 	 * Nobody can have more than the global setting allows.
++ ++ 	 */
++ ++ 	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
++ ++ 		return -EINVAL;
++
-- 		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-- 				  tgi->rt_bandwidth.rt_runtime);
++ ++ 	/*
++ ++ 	 * The sum of our children's runtime should not exceed our own.
++ ++ 	 */
++ ++ 	list_for_each_entry_rcu(child, &tg->children, siblings) {
++ ++ 		period = ktime_to_ns(child->rt_bandwidth.rt_period);
++ ++ 		runtime = child->rt_bandwidth.rt_runtime;
++ ++
++ ++ 		if (child == d->tg) {
++ ++ 			period = d->rt_period;
++ ++ 			runtime = d->rt_runtime;
++ ++ 		}
++ +
- 		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
- 				  tgi->rt_bandwidth.rt_runtime);
++ ++ 		sum += to_ratio(period, runtime);
  	}

-- -- 	rcu_read_unlock();
-- -- 	return total + to_ratio(period, runtime) < global_ratio;
++ ++ 	if (sum > total)
++ ++ 		return -EINVAL;
++ ++
++ ++ 	return 0;
  }
-- -- #endif

-- -- /* Must be called with tasklist_lock held */
-- -- static inline int tg_has_rt_tasks(struct task_group *tg)
++ ++ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
  {
-- -- 	struct task_struct *g, *p;
-- -- 	do_each_thread(g, p) {
-- -- 		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
-- -- 			return 1;
-- -- 	} while_each_thread(g, p);
-- -- 	return 0;
++ ++ 	struct rt_schedulable_data data = {
++ ++ 		.tg = tg,
++ ++ 		.rt_period = period,
++ ++ 		.rt_runtime = runtime,
++ ++ 	};
++ ++
++ ++ 	return walk_tg_tree(tg_schedulable, tg_nop, &data);
  }

  static int tg_set_bandwidth(struct task_group *tg,