/*
 * drivers/cpufreq/cpufreq_interactive.c
 *
 * Copyright (C) 2010 Google, Inc.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Author: Mike Chan (mike@android.com)
 *
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/tick.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/slab.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cpufreq_interactive.h>

struct cpufreq_interactive_cpuinfo {
	struct timer_list cpu_timer;
	struct timer_list cpu_slack_timer;
	spinlock_t load_lock; /* protects the next 4 fields */
	u64 time_in_idle;
	u64 time_in_idle_timestamp;
	u64 cputime_speedadj;
	u64 cputime_speedadj_timestamp;
	struct cpufreq_policy *policy;
	struct cpufreq_frequency_table *freq_table;
	spinlock_t target_freq_lock; /* protects target freq */
	unsigned int target_freq;
	unsigned int floor_freq;
	u64 pol_floor_val_time; /* policy floor_validate_time */
	u64 loc_floor_val_time; /* per-cpu floor_validate_time */
	u64 pol_hispeed_val_time; /* policy hispeed_validate_time */
	u64 loc_hispeed_val_time; /* per-cpu hispeed_validate_time */
	struct rw_semaphore enable_sem;
	int governor_enabled;
};

static DEFINE_PER_CPU(struct cpufreq_interactive_cpuinfo, cpuinfo);

/* realtime thread handles frequency scaling */
static struct task_struct *speedchange_task;
static cpumask_t speedchange_cpumask;
static spinlock_t speedchange_cpumask_lock;
static struct mutex gov_lock;

/* Target load.  Lower values result in higher CPU speeds. */
#define DEFAULT_TARGET_LOAD 90
static unsigned int default_target_loads[] = {DEFAULT_TARGET_LOAD};

#define DEFAULT_TIMER_RATE (20 * USEC_PER_MSEC)
#define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE
static unsigned int default_above_hispeed_delay[] = {
	DEFAULT_ABOVE_HISPEED_DELAY };
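
/*
 * above_hispeed_delay and target_loads are stored as flattened
 * {value, freq, value, freq, ..., value} arrays: value[0] applies below
 * freq[1], value[2] from freq[1] up to freq[3], and so on.  For example
 * (illustrative values), writing "20000 1000000:40000" to
 * above_hispeed_delay means a 20 ms delay below 1 GHz and 40 ms at or
 * above it.
 */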

struct cpufreq_interactive_tunables {
	int usage_count;
	/* Hi speed to bump to from lo speed when load bursts (default max) */
	unsigned int hispeed_freq;
	/* Go to hi speed when CPU load is at or above this value. */
#define DEFAULT_GO_HISPEED_LOAD 99
	unsigned long go_hispeed_load;
	/* Target load. Lower values result in higher CPU speeds. */
	spinlock_t target_loads_lock;
	unsigned int *target_loads;
	int ntarget_loads;
	/*
	 * The minimum amount of time to spend at a frequency before we can ramp
	 * down.
	 */
#define DEFAULT_MIN_SAMPLE_TIME (80 * USEC_PER_MSEC)
	unsigned long min_sample_time;
	/*
	 * The sample rate of the timer used to increase frequency
	 */
	unsigned long timer_rate;
	/*
	 * Wait this long before raising speed above hispeed, by default a
	 * single timer interval.
	 */
	spinlock_t above_hispeed_delay_lock;
	unsigned int *above_hispeed_delay;
	int nabove_hispeed_delay;
	/* Non-zero means indefinite speed boost active */
	int boost_val;
	/* Duration of a boost pulse in usecs */
	int boostpulse_duration_val;
	/* End time of boost pulse in ktime converted to usecs */
	u64 boostpulse_endtime;
	bool boosted;
	/*
	 * Max additional time to wait in idle, beyond timer_rate, at speeds
	 * above minimum before wakeup to reduce speed, or -1 if unnecessary.
	 */
#define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
	int timer_slack_val;
	bool io_is_busy;
};

/* For cases where we have a single governor instance for the system */
static struct cpufreq_interactive_tunables *common_tunables;

static struct attribute_group *get_sysfs_attr(void);

static void cpufreq_interactive_timer_resched(
	struct cpufreq_interactive_cpuinfo *pcpu)
{
	struct cpufreq_interactive_tunables *tunables =
		pcpu->policy->governor_data;
	unsigned long expires;
	unsigned long flags;

	spin_lock_irqsave(&pcpu->load_lock, flags);
	pcpu->time_in_idle =
		get_cpu_idle_time(smp_processor_id(),
				  &pcpu->time_in_idle_timestamp,
				  tunables->io_is_busy);
	pcpu->cputime_speedadj = 0;
	pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
	expires = jiffies + usecs_to_jiffies(tunables->timer_rate);
	mod_timer_pinned(&pcpu->cpu_timer, expires);

	if (tunables->timer_slack_val >= 0 &&
	    pcpu->target_freq > pcpu->policy->min) {
		expires += usecs_to_jiffies(tunables->timer_slack_val);
		mod_timer_pinned(&pcpu->cpu_slack_timer, expires);
	}

	spin_unlock_irqrestore(&pcpu->load_lock, flags);
}

/*
 * The caller shall take enable_sem write semaphore to avoid any timer race.
 * The cpu_timer and cpu_slack_timer must be deactivated when calling this
 * function.
 */
static void cpufreq_interactive_timer_start(
	struct cpufreq_interactive_tunables *tunables, int cpu)
{
	struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
	unsigned long expires = jiffies +
		usecs_to_jiffies(tunables->timer_rate);
	unsigned long flags;

	pcpu->cpu_timer.expires = expires;
	add_timer_on(&pcpu->cpu_timer, cpu);
	if (tunables->timer_slack_val >= 0 &&
	    pcpu->target_freq > pcpu->policy->min) {
		expires += usecs_to_jiffies(tunables->timer_slack_val);
		pcpu->cpu_slack_timer.expires = expires;
		add_timer_on(&pcpu->cpu_slack_timer, cpu);
	}

	spin_lock_irqsave(&pcpu->load_lock, flags);
	pcpu->time_in_idle =
		get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp,
				  tunables->io_is_busy);
	pcpu->cputime_speedadj = 0;
	pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
	spin_unlock_irqrestore(&pcpu->load_lock, flags);
}

static unsigned int freq_to_above_hispeed_delay(
	struct cpufreq_interactive_tunables *tunables,
	unsigned int freq)
{
	int i;
	unsigned int ret;
	unsigned long flags;

	spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);

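	/* Walk the {delay, freq} pairs; stop in the range containing freq. */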
	for (i = 0; i < tunables->nabove_hispeed_delay - 1 &&
			freq >= tunables->above_hispeed_delay[i+1]; i += 2)
		;

	ret = tunables->above_hispeed_delay[i];
	spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
	return ret;
}

static unsigned int freq_to_targetload(
	struct cpufreq_interactive_tunables *tunables, unsigned int freq)
{
	int i;
	unsigned int ret;
	unsigned long flags;

	spin_lock_irqsave(&tunables->target_loads_lock, flags);

	for (i = 0; i < tunables->ntarget_loads - 1 &&
		    freq >= tunables->target_loads[i+1]; i += 2)
		;

	ret = tunables->target_loads[i];
	spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
	return ret;
}

/*
 * If increasing frequencies never map to a lower target load then
 * choose_freq() will find the minimum frequency that does not exceed its
 * target load given the current load.
 */
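/*
 * Illustrative example (hypothetical frequency table and loads): with
 * available speeds {600000, 900000, 1200000} kHz and a uniform 90%
 * target load, loadadjfreq = 54000000 (90% load at 600000 kHz) needs
 * 54000000 / 90 = 600000 kHz, so choose_freq() keeps 600000.  With a
 * target load of 80, 54000000 / 80 = 675000 rounds up to 900000 kHz,
 * and the loop re-evaluates until the chosen speed repeats.
 */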
static unsigned int choose_freq(struct cpufreq_interactive_cpuinfo *pcpu,
		unsigned int loadadjfreq)
{
	unsigned int freq = pcpu->policy->cur;
	unsigned int prevfreq, freqmin, freqmax;
	unsigned int tl;
	int index;

	freqmin = 0;
	freqmax = UINT_MAX;

	do {
		prevfreq = freq;
		tl = freq_to_targetload(pcpu->policy->governor_data, freq);

		/*
		 * Find the lowest frequency where the computed load is less
		 * than or equal to the target load.
		 */

		if (cpufreq_frequency_table_target(
			    pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
			    CPUFREQ_RELATION_L, &index))
			break;
		freq = pcpu->freq_table[index].frequency;

		if (freq > prevfreq) {
			/* The previous frequency is too low. */
			freqmin = prevfreq;

			if (freq >= freqmax) {
				/*
				 * Find the highest frequency that is less
				 * than freqmax.
				 */
				if (cpufreq_frequency_table_target(
					    pcpu->policy, pcpu->freq_table,
					    freqmax - 1, CPUFREQ_RELATION_H,
					    &index))
					break;
				freq = pcpu->freq_table[index].frequency;

				if (freq == freqmin) {
					/*
					 * The first frequency below freqmax
					 * has already been found to be too
					 * low.  freqmax is the lowest speed
					 * we found that is fast enough.
					 */
					freq = freqmax;
					break;
				}
			}
		} else if (freq < prevfreq) {
			/* The previous frequency is high enough. */
			freqmax = prevfreq;

			if (freq <= freqmin) {
				/*
				 * Find the lowest frequency that is higher
				 * than freqmin.
				 */
				if (cpufreq_frequency_table_target(
					    pcpu->policy, pcpu->freq_table,
					    freqmin + 1, CPUFREQ_RELATION_L,
					    &index))
					break;
				freq = pcpu->freq_table[index].frequency;

				/*
				 * If freqmax is the first frequency above
				 * freqmin then we have already found that
				 * this speed is fast enough.
				 */
				if (freq == freqmax)
					break;
			}
		}

		/* If same frequency chosen as previous then done. */
	} while (freq != prevfreq);

	return freq;
}

static u64 update_load(int cpu)
{
	struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
	struct cpufreq_interactive_tunables *tunables =
		pcpu->policy->governor_data;
	u64 now;
	u64 now_idle;
	unsigned int delta_idle;
	unsigned int delta_time;
	u64 active_time;

	now_idle = get_cpu_idle_time(cpu, &now, tunables->io_is_busy);
	delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle);
	delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp);

	if (delta_time <= delta_idle)
		active_time = 0;
	else
		active_time = delta_time - delta_idle;

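	/*
	 * Weight the busy time just measured by the frequency it ran at;
	 * the sampling timer later divides the accumulated product by
	 * wall-clock time to recover an average busy frequency.
	 */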
	pcpu->cputime_speedadj += active_time * pcpu->policy->cur;

	pcpu->time_in_idle = now_idle;
	pcpu->time_in_idle_timestamp = now;
	return now;
}

static void cpufreq_interactive_timer(unsigned long data)
{
	u64 now;
	unsigned int delta_time;
	u64 cputime_speedadj;
	int cpu_load;
	struct cpufreq_interactive_cpuinfo *pcpu =
		&per_cpu(cpuinfo, data);
	struct cpufreq_interactive_tunables *tunables =
		pcpu->policy->governor_data;
	unsigned int new_freq;
	unsigned int loadadjfreq;
	unsigned int index;
	unsigned long flags;
	u64 max_fvtime;

	if (!down_read_trylock(&pcpu->enable_sem))
		return;
	if (!pcpu->governor_enabled)
		goto exit;

	spin_lock_irqsave(&pcpu->load_lock, flags);
	now = update_load(data);
	delta_time = (unsigned int)(now - pcpu->cputime_speedadj_timestamp);
	cputime_speedadj = pcpu->cputime_speedadj;
	spin_unlock_irqrestore(&pcpu->load_lock, flags);

	if (WARN_ON_ONCE(!delta_time))
		goto rearm;

	spin_lock_irqsave(&pcpu->target_freq_lock, flags);
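	/*
	 * cputime_speedadj / delta_time is the busy-time-weighted average
	 * frequency over the sample; scaling by 100 and dividing by the
	 * current frequency gives load as a percentage of current capacity.
	 */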
	do_div(cputime_speedadj, delta_time);
	loadadjfreq = (unsigned int)cputime_speedadj * 100;
	cpu_load = loadadjfreq / pcpu->policy->cur;
	tunables->boosted = tunables->boost_val ||
		now < tunables->boostpulse_endtime;

	if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) {
		if (pcpu->policy->cur < tunables->hispeed_freq) {
			new_freq = tunables->hispeed_freq;
		} else {
			new_freq = choose_freq(pcpu, loadadjfreq);

			if (new_freq < tunables->hispeed_freq)
				new_freq = tunables->hispeed_freq;
		}
	} else {
		new_freq = choose_freq(pcpu, loadadjfreq);
		if (new_freq > tunables->hispeed_freq &&
				pcpu->policy->cur < tunables->hispeed_freq)
			new_freq = tunables->hispeed_freq;
	}

	if (pcpu->policy->cur >= tunables->hispeed_freq &&
	    new_freq > pcpu->policy->cur &&
	    now - pcpu->pol_hispeed_val_time <
	    freq_to_above_hispeed_delay(tunables, pcpu->policy->cur)) {
		trace_cpufreq_interactive_notyet(
			data, cpu_load, pcpu->target_freq,
			pcpu->policy->cur, new_freq);
		spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
		goto rearm;
	}

	pcpu->loc_hispeed_val_time = now;

	if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
					   new_freq, CPUFREQ_RELATION_L,
					   &index)) {
		spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
		goto rearm;
	}

	new_freq = pcpu->freq_table[index].frequency;

	/*
	 * Do not scale below floor_freq unless we have been at or above the
	 * floor frequency for the minimum sample time since last validated.
	 */
	max_fvtime = max(pcpu->pol_floor_val_time, pcpu->loc_floor_val_time);
	if (new_freq < pcpu->floor_freq &&
	    pcpu->target_freq >= pcpu->policy->cur) {
		if (now - max_fvtime < tunables->min_sample_time) {
			trace_cpufreq_interactive_notyet(
				data, cpu_load, pcpu->target_freq,
				pcpu->policy->cur, new_freq);
			spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
			goto rearm;
		}
	}

	/*
	 * Update the timestamp for checking whether speed has been held at
	 * or above the selected frequency for a minimum of min_sample_time,
	 * if not boosted to hispeed_freq.  If boosted to hispeed_freq then we
	 * allow the speed to drop as soon as the boostpulse duration expires
	 * (or the indefinite boost is turned off).
	 */

	if (!tunables->boosted || new_freq > tunables->hispeed_freq) {
		pcpu->floor_freq = new_freq;
		if (pcpu->target_freq >= pcpu->policy->cur ||
		    new_freq >= pcpu->policy->cur)
			pcpu->loc_floor_val_time = now;
	}

	if (pcpu->target_freq == new_freq &&
			pcpu->target_freq <= pcpu->policy->cur) {
		trace_cpufreq_interactive_already(
			data, cpu_load, pcpu->target_freq,
			pcpu->policy->cur, new_freq);
		spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
		goto rearm;
	}

	trace_cpufreq_interactive_target(data, cpu_load, pcpu->target_freq,
					 pcpu->policy->cur, new_freq);

	pcpu->target_freq = new_freq;
	spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
	spin_lock_irqsave(&speedchange_cpumask_lock, flags);
	cpumask_set_cpu(data, &speedchange_cpumask);
	spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);
	wake_up_process(speedchange_task);

rearm:
	if (!timer_pending(&pcpu->cpu_timer))
		cpufreq_interactive_timer_resched(pcpu);

exit:
	up_read(&pcpu->enable_sem);
	return;
}

static void cpufreq_interactive_idle_end(void)
{
	struct cpufreq_interactive_cpuinfo *pcpu =
		&per_cpu(cpuinfo, smp_processor_id());

	if (!down_read_trylock(&pcpu->enable_sem))
		return;
	if (!pcpu->governor_enabled) {
		up_read(&pcpu->enable_sem);
		return;
	}

	/* Arm the timer for 1-2 ticks later if not already. */
	if (!timer_pending(&pcpu->cpu_timer)) {
		cpufreq_interactive_timer_resched(pcpu);
	} else if (time_after_eq(jiffies, pcpu->cpu_timer.expires)) {
		del_timer(&pcpu->cpu_timer);
		del_timer(&pcpu->cpu_slack_timer);
		cpufreq_interactive_timer(smp_processor_id());
	}

	up_read(&pcpu->enable_sem);
}

static int cpufreq_interactive_speedchange_task(void *data)
{
	unsigned int cpu;
	cpumask_t tmp_mask;
	unsigned long flags;
	struct cpufreq_interactive_cpuinfo *pcpu;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		spin_lock_irqsave(&speedchange_cpumask_lock, flags);

		if (cpumask_empty(&speedchange_cpumask)) {
			spin_unlock_irqrestore(&speedchange_cpumask_lock,
					       flags);
			schedule();

			if (kthread_should_stop())
				break;

			spin_lock_irqsave(&speedchange_cpumask_lock, flags);
		}

		set_current_state(TASK_RUNNING);
		tmp_mask = speedchange_cpumask;
		cpumask_clear(&speedchange_cpumask);
		spin_unlock_irqrestore(&speedchange_cpumask_lock, flags);

		for_each_cpu(cpu, &tmp_mask) {
			unsigned int j;
			unsigned int max_freq = 0;
			struct cpufreq_interactive_cpuinfo *pjcpu;
			u64 hvt = ~0ULL, fvt = 0;

			pcpu = &per_cpu(cpuinfo, cpu);
			if (!down_read_trylock(&pcpu->enable_sem))
				continue;
			if (!pcpu->governor_enabled) {
				up_read(&pcpu->enable_sem);
				continue;
			}

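			/*
			 * Scale the whole policy to the highest target_freq
			 * among its CPUs.  fvt takes the newest floor
			 * validation time; hvt takes the hispeed validation
			 * time of the fastest CPU (the oldest on a tie).
			 */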
			for_each_cpu(j, pcpu->policy->cpus) {
				pjcpu = &per_cpu(cpuinfo, j);

				fvt = max(fvt, pjcpu->loc_floor_val_time);
				if (pjcpu->target_freq > max_freq) {
					max_freq = pjcpu->target_freq;
					hvt = pjcpu->loc_hispeed_val_time;
				} else if (pjcpu->target_freq == max_freq) {
					hvt = min(hvt, pjcpu->loc_hispeed_val_time);
				}
			}
			for_each_cpu(j, pcpu->policy->cpus) {
				pjcpu = &per_cpu(cpuinfo, j);
				pjcpu->pol_floor_val_time = fvt;
			}

			if (max_freq != pcpu->policy->cur) {
				__cpufreq_driver_target(pcpu->policy,
							max_freq,
							CPUFREQ_RELATION_H);
				for_each_cpu(j, pcpu->policy->cpus) {
					pjcpu = &per_cpu(cpuinfo, j);
					pjcpu->pol_hispeed_val_time = hvt;
				}
			}
			trace_cpufreq_interactive_setspeed(cpu,
						     pcpu->target_freq,
						     pcpu->policy->cur);

			up_read(&pcpu->enable_sem);
		}
	}

	return 0;
}

static void cpufreq_interactive_boost(
	struct cpufreq_interactive_tunables *tunables)
{
	int i;
	int anyboost = 0;
	unsigned long flags[2];
	struct cpufreq_interactive_cpuinfo *pcpu;

	tunables->boosted = true;

	spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]);

	for_each_online_cpu(i) {
		pcpu = &per_cpu(cpuinfo, i);
		if (tunables != pcpu->policy->governor_data)
			continue;

		spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]);
		if (pcpu->target_freq < tunables->hispeed_freq) {
			pcpu->target_freq = tunables->hispeed_freq;
			cpumask_set_cpu(i, &speedchange_cpumask);
			pcpu->pol_hispeed_val_time =
				ktime_to_us(ktime_get());
			anyboost = 1;
		}
		spin_unlock_irqrestore(&pcpu->target_freq_lock, flags[1]);
	}

	spin_unlock_irqrestore(&speedchange_cpumask_lock, flags[0]);

	if (anyboost)
		wake_up_process(speedchange_task);
}

static int cpufreq_interactive_notifier(
	struct notifier_block *nb, unsigned long val, void *data)
{
	struct cpufreq_freqs *freq = data;
	struct cpufreq_interactive_cpuinfo *pcpu;
	int cpu;
	unsigned long flags;

	if (val == CPUFREQ_POSTCHANGE) {
		pcpu = &per_cpu(cpuinfo, freq->cpu);
		if (!down_read_trylock(&pcpu->enable_sem))
			return 0;
		if (!pcpu->governor_enabled) {
			up_read(&pcpu->enable_sem);
			return 0;
		}

		for_each_cpu(cpu, pcpu->policy->cpus) {
			struct cpufreq_interactive_cpuinfo *pjcpu =
				&per_cpu(cpuinfo, cpu);
			if (cpu != freq->cpu) {
				if (!down_read_trylock(&pjcpu->enable_sem))
					continue;
				if (!pjcpu->governor_enabled) {
					up_read(&pjcpu->enable_sem);
					continue;
				}
			}
			spin_lock_irqsave(&pjcpu->load_lock, flags);
			update_load(cpu);
			spin_unlock_irqrestore(&pjcpu->load_lock, flags);
			if (cpu != freq->cpu)
				up_read(&pjcpu->enable_sem);
		}

		up_read(&pcpu->enable_sem);
	}
	return 0;
}

static struct notifier_block cpufreq_notifier_block = {
	.notifier_call = cpufreq_interactive_notifier,
};

static unsigned int *get_tokenized_data(const char *buf, int *num_tokens)
{
	const char *cp;
	int i;
	int ntokens = 1;
	unsigned int *tokenized_data;
	int err = -EINVAL;

	cp = buf;
	while ((cp = strpbrk(cp + 1, " :")))
		ntokens++;

	if (!(ntokens & 0x1))
		goto err;

	tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL);
	if (!tokenized_data) {
		err = -ENOMEM;
		goto err;
	}

	cp = buf;
	i = 0;
	while (i < ntokens) {
		if (sscanf(cp, "%u", &tokenized_data[i++]) != 1)
			goto err_kfree;

		cp = strpbrk(cp, " :");
		if (!cp)
			break;
		cp++;
	}

	if (i != ntokens)
		goto err_kfree;

	*num_tokens = ntokens;
	return tokenized_data;

err_kfree:
	kfree(tokenized_data);
err:
	return ERR_PTR(err);
}
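
/*
 * Example (illustrative): "85 1500000:90" tokenizes to {85, 1500000, 90}.
 * An even token count cannot encode a valid {value, freq, ..., value}
 * list and is rejected with -EINVAL.
 */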

static ssize_t show_target_loads(
	struct cpufreq_interactive_tunables *tunables,
	char *buf)
{
	int i;
	ssize_t ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&tunables->target_loads_lock, flags);

	for (i = 0; i < tunables->ntarget_loads; i++)
		ret += sprintf(buf + ret, "%u%s", tunables->target_loads[i],
			       i & 0x1 ? ":" : " ");

	sprintf(buf + ret - 1, "\n");
	spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
	return ret;
}

static ssize_t store_target_loads(
	struct cpufreq_interactive_tunables *tunables,
	const char *buf, size_t count)
{
	int ntokens;
	unsigned int *new_target_loads = NULL;
	unsigned long flags;

	new_target_loads = get_tokenized_data(buf, &ntokens);
	if (IS_ERR(new_target_loads))
		return PTR_RET(new_target_loads);

	spin_lock_irqsave(&tunables->target_loads_lock, flags);
	if (tunables->target_loads != default_target_loads)
		kfree(tunables->target_loads);
	tunables->target_loads = new_target_loads;
	tunables->ntarget_loads = ntokens;
	spin_unlock_irqrestore(&tunables->target_loads_lock, flags);
	return count;
}

static ssize_t show_above_hispeed_delay(
	struct cpufreq_interactive_tunables *tunables, char *buf)
{
	int i;
	ssize_t ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);

	for (i = 0; i < tunables->nabove_hispeed_delay; i++)
		ret += sprintf(buf + ret, "%u%s",
			       tunables->above_hispeed_delay[i],
			       i & 0x1 ? ":" : " ");

	sprintf(buf + ret - 1, "\n");
	spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
	return ret;
}

static ssize_t store_above_hispeed_delay(
	struct cpufreq_interactive_tunables *tunables,
	const char *buf, size_t count)
{
	int ntokens;
	unsigned int *new_above_hispeed_delay = NULL;
	unsigned long flags;

	new_above_hispeed_delay = get_tokenized_data(buf, &ntokens);
	if (IS_ERR(new_above_hispeed_delay))
		return PTR_RET(new_above_hispeed_delay);

	spin_lock_irqsave(&tunables->above_hispeed_delay_lock, flags);
	if (tunables->above_hispeed_delay != default_above_hispeed_delay)
		kfree(tunables->above_hispeed_delay);
	tunables->above_hispeed_delay = new_above_hispeed_delay;
	tunables->nabove_hispeed_delay = ntokens;
	spin_unlock_irqrestore(&tunables->above_hispeed_delay_lock, flags);
	return count;
}

static ssize_t show_hispeed_freq(struct cpufreq_interactive_tunables *tunables,
		char *buf)
{
	return sprintf(buf, "%u\n", tunables->hispeed_freq);
}

static ssize_t store_hispeed_freq(struct cpufreq_interactive_tunables *tunables,
		const char *buf, size_t count)
{
	int ret;
	unsigned long val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	tunables->hispeed_freq = val;
	return count;
}

static ssize_t show_go_hispeed_load(struct cpufreq_interactive_tunables
		*tunables, char *buf)
{
	return sprintf(buf, "%lu\n", tunables->go_hispeed_load);
}

static ssize_t store_go_hispeed_load(struct cpufreq_interactive_tunables
		*tunables, const char *buf, size_t count)
{
	int ret;
	unsigned long val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	tunables->go_hispeed_load = val;
	return count;
}

static ssize_t show_min_sample_time(struct cpufreq_interactive_tunables
		*tunables, char *buf)
{
	return sprintf(buf, "%lu\n", tunables->min_sample_time);
}

static ssize_t store_min_sample_time(struct cpufreq_interactive_tunables
		*tunables, const char *buf, size_t count)
{
	int ret;
	unsigned long val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	tunables->min_sample_time = val;
	return count;
}

static ssize_t show_timer_rate(struct cpufreq_interactive_tunables *tunables,
		char *buf)
{
	return sprintf(buf, "%lu\n", tunables->timer_rate);
}

static ssize_t store_timer_rate(struct cpufreq_interactive_tunables *tunables,
		const char *buf, size_t count)
{
	int ret;
	unsigned long val, val_round;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;

	val_round = jiffies_to_usecs(usecs_to_jiffies(val));
	if (val != val_round)
		pr_warn("timer_rate not aligned to jiffy. Rounded up to %lu\n",
			val_round);

	tunables->timer_rate = val_round;
	return count;
}

static ssize_t show_timer_slack(struct cpufreq_interactive_tunables *tunables,
		char *buf)
{
	return sprintf(buf, "%d\n", tunables->timer_slack_val);
}

static ssize_t store_timer_slack(struct cpufreq_interactive_tunables *tunables,
		const char *buf, size_t count)
{
	int ret;
	long val;

	ret = kstrtol(buf, 10, &val);
	if (ret < 0)
		return ret;

	tunables->timer_slack_val = val;
	return count;
}

static ssize_t show_boost(struct cpufreq_interactive_tunables *tunables,
			  char *buf)
{
	return sprintf(buf, "%d\n", tunables->boost_val);
}

static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables,
			   const char *buf, size_t count)
{
	int ret;
	unsigned long val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;

	tunables->boost_val = val;

	if (tunables->boost_val) {
		trace_cpufreq_interactive_boost("on");
		if (!tunables->boosted)
			cpufreq_interactive_boost(tunables);
	} else {
		tunables->boostpulse_endtime = ktime_to_us(ktime_get());
		trace_cpufreq_interactive_unboost("off");
	}

	return count;
}

static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables,
				const char *buf, size_t count)
{
	int ret;
	unsigned long val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;

	tunables->boostpulse_endtime = ktime_to_us(ktime_get()) +
		tunables->boostpulse_duration_val;
	trace_cpufreq_interactive_boost("pulse");
	if (!tunables->boosted)
		cpufreq_interactive_boost(tunables);
	return count;
}
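
/*
 * Typical usage, assuming the system-wide tunables instance (the sysfs
 * path differs when the governor is per-policy):
 *   echo 1 > /sys/devices/system/cpu/cpufreq/interactive/boostpulse
 * Any value that parses successfully triggers one pulse lasting
 * boostpulse_duration usecs.
 */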

static ssize_t show_boostpulse_duration(struct cpufreq_interactive_tunables
		*tunables, char *buf)
{
	return sprintf(buf, "%d\n", tunables->boostpulse_duration_val);
}

static ssize_t store_boostpulse_duration(struct cpufreq_interactive_tunables
		*tunables, const char *buf, size_t count)
{
	int ret;
	unsigned long val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;

	tunables->boostpulse_duration_val = val;
	return count;
}

static ssize_t show_io_is_busy(struct cpufreq_interactive_tunables *tunables,
		char *buf)
{
	return sprintf(buf, "%u\n", tunables->io_is_busy);
}

static ssize_t store_io_is_busy(struct cpufreq_interactive_tunables *tunables,
		const char *buf, size_t count)
{
	int ret;
	unsigned long val;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;
	tunables->io_is_busy = val;
	return count;
}

/*
 * Create show/store routines
 * - sys: One governor instance for complete SYSTEM
 * - pol: One governor instance per struct cpufreq_policy
 */
#define show_gov_pol_sys(file_name)					\
static ssize_t show_##file_name##_gov_sys				\
(struct kobject *kobj, struct attribute *attr, char *buf)		\
{									\
	return show_##file_name(common_tunables, buf);			\
}									\
									\
static ssize_t show_##file_name##_gov_pol				\
(struct cpufreq_policy *policy, char *buf)				\
{									\
	return show_##file_name(policy->governor_data, buf);		\
}

#define store_gov_pol_sys(file_name)					\
static ssize_t store_##file_name##_gov_sys				\
(struct kobject *kobj, struct attribute *attr, const char *buf,		\
	size_t count)							\
{									\
	return store_##file_name(common_tunables, buf, count);		\
}									\
									\
static ssize_t store_##file_name##_gov_pol				\
(struct cpufreq_policy *policy, const char *buf, size_t count)		\
{									\
	return store_##file_name(policy->governor_data, buf, count);	\
}

#define show_store_gov_pol_sys(file_name)				\
show_gov_pol_sys(file_name);						\
store_gov_pol_sys(file_name)

show_store_gov_pol_sys(target_loads);
show_store_gov_pol_sys(above_hispeed_delay);
show_store_gov_pol_sys(hispeed_freq);
show_store_gov_pol_sys(go_hispeed_load);
show_store_gov_pol_sys(min_sample_time);
show_store_gov_pol_sys(timer_rate);
show_store_gov_pol_sys(timer_slack);
show_store_gov_pol_sys(boost);
store_gov_pol_sys(boostpulse);
show_store_gov_pol_sys(boostpulse_duration);
show_store_gov_pol_sys(io_is_busy);

#define gov_sys_attr_rw(_name)						\
static struct global_attr _name##_gov_sys =				\
__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys)

#define gov_pol_attr_rw(_name)						\
static struct freq_attr _name##_gov_pol =				\
__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)

#define gov_sys_pol_attr_rw(_name)					\
	gov_sys_attr_rw(_name);						\
	gov_pol_attr_rw(_name)

gov_sys_pol_attr_rw(target_loads);
gov_sys_pol_attr_rw(above_hispeed_delay);
gov_sys_pol_attr_rw(hispeed_freq);
gov_sys_pol_attr_rw(go_hispeed_load);
gov_sys_pol_attr_rw(min_sample_time);
gov_sys_pol_attr_rw(timer_rate);
gov_sys_pol_attr_rw(timer_slack);
gov_sys_pol_attr_rw(boost);
gov_sys_pol_attr_rw(boostpulse_duration);
gov_sys_pol_attr_rw(io_is_busy);

static struct global_attr boostpulse_gov_sys =
	__ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_sys);

static struct freq_attr boostpulse_gov_pol =
	__ATTR(boostpulse, 0200, NULL, store_boostpulse_gov_pol);

/* One governor instance for the entire system */
static struct attribute *interactive_attributes_gov_sys[] = {
	&target_loads_gov_sys.attr,
	&above_hispeed_delay_gov_sys.attr,
	&hispeed_freq_gov_sys.attr,
	&go_hispeed_load_gov_sys.attr,
	&min_sample_time_gov_sys.attr,
	&timer_rate_gov_sys.attr,
	&timer_slack_gov_sys.attr,
	&boost_gov_sys.attr,
	&boostpulse_gov_sys.attr,
	&boostpulse_duration_gov_sys.attr,
	&io_is_busy_gov_sys.attr,
	NULL,
};

static struct attribute_group interactive_attr_group_gov_sys = {
	.attrs = interactive_attributes_gov_sys,
	.name = "interactive",
};

/* Per policy governor instance */
static struct attribute *interactive_attributes_gov_pol[] = {
	&target_loads_gov_pol.attr,
	&above_hispeed_delay_gov_pol.attr,
	&hispeed_freq_gov_pol.attr,
	&go_hispeed_load_gov_pol.attr,
	&min_sample_time_gov_pol.attr,
	&timer_rate_gov_pol.attr,
	&timer_slack_gov_pol.attr,
	&boost_gov_pol.attr,
	&boostpulse_gov_pol.attr,
	&boostpulse_duration_gov_pol.attr,
	&io_is_busy_gov_pol.attr,
	NULL,
};

static struct attribute_group interactive_attr_group_gov_pol = {
	.attrs = interactive_attributes_gov_pol,
	.name = "interactive",
};

static struct attribute_group *get_sysfs_attr(void)
{
	if (have_governor_per_policy())
		return &interactive_attr_group_gov_pol;
	else
		return &interactive_attr_group_gov_sys;
}

static int cpufreq_interactive_idle_notifier(struct notifier_block *nb,
					     unsigned long val,
					     void *data)
{
	if (val == IDLE_END)
		cpufreq_interactive_idle_end();

	return 0;
}

static struct notifier_block cpufreq_interactive_idle_nb = {
	.notifier_call = cpufreq_interactive_idle_notifier,
};

static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
		unsigned int event)
{
	int rc;
	unsigned int j;
	struct cpufreq_interactive_cpuinfo *pcpu;
	struct cpufreq_frequency_table *freq_table;
	struct cpufreq_interactive_tunables *tunables;
	unsigned long flags;

	if (have_governor_per_policy())
		tunables = policy->governor_data;
	else
		tunables = common_tunables;

	WARN_ON(!tunables && (event != CPUFREQ_GOV_POLICY_INIT));

	switch (event) {
	case CPUFREQ_GOV_POLICY_INIT:
		if (have_governor_per_policy()) {
			WARN_ON(tunables);
		} else if (tunables) {
			tunables->usage_count++;
			policy->governor_data = tunables;
			return 0;
		}

		tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
		if (!tunables) {
			pr_err("%s: POLICY_INIT: kzalloc failed\n", __func__);
			return -ENOMEM;
		}

		tunables->usage_count = 1;
		tunables->above_hispeed_delay = default_above_hispeed_delay;
		tunables->nabove_hispeed_delay =
			ARRAY_SIZE(default_above_hispeed_delay);
		tunables->go_hispeed_load = DEFAULT_GO_HISPEED_LOAD;
		tunables->target_loads = default_target_loads;
		tunables->ntarget_loads = ARRAY_SIZE(default_target_loads);
		tunables->min_sample_time = DEFAULT_MIN_SAMPLE_TIME;
		tunables->timer_rate = DEFAULT_TIMER_RATE;
		tunables->boostpulse_duration_val = DEFAULT_MIN_SAMPLE_TIME;
		tunables->timer_slack_val = DEFAULT_TIMER_SLACK;

		spin_lock_init(&tunables->target_loads_lock);
		spin_lock_init(&tunables->above_hispeed_delay_lock);

		policy->governor_data = tunables;
		if (!have_governor_per_policy()) {
			common_tunables = tunables;
			WARN_ON(cpufreq_get_global_kobject());
		}

		rc = sysfs_create_group(get_governor_parent_kobj(policy),
				get_sysfs_attr());
		if (rc) {
			kfree(tunables);
			policy->governor_data = NULL;
			if (!have_governor_per_policy()) {
				common_tunables = NULL;
				cpufreq_put_global_kobject();
			}
			return rc;
		}

		if (!policy->governor->initialized) {
			idle_notifier_register(&cpufreq_interactive_idle_nb);
			cpufreq_register_notifier(&cpufreq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
		}

		break;

	case CPUFREQ_GOV_POLICY_EXIT:
		if (!--tunables->usage_count) {
			if (policy->governor->initialized == 1) {
				cpufreq_unregister_notifier(&cpufreq_notifier_block,
						CPUFREQ_TRANSITION_NOTIFIER);
				idle_notifier_unregister(&cpufreq_interactive_idle_nb);
			}

			sysfs_remove_group(get_governor_parent_kobj(policy),
					get_sysfs_attr());

			if (!have_governor_per_policy())
				cpufreq_put_global_kobject();

			kfree(tunables);
			common_tunables = NULL;
		}

		policy->governor_data = NULL;
		break;

	case CPUFREQ_GOV_START:
		mutex_lock(&gov_lock);

		freq_table = cpufreq_frequency_get_table(policy->cpu);
		if (!tunables->hispeed_freq)
			tunables->hispeed_freq = policy->max;

		for_each_cpu(j, policy->cpus) {
			pcpu = &per_cpu(cpuinfo, j);
			pcpu->policy = policy;
			pcpu->target_freq = policy->cur;
			pcpu->freq_table = freq_table;
			pcpu->floor_freq = pcpu->target_freq;
			pcpu->pol_floor_val_time =
				ktime_to_us(ktime_get());
			pcpu->loc_floor_val_time = pcpu->pol_floor_val_time;
			pcpu->pol_hispeed_val_time = pcpu->pol_floor_val_time;
			pcpu->loc_hispeed_val_time = pcpu->pol_floor_val_time;
			down_write(&pcpu->enable_sem);
			del_timer_sync(&pcpu->cpu_timer);
			del_timer_sync(&pcpu->cpu_slack_timer);
			cpufreq_interactive_timer_start(tunables, j);
			pcpu->governor_enabled = 1;
			up_write(&pcpu->enable_sem);
		}

		mutex_unlock(&gov_lock);
		break;

	case CPUFREQ_GOV_STOP:
		mutex_lock(&gov_lock);
		for_each_cpu(j, policy->cpus) {
			pcpu = &per_cpu(cpuinfo, j);
			down_write(&pcpu->enable_sem);
			pcpu->governor_enabled = 0;
			del_timer_sync(&pcpu->cpu_timer);
			del_timer_sync(&pcpu->cpu_slack_timer);
			up_write(&pcpu->enable_sem);
		}

		mutex_unlock(&gov_lock);
		break;

	case CPUFREQ_GOV_LIMITS:
		if (policy->max < policy->cur)
			__cpufreq_driver_target(policy,
					policy->max, CPUFREQ_RELATION_H);
		else if (policy->min > policy->cur)
			__cpufreq_driver_target(policy,
					policy->min, CPUFREQ_RELATION_L);
		for_each_cpu(j, policy->cpus) {
			pcpu = &per_cpu(cpuinfo, j);

			down_read(&pcpu->enable_sem);
			if (pcpu->governor_enabled == 0) {
				up_read(&pcpu->enable_sem);
				continue;
			}

			spin_lock_irqsave(&pcpu->target_freq_lock, flags);
			if (policy->max < pcpu->target_freq)
				pcpu->target_freq = policy->max;
			else if (policy->min > pcpu->target_freq)
				pcpu->target_freq = policy->min;

			spin_unlock_irqrestore(&pcpu->target_freq_lock, flags);
			up_read(&pcpu->enable_sem);
		}
		break;
	}
	return 0;
}

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
static
#endif
struct cpufreq_governor cpufreq_gov_interactive = {
	.name = "interactive",
	.governor = cpufreq_governor_interactive,
	.max_transition_latency = 10000000,
	.owner = THIS_MODULE,
};

static void cpufreq_interactive_nop_timer(unsigned long data)
{
}

static int __init cpufreq_interactive_init(void)
{
	unsigned int i;
	struct cpufreq_interactive_cpuinfo *pcpu;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

	/* Initialize per-cpu timers */
	for_each_possible_cpu(i) {
		pcpu = &per_cpu(cpuinfo, i);
		init_timer_deferrable(&pcpu->cpu_timer);
		pcpu->cpu_timer.function = cpufreq_interactive_timer;
		pcpu->cpu_timer.data = i;
		init_timer(&pcpu->cpu_slack_timer);
		pcpu->cpu_slack_timer.function = cpufreq_interactive_nop_timer;
		spin_lock_init(&pcpu->load_lock);
		spin_lock_init(&pcpu->target_freq_lock);
		init_rwsem(&pcpu->enable_sem);
	}

	spin_lock_init(&speedchange_cpumask_lock);
	mutex_init(&gov_lock);
	speedchange_task =
		kthread_create(cpufreq_interactive_speedchange_task, NULL,
			       "cfinteractive");
	if (IS_ERR(speedchange_task))
		return PTR_ERR(speedchange_task);

	sched_setscheduler_nocheck(speedchange_task, SCHED_FIFO, &param);
	get_task_struct(speedchange_task);

	/* NB: wake up so the thread does not look hung to the freezer */
	wake_up_process(speedchange_task);

	return cpufreq_register_governor(&cpufreq_gov_interactive);
}

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_INTERACTIVE
fs_initcall(cpufreq_interactive_init);
#else
module_init(cpufreq_interactive_init);
#endif

static void __exit cpufreq_interactive_exit(void)
{
	cpufreq_unregister_governor(&cpufreq_gov_interactive);
	kthread_stop(speedchange_task);
	put_task_struct(speedchange_task);
}

module_exit(cpufreq_interactive_exit);

MODULE_AUTHOR("Mike Chan <mike@android.com>");
MODULE_DESCRIPTION("'cpufreq_interactive' - A cpufreq governor for "
	"latency sensitive workloads");
MODULE_LICENSE("GPL");