sched: ehmp: support EHMP(Exynos HMP)

Change-Id: Ie7ee8a84ed0fdc3a62d10a5b55488477edcdba7f
Signed-off-by: Park Bumgyu <bumgyu.park@samsung.com>
Author: Park Bumgyu <bumgyu.park@samsung.com>, 2018-01-16 19:01:05 +09:00 (committed by Mustafa Gökmen)
Parent: cf93a3967e
Commit: b1ce8600ad
12 changed files with 2797 additions and 68 deletions

include/linux/ehmp.h (new file, 87 lines)

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2017 Samsung Electronics Co., Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/plist.h>
#ifdef CONFIG_SCHED_TUNE
enum stune_group {
STUNE_ROOT,
STUNE_FOREGROUND,
STUNE_BACKGROUND,
STUNE_TOPAPP,
STUNE_GROUP_COUNT,
};
#endif
struct gb_qos_request {
struct plist_node node;
char *name;
bool active;
};
#ifdef CONFIG_SCHED_EHMP
extern void exynos_init_entity_util_avg(struct sched_entity *se);
extern int exynos_need_active_balance(enum cpu_idle_type idle,
struct sched_domain *sd, int src_cpu, int dst_cpu);
extern unsigned long global_boost(void);
extern int find_second_max_cap(void);
extern int exynos_select_cpu(struct task_struct *p, int prev_cpu,
int sync, int sd_flag);
extern void ontime_migration(void);
extern int ontime_can_migration(struct task_struct *p, int cpu);
extern void ontime_update_load_avg(u64 delta, int cpu, unsigned long weight,
struct sched_avg *sa);
extern void ontime_new_entity_load(struct task_struct *parent,
struct sched_entity *se);
extern void ontime_trace_task_info(struct task_struct *p);
extern void ehmp_update_max_cpu_capacity(int cpu, unsigned long val);
extern void ehmp_update_overutilized(int cpu, unsigned long capacity);
extern bool ehmp_trigger_lb(int src_cpu, int dst_cpu);
extern void gb_qos_update_request(struct gb_qos_request *req, u32 new_value);
extern void request_kernel_prefer_perf(int grp_idx, int enable);
#else
static inline void exynos_init_entity_util_avg(struct sched_entity *se) { }
static inline int exynos_need_active_balance(enum cpu_idle_type idle,
struct sched_domain *sd, int src_cpu, int dst_cpu) { return 0; }
static inline unsigned long global_boost(void) { return 0; }
static inline int find_second_max_cap(void) { return -EINVAL; }
static inline int exynos_select_cpu(struct task_struct *p, int prev_cpu,
int sync, int sd_flag) { return -EINVAL; }
static inline void ontime_migration(void) { }
static inline int ontime_can_migration(struct task_struct *p, int cpu) { return 1; }
static inline void ontime_update_load_avg(u64 delta, int cpu, unsigned long weight,
struct sched_avg *sa) { }
static inline void ontime_new_entity_load(struct task_struct *p,
struct sched_entity *se) { }
static inline void ontime_trace_task_info(struct task_struct *p) { }
static inline void ehmp_update_max_cpu_capacity(int cpu, unsigned long val) { }
static inline void ehmp_update_overutilized(int cpu, unsigned long capacity) { }
static inline bool ehmp_trigger_lb(int src_cpu, int dst_cpu) { return false; }
static inline void gb_qos_update_request(struct gb_qos_request *req, u32 new_value) { }
static inline void request_kernel_prefer_perf(int grp_idx, int enable) { }
#endif /* CONFIG_SCHED_EHMP */
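For context, a minimal usage sketch of the gb_qos interface declared above; the driver, call sites and boost values are hypothetical, and the "highest active vote" aggregation is an assumption about global_boost():

/* Hypothetical example, not part of this patch */
#include <linux/ehmp.h>

static struct gb_qos_request camera_gb_req = {
	.name = "camera_rec",
};

static void camera_recording_start(void)
{
	/* vote for a global boost level of 50 while recording */
	gb_qos_update_request(&camera_gb_req, 50);
}

static void camera_recording_stop(void)
{
	/* drop the vote; global_boost() presumably reports the highest active request */
	gb_qos_update_request(&camera_gb_req, 0);
}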

include/linux/sched.h

@@ -1349,6 +1349,25 @@ struct sched_avg {
struct util_est util_est;
};
#ifdef CONFIG_SCHED_EHMP
#define NOT_ONTIME 1
#define ONTIME_MIGRATING 2
#define ONTIME 4
struct ontime_avg {
u64 ontime_migration_time;
u64 load_sum;
u32 period_contrib;
unsigned long load_avg;
};
struct ontime_entity {
struct ontime_avg avg;
int flags;
int cpu;
};
#endif
#ifdef CONFIG_SCHEDSTATS
struct sched_statistics {
u64 wait_start;
@@ -1483,6 +1502,9 @@ struct sched_entity {
*/
struct sched_avg avg ____cacheline_aligned_in_smp;
#endif
#ifdef CONFIG_SCHED_EHMP
struct ontime_entity ontime;
#endif
};
struct sched_rt_entity {
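The ontime state added to struct sched_entity above carries a PELT-like signal (struct ontime_avg) plus a flags/cpu pair. A rough sketch of how the flags might be consumed; the helper and the lifecycle comment are assumptions, only the flag values and layout come from this hunk:

/* Hypothetical helper, not part of this patch */
static inline bool entity_is_ontime(struct sched_entity *se)
{
	/* NOT_ONTIME(1), ONTIME_MIGRATING(2) and ONTIME(4) are used as exclusive states */
	return se->ontime.flags == ONTIME;
}

/*
 * Presumed lifecycle (an assumption, not visible in this diff):
 * NOT_ONTIME -> ONTIME_MIGRATING while ontime_migration() moves a heavy
 * task to a big CPU -> ONTIME while it stays there -> back to NOT_ONTIME
 * once its ontime load average decays below the down threshold.
 */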

include/trace/events/ehmp.h (new file, 340 lines)

@@ -0,0 +1,340 @@
/*
* Copyright (C) 2017 Park Bumgyu <bumgyu.park@samsung.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ehmp
#if !defined(_TRACE_EHMP_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EHMP_H
#include <linux/sched.h>
#include <linux/tracepoint.h>
/*
* Tracepoint for selection of boost cpu
*/
TRACE_EVENT(ehmp_select_boost_cpu,
TP_PROTO(struct task_struct *p, int cpu, int trigger, char *state),
TP_ARGS(p, cpu, trigger, state),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, cpu )
__field( int, trigger )
__array( char, state, 64 )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->cpu = cpu;
__entry->trigger = trigger;
memcpy(__entry->state, state, 64);
),
TP_printk("comm=%s pid=%d target_cpu=%d trigger=%d state=%s",
__entry->comm, __entry->pid, __entry->cpu,
__entry->trigger, __entry->state)
);
/*
* Tracepoint for selection of group balancer
*/
TRACE_EVENT(ehmp_select_group_boost,
TP_PROTO(struct task_struct *p, int cpu, char *state),
TP_ARGS(p, cpu, state),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, cpu )
__array( char, state, 64 )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->cpu = cpu;
memcpy(__entry->state, state, 64);
),
TP_printk("comm=%s pid=%d target_cpu=%d state=%s",
__entry->comm, __entry->pid, __entry->cpu, __entry->state)
);
TRACE_EVENT(ehmp_global_boost,
TP_PROTO(char *name, unsigned long boost),
TP_ARGS(name, boost),
TP_STRUCT__entry(
__array( char, name, 64 )
__field( unsigned long, boost )
),
TP_fast_assign(
memcpy(__entry->name, name, 64);
__entry->boost = boost;
),
TP_printk("name=%s global_boost_value=%ld", __entry->name, __entry->boost)
);
/*
* Tracepoint for prefer idle
*/
TRACE_EVENT(ehmp_prefer_idle,
TP_PROTO(struct task_struct *p, int orig_cpu, int target_cpu,
unsigned long task_util, unsigned long new_util, int idle),
TP_ARGS(p, orig_cpu, target_cpu, task_util, new_util, idle),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, orig_cpu )
__field( int, target_cpu )
__field( unsigned long, task_util )
__field( unsigned long, new_util )
__field( int, idle )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->orig_cpu = orig_cpu;
__entry->target_cpu = target_cpu;
__entry->task_util = task_util;
__entry->new_util = new_util;
__entry->idle = idle;
),
TP_printk("comm=%s pid=%d orig_cpu=%d target_cpu=%d task_util=%lu new_util=%lu idle=%d",
__entry->comm, __entry->pid, __entry->orig_cpu, __entry->target_cpu,
__entry->task_util, __entry->new_util, __entry->idle)
);
TRACE_EVENT(ehmp_prefer_idle_cpu_select,
TP_PROTO(struct task_struct *p, int cpu),
TP_ARGS(p, cpu),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, cpu )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->cpu = cpu;
),
TP_printk("comm=%s pid=%d target_cpu=%d",
__entry->comm, __entry->pid, __entry->cpu)
);
/*
* Tracepoint for cpu selection
*/
TRACE_EVENT(ehmp_find_best_target_stat,
TP_PROTO(int cpu, unsigned long cap, unsigned long util, unsigned long target_util),
TP_ARGS(cpu, cap, util, target_util),
TP_STRUCT__entry(
__field( int, cpu )
__field( unsigned long, cap )
__field( unsigned long, util )
__field( unsigned long, target_util )
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->cap = cap;
__entry->util = util;
__entry->target_util = target_util;
),
TP_printk("find_best : [cpu%d] capacity %lu, util %lu, target_util %lu\n",
__entry->cpu, __entry->cap, __entry->util, __entry->target_util)
);
TRACE_EVENT(ehmp_find_best_target_candi,
TP_PROTO(unsigned int cpu),
TP_ARGS(cpu),
TP_STRUCT__entry(
__field( unsigned int, cpu )
),
TP_fast_assign(
__entry->cpu = cpu;
),
TP_printk("find_best: energy candidate cpu %d\n", __entry->cpu)
);
TRACE_EVENT(ehmp_find_best_target_cpu,
TP_PROTO(unsigned int cpu, unsigned long target_util),
TP_ARGS(cpu, target_util),
TP_STRUCT__entry(
__field( unsigned int, cpu )
__field( unsigned long, target_util )
),
TP_fast_assign(
__entry->cpu = cpu;
__entry->target_util = target_util;
),
TP_printk("find_best: target_cpu %d, target_util %lu\n", __entry->cpu, __entry->target_util)
);
/*
* Tracepoint for ontime migration
*/
TRACE_EVENT(ehmp_ontime_migration,
TP_PROTO(struct task_struct *p, unsigned long load,
int src_cpu, int dst_cpu, int boost_migration),
TP_ARGS(p, load, src_cpu, dst_cpu, boost_migration),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( unsigned long, load )
__field( int, src_cpu )
__field( int, dst_cpu )
__field( int, bm )
),
TP_fast_assign(
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->load = load;
__entry->src_cpu = src_cpu;
__entry->dst_cpu = dst_cpu;
__entry->bm = boost_migration;
),
TP_printk("comm=%s pid=%d ontime_load_avg=%lu src_cpu=%d dst_cpu=%d boost_migration=%d",
__entry->comm, __entry->pid, __entry->load,
__entry->src_cpu, __entry->dst_cpu, __entry->bm)
);
/*
* Tracepoint for accounting ontime load averages for tasks.
*/
TRACE_EVENT(ehmp_ontime_new_entity_load,
TP_PROTO(struct task_struct *tsk, struct ontime_avg *avg),
TP_ARGS(tsk, avg),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, cpu )
__field( unsigned long, load_avg )
__field( u64, load_sum )
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->cpu = task_cpu(tsk);
__entry->load_avg = avg->load_avg;
__entry->load_sum = avg->load_sum;
),
TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu load_sum=%llu",
__entry->comm,
__entry->pid,
__entry->cpu,
__entry->load_avg,
(u64)__entry->load_sum)
);
/*
* Tracepoint for accounting ontime load averages for tasks.
*/
TRACE_EVENT(ehmp_ontime_load_avg_task,
TP_PROTO(struct task_struct *tsk, struct ontime_avg *avg, int ontime_flag),
TP_ARGS(tsk, avg, ontime_flag),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, cpu )
__field( unsigned long, load_avg )
__field( u64, load_sum )
__field( int, ontime_flag )
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->cpu = task_cpu(tsk);
__entry->load_avg = avg->load_avg;
__entry->load_sum = avg->load_sum;
__entry->ontime_flag = ontime_flag;
),
TP_printk("comm=%s pid=%d cpu=%d load_avg=%lu load_sum=%llu ontime_flag=%d",
__entry->comm, __entry->pid, __entry->cpu, __entry->load_avg,
(u64)__entry->load_sum, __entry->ontime_flag)
);
TRACE_EVENT(ehmp_ontime_check_migrate,
TP_PROTO(struct task_struct *tsk, int cpu, int migrate, char *label),
TP_ARGS(tsk, cpu, migrate, label),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( int, cpu )
__field( int, migrate )
__array( char, label, 64 )
),
TP_fast_assign(
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->cpu = cpu;
__entry->migrate = migrate;
strncpy(__entry->label, label, 64);
),
TP_printk("comm=%s pid=%d target_cpu=%d migrate=%d reason=%s",
__entry->comm, __entry->pid, __entry->cpu,
__entry->migrate, __entry->label)
);
#endif /* _TRACE_EHMP_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
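Each TRACE_EVENT() above generates a trace_<name>() helper for the scheduler side. A hypothetical call site, assuming kernel/sched/ehmp.c instantiates these tracepoints; the wrapper and reason strings are not from the patch:

/* Hypothetical call site, not part of this patch */
#include <trace/events/ehmp.h>

static void report_ontime_decision(struct task_struct *p, int cpu, int migrate)
{
	trace_ehmp_ontime_check_migrate(p, cpu, migrate,
			migrate ? "migrate to big" : "stay");
}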

include/trace/events/sched.h

@@ -796,6 +796,67 @@ TRACE_EVENT(sched_tune_tasks_update,
__entry->boost, __entry->max_boost)
);
/*
* Tracepoint for schedtune_grouputil_update
*/
TRACE_EVENT(sched_tune_grouputil_update,
TP_PROTO(int idx, int total, int accumulated, unsigned long group_util,
struct task_struct *heaviest_p, unsigned long biggest_util),
TP_ARGS(idx, total, accumulated, group_util, heaviest_p, biggest_util),
TP_STRUCT__entry(
__field( int, idx )
__field( int, total )
__field( int, accumulated )
__field( unsigned long, group_util )
__field( pid_t, pid )
__array( char, comm, TASK_COMM_LEN )
__field( unsigned long, biggest_util )
),
TP_fast_assign(
__entry->idx = idx;
__entry->total = total;
__entry->accumulated = accumulated;
__entry->group_util = group_util;
__entry->pid = heaviest_p->pid;
memcpy(__entry->comm, heaviest_p->comm, TASK_COMM_LEN);
__entry->biggest_util = biggest_util;
),
TP_printk("idx=%d total=%d accumulated=%d group_util=%lu "
"heaviest task(pid=%d comm=%s util=%lu)",
__entry->idx, __entry->total, __entry->accumulated, __entry->group_util,
__entry->pid, __entry->comm, __entry->biggest_util)
);
/*
* Tracepoint for checking group balancing
*/
TRACE_EVENT(sched_tune_check_group_balance,
TP_PROTO(int idx, int ib_count, bool balancing),
TP_ARGS(idx, ib_count, balancing),
TP_STRUCT__entry(
__field( int, idx )
__field( int, ib_count )
__field( bool, balancing )
),
TP_fast_assign(
__entry->idx = idx;
__entry->ib_count = ib_count;
__entry->balancing = balancing;
),
TP_printk("idx=%d imbalance_count=%d balancing=%d",
__entry->idx, __entry->ib_count, __entry->balancing)
);
/*
* Tracepoint for schedtune_boostgroup_update
*/

init/Kconfig

@@ -1464,6 +1464,19 @@ config SCHED_TUNE
If unsure, say N.
config SCHED_EHMP
bool "Exynos scheduler for Heterogeneous Multi-Processor"
depends on SMP
default n
help
This option enables the Exynos scheduler for HMP architectures. It is
designed to address the limitations of the energy aware scheduler.
It provides independent boosting features such as global boost and
on-time migration, as well as prefer_perf and an enhanced prefer_idle
that work in conjunction with SCHEDTUNE.
If unsure, say N.
config DEFAULT_USE_ENERGY_AWARE
bool "Default to enabling the Energy Aware Scheduler feature"
default n
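The prefer_perf hint mentioned in the help text above can also be asserted from kernel space through request_kernel_prefer_perf(), declared in include/linux/ehmp.h. A hedged sketch; the driver and call site are hypothetical, and it assumes the top-app boost group occupies the STUNE_TOPAPP slot:

/* Hypothetical example, not part of this patch */
#include <linux/ehmp.h>

static void fingerprint_boost(bool on)
{
	/*
	 * Reference-counted kernel vote: tasks in the top-app schedtune group
	 * are treated as prefer_perf while at least one vote is held.
	 */
	request_kernel_prefer_perf(STUNE_TOPAPP, on ? 1 : 0);
}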

kernel/sched/Makefile

@@ -19,6 +19,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o swait.o completion.o idle.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o
obj-$(CONFIG_SCHED_EHMP) += ehmp.o
obj-$(CONFIG_SCHED_WALT) += walt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o

kernel/sched/ehmp.c (new file, 1671 lines)

(Diff not shown: file too large.)

kernel/sched/fair.c

@@ -30,6 +30,7 @@
#include <linux/mempolicy.h>
#include <linux/migrate.h>
#include <linux/task_work.h>
#include <linux/ehmp.h>
#include <linux/module.h>
#include <trace/events/sched.h>
@@ -604,7 +605,7 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
return rb_entry(left, struct sched_entity, run_node);
}
static struct sched_entity *__pick_next_entity(struct sched_entity *se)
struct sched_entity *__pick_next_entity(struct sched_entity *se)
{
struct rb_node *next = rb_next(&se->run_node);
@@ -753,6 +754,8 @@ void init_entity_runnable_average(struct sched_entity *se)
sa->util_avg = 0;
sa->util_sum = 0;
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
ontime_new_entity_load(current, se);
}
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
@@ -791,6 +794,11 @@ void post_init_entity_util_avg(struct sched_entity *se)
long cpu_scale = arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;
if (sched_feat(EXYNOS_HMP)) {
exynos_init_entity_util_avg(se);
goto util_init_done;
}
if (cap > 0) {
if (cfs_rq->avg.util_avg != 0) {
sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
@@ -804,6 +812,7 @@ void post_init_entity_util_avg(struct sched_entity *se)
sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
}
util_init_done:
if (entity_is_task(se)) {
struct task_struct *p = task_of(se);
if (p->sched_class != &fair_sched_class) {
@@ -2746,7 +2755,7 @@ static inline void update_cfs_shares(struct sched_entity *se)
* Approximate:
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
*/
static u64 decay_load(u64 val, u64 n)
u64 decay_load(u64 val, u64 n)
{
unsigned int local_n;
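decay_load() is made non-static above, presumably so kernel/sched/ehmp.c can reuse PELT's geometric decay (val * y^n with y^32 ~= 0.5) for its ontime signal; ontime_update_load_avg() is hooked into ___update_load_avg() in the next hunk. A standalone floating-point illustration of that decay (the kernel itself uses fixed-point lookup tables):

/* Illustration only: userspace, floating point; the kernel uses fixed-point tables */
#include <math.h>
#include <stdio.h>

int main(void)
{
	const double y = pow(0.5, 1.0 / 32.0);	/* chosen so that y^32 == 0.5 */
	double val = 1024.0;
	int n;

	for (n = 0; n <= 96; n += 32)
		printf("val * y^%-2d = %4.0f\n", n, val * pow(y, n));
	/* prints 1024, 512, 256, 128: a contribution loses half its weight every 32 periods */
	return 0;
}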
@@ -2947,6 +2956,9 @@ ___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
if (!weight)
running = 0;
if (!cfs_rq && !rt_rq)
ontime_update_load_avg(delta, cpu, weight, sa);
/*
* Now we know we crossed measurement unit boundaries. The *_avg
* accrues by two steps:
@@ -3424,6 +3436,7 @@ static inline void update_load_avg(struct sched_entity *se, int flags)
update_tg_load_avg(cfs_rq, 0);
if (entity_is_task(se)) {
ontime_trace_task_info(task_of(se));
#ifdef CONFIG_SCHED_WALT
ptr = (void *)&(task_of(se)->ravg);
#endif
@@ -5850,68 +5863,6 @@ static inline bool energy_aware(void)
return sched_feat(ENERGY_AWARE);
}
/*
* CPU candidates.
*
* These are labels to reference CPU candidates for an energy_diff.
* Currently we support only two possible candidates: the task's previous CPU
* and another candiate CPU.
* More advanced/aggressive EAS selection policies can consider more
* candidates.
*/
#define EAS_CPU_PRV 0
#define EAS_CPU_NXT 1
#define EAS_CPU_BKP 2
#define EAS_CPU_CNT 3
/*
* energy_diff - supports the computation of the estimated energy impact in
* moving a "task"'s "util_delta" between different CPU candidates.
*/
struct energy_env {
/* Utilization to move */
struct task_struct *p;
int util_delta;
/* Mask of CPUs candidates to evaluate */
cpumask_t cpus_mask;
/* CPU candidates to evaluate */
struct {
/* CPU ID, must be in cpus_mask */
int cpu_id;
/*
* Index (into sched_group_energy::cap_states) of the OPP the
* CPU needs to run at if the task is placed on it.
* This includes the both active and blocked load, due to
* other tasks on this CPU, as well as the task's own
* utilization.
*/
int cap_idx;
int cap;
/* Estimated system energy */
unsigned int energy;
/* Estimated energy variation wrt EAS_CPU_PRV */
int nrg_delta;
} cpu[EAS_CPU_CNT];
/*
* Index (into energy_env::cpu) of the morst energy efficient CPU for
* the specified energy_env::task
*/
int next_idx;
/* Support data */
struct sched_group *sg_top;
struct sched_group *sg_cap;
struct sched_group *sg;
};
/**
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
* @cpu: the CPU to get the utilization of
@@ -6414,7 +6365,7 @@ static inline bool cpu_in_sg(struct sched_group *sg, int cpu)
* A value greater than zero means that the first energy-efficient CPU is the
* one represented by eenv->cpu[eenv->next_idx].cpu_id.
*/
static inline int select_energy_cpu_idx(struct energy_env *eenv)
int select_energy_cpu_idx(struct energy_env *eenv)
{
struct sched_domain *sd;
struct sched_group *sg;
@@ -6590,7 +6541,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
return 1;
}
static inline unsigned long boosted_task_util(struct task_struct *p);
unsigned long boosted_task_util(struct task_struct *p);
static inline bool __task_fits(struct task_struct *p, int cpu, int util)
{
@@ -6707,7 +6658,7 @@ boosted_cpu_util(int cpu)
return util + margin;
}
static inline unsigned long
unsigned long
boosted_task_util(struct task_struct *p)
{
unsigned long util = task_util_est(p);
@@ -7244,7 +7195,7 @@ static int start_cpu(bool boosted)
return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu;
}
static inline int find_best_target(struct task_struct *p, int *backup_cpu,
int find_best_target(struct task_struct *p, int *backup_cpu,
bool boosted, bool prefer_idle)
{
unsigned long min_util = boosted_task_util(p);
@@ -7716,6 +7667,14 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
cpumask_test_cpu(cpu, tsk_cpus_allowed(p)));
}
if (sched_feat(EXYNOS_HMP)) {
int selected_cpu;
selected_cpu = exynos_select_cpu(p, prev_cpu, sync, sd_flag);
if (selected_cpu >= 0)
return selected_cpu;
}
rcu_read_lock();
sd = rcu_dereference(cpu_rq(prev_cpu)->sd);
if (energy_aware() && sd && !sd_overutilized(sd)) {
@@ -8449,6 +8408,11 @@ static inline int migrate_degrades_locality(struct task_struct *p,
}
#endif
static inline bool smaller_cpu_capacity(int cpu, int ref)
{
return capacity_orig_of(cpu) < capacity_orig_of(ref);
}
/*
* can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
*/
@@ -8461,11 +8425,21 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/*
* We do not migrate tasks that are:
* 0) cannot be migrated to smaller capacity cpu due to schedtune.prefer_perf, or
* 1) throttled_lb_pair, or
* 2) cannot be migrated to this CPU due to cpus_allowed, or
* 3) running (obviously), or
* 4) are cache-hot on their current CPU.
*/
if (!ontime_can_migration(p, env->dst_cpu))
return 0;
#ifdef CONFIG_SCHED_TUNE
if (smaller_cpu_capacity(env->dst_cpu, env->src_cpu) &&
schedtune_prefer_perf(p))
return 0;
#endif
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;
@@ -9974,6 +9948,9 @@ static int need_active_balance(struct lb_env *env)
return 1;
}
if (sched_feat(EXYNOS_HMP))
return exynos_need_active_balance(env->idle, sd, env->src_cpu, env->dst_cpu);
/*
* The dst_cpu is idle and the src_cpu CPU has only 1 CFS task.
* It's worth migrating the task if the src_cpu's capacity is reduced
@@ -11004,6 +10981,10 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
update_blocked_averages(this_rq->cpu);
if (!test_bit(NOHZ_STATS_KICK, nohz_flags(this_rq->cpu)))
rebalance_domains(this_rq, idle);
ontime_migration();
schedtune_group_util_update();
#ifdef CONFIG_NO_HZ_COMMON
clear_bit(NOHZ_STATS_KICK, nohz_flags(this_rq->cpu));
#endif

kernel/sched/features.h

@@ -103,3 +103,10 @@ SCHED_FEAT(MIN_CAPACITY_CAPPING, false)
* OFF: Use whichever of target or backup saves most.
*/
SCHED_FEAT(FBT_STRICT_ORDER, true)
#ifdef CONFIG_SCHED_EHMP
SCHED_FEAT(EXYNOS_HMP, true)
#else
SCHED_FEAT(EXYNOS_HMP, false)
#endif

kernel/sched/sched.h

@@ -738,6 +738,9 @@ struct rq {
u64 cum_window_demand;
#endif /* CONFIG_SCHED_WALT */
#ifdef CONFIG_SCHED_EHMP
bool ontime_migrating;
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time;
@@ -1989,6 +1992,68 @@ extern void nohz_balance_exit_idle(unsigned int cpu);
static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#endif
/*
* CPU candidates.
*
* These are labels to reference CPU candidates for an energy_diff.
* Currently we support only two possible candidates: the task's previous CPU
* and another candidate CPU.
* More advanced/aggressive EAS selection policies can consider more
* candidates.
*/
#define EAS_CPU_PRV 0
#define EAS_CPU_NXT 1
#define EAS_CPU_BKP 2
#define EAS_CPU_CNT 3
/*
* energy_diff - supports the computation of the estimated energy impact in
* moving a "task"'s "util_delta" between different CPU candidates.
*/
struct energy_env {
/* Utilization to move */
struct task_struct *p;
int util_delta;
/* Mask of CPUs candidates to evaluate */
cpumask_t cpus_mask;
/* CPU candidates to evaluate */
struct {
/* CPU ID, must be in cpus_mask */
int cpu_id;
/*
* Index (into sched_group_energy::cap_states) of the OPP the
* CPU needs to run at if the task is placed on it.
* This includes both the active and blocked load, due to
* other tasks on this CPU, as well as the task's own
* utilization.
*/
int cap_idx;
int cap;
/* Estimated system energy */
unsigned int energy;
/* Estimated energy variation wrt EAS_CPU_PRV */
int nrg_delta;
} cpu[EAS_CPU_CNT];
/*
* Index (into energy_env::cpu) of the most energy efficient CPU for
* the specified energy_env::task
*/
int next_idx;
/* Support data */
struct sched_group *sg_top;
struct sched_group *sg_cap;
struct sched_group *sg;
};
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
u64 total;
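struct energy_env and the EAS_CPU_* indices move into this shared header, presumably so kernel/sched/ehmp.c can drive the stock select_energy_cpu_idx() that fair.c now exports. A rough sketch of the calling convention inferred from the comments above; the helper and its field choices are assumptions, not the patch's actual code:

/* Hypothetical caller, not part of this patch */
static int pick_energy_cpu(struct task_struct *p, int prev_cpu, int target_cpu)
{
	struct energy_env eenv = {
		.p		= p,
		.util_delta	= task_util_est(p),
	};

	eenv.cpu[EAS_CPU_PRV].cpu_id = prev_cpu;
	eenv.cpu[EAS_CPU_NXT].cpu_id = target_cpu;
	eenv.cpu[EAS_CPU_BKP].cpu_id = -1;	/* no backup candidate */

	/* > 0 means eenv.cpu[eenv.next_idx].cpu_id is the more efficient choice */
	if (select_energy_cpu_idx(&eenv) > 0)
		return eenv.cpu[eenv.next_idx].cpu_id;

	return prev_cpu;
}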

kernel/sched/tune.c

@@ -5,6 +5,7 @@
#include <linux/printk.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/ehmp.h>
#include <trace/events/sched.h>
@@ -20,6 +21,52 @@ unsigned int sysctl_sched_cfs_boost __read_mostly;
extern struct reciprocal_value schedtune_spc_rdiv;
struct target_nrg schedtune_target_nrg;
static int perf_threshold = 0;
int schedtune_perf_threshold(void)
{
return perf_threshold + 1;
}
struct group_balancer {
/* sum of task utilization in group */
unsigned long util;
/* group balancing threshold */
unsigned long threshold;
/* imbalance ratio by heaviest task */
unsigned int imbalance_ratio;
/* balance ratio by heaviest task */
unsigned int balance_ratio;
/* heaviest task utilization in group */
unsigned long heaviest_util;
/* group utilization update interval */
unsigned long update_interval;
/* next group utilization update time */
unsigned long next_update_time;
/*
* group imbalance time = imbalance_count * update_interval
* imbalance_count >= imbalance_duration -> need balance
*/
unsigned int imbalance_duration;
unsigned int imbalance_count;
/* utilization tracking window size */
unsigned long window;
/* group balancer locking */
raw_spinlock_t lock;
/* need group balancing? */
bool need_balance;
};
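To make the timing above concrete: with an update_interval of 16 ms and an imbalance_duration of 4 (illustrative values, not defaults set by this patch), the group utilization has to stay at or above threshold with the heaviest task holding more than imbalance_ratio percent of it for four consecutive updates, roughly 64 ms, before need_balance is set; the flag clears again as soon as the utilization falls below threshold or the heaviest share drops under balance_ratio.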
/* Performance Boost region (B) threshold params */
static int perf_boost_idx;
@@ -188,6 +235,13 @@ struct schedtune {
/* Hint to bias scheduling of tasks on that SchedTune CGroup
* towards idle CPUs */
int prefer_idle;
/* Hint to bias scheduling of tasks on that SchedTune CGroup
* towards high performance CPUs */
int prefer_perf;
/* SchedTune group balancer */
struct group_balancer gb;
};
static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
@@ -220,6 +274,7 @@ root_schedtune = {
.perf_boost_idx = 0,
.perf_constrain_idx = 0,
.prefer_idle = 0,
.prefer_perf = 0,
};
int
@@ -624,6 +679,337 @@ int schedtune_prefer_idle(struct task_struct *p)
return prefer_idle;
}
#ifdef CONFIG_SCHED_EHMP
static atomic_t kernel_prefer_perf_req[BOOSTGROUPS_COUNT];
int kernel_prefer_perf(int grp_idx)
{
if (grp_idx >= BOOSTGROUPS_COUNT)
return -EINVAL;
return atomic_read(&kernel_prefer_perf_req[grp_idx]);
}
void request_kernel_prefer_perf(int grp_idx, int enable)
{
if (grp_idx >= BOOSTGROUPS_COUNT)
return;
if (enable)
atomic_inc(&kernel_prefer_perf_req[grp_idx]);
else
BUG_ON(atomic_dec_return(&kernel_prefer_perf_req[grp_idx]) < 0);
}
#else
static inline int kernel_prefer_perf(int grp_idx) { return 0; }
#endif
int schedtune_prefer_perf(struct task_struct *p)
{
struct schedtune *st;
int prefer_perf;
if (unlikely(!schedtune_initialized))
return 0;
/* Get prefer_perf value */
rcu_read_lock();
st = task_schedtune(p);
prefer_perf = max(st->prefer_perf, kernel_prefer_perf(st->idx));
rcu_read_unlock();
return prefer_perf;
}
int schedtune_need_group_balance(struct task_struct *p)
{
bool balance;
if (unlikely(!schedtune_initialized))
return 0;
rcu_read_lock();
balance = task_schedtune(p)->gb.need_balance;
rcu_read_unlock();
return balance;
}
static inline void
check_need_group_balance(int group_idx, struct group_balancer *gb)
{
int heaviest_ratio;
if (!gb->util) {
gb->imbalance_count = 0;
gb->need_balance = false;
goto out;
}
heaviest_ratio = gb->heaviest_util * 100 / gb->util;
if (gb->need_balance) {
if (gb->util < gb->threshold || heaviest_ratio < gb->balance_ratio) {
gb->imbalance_count = 0;
gb->need_balance = false;
}
goto out;
}
if (gb->util >= gb->threshold && heaviest_ratio > gb->imbalance_ratio) {
gb->imbalance_count++;
if (gb->imbalance_count >= gb->imbalance_duration)
gb->need_balance = true;
} else {
gb->imbalance_count = 0;
}
out:
trace_sched_tune_check_group_balance(group_idx,
gb->imbalance_count, gb->need_balance);
}
static void __schedtune_group_util_update(struct schedtune *st)
{
struct group_balancer *gb = &st->gb;
unsigned long now = cpu_rq(0)->clock_task;
struct css_task_iter it;
struct task_struct *p;
struct task_struct *heaviest_p = NULL;
unsigned long util_sum = 0;
unsigned long heaviest_util = 0;
unsigned int total = 0, accumulated = 0;
if (!raw_spin_trylock(&gb->lock))
return;
if (!gb->update_interval)
goto out;
if (time_before(now, gb->next_update_time))
goto out;
css_task_iter_start(&st->css, &it);
while ((p = css_task_iter_next(&it))) {
unsigned long clock_task, delta, util;
total++;
clock_task = task_rq(p)->clock_task;
delta = clock_task - p->se.avg.last_update_time;
if (p->se.avg.last_update_time && delta > gb->window)
continue;
util = p->se.avg.util_avg;
if (util > heaviest_util) {
heaviest_util = util;
heaviest_p = p;
}
util_sum += p->se.avg.util_avg;
accumulated++;
}
css_task_iter_end(&it);
gb->util = util_sum;
gb->heaviest_util = heaviest_util;
gb->next_update_time = now + gb->update_interval;
/* if there is no task in group, heaviest_p is always NULL */
if (heaviest_p)
trace_sched_tune_grouputil_update(st->idx, total, accumulated,
gb->util, heaviest_p, gb->heaviest_util);
check_need_group_balance(st->idx, gb);
out:
raw_spin_unlock(&gb->lock);
}
void schedtune_group_util_update(void)
{
int idx;
if (unlikely(!schedtune_initialized))
return;
rcu_read_lock();
for (idx = 1; idx < BOOSTGROUPS_COUNT; idx++) {
struct schedtune *st = allocated_group[idx];
if (!st)
continue;
__schedtune_group_util_update(st);
}
rcu_read_unlock();
}
static u64
gb_util_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->gb.util;
}
static u64
gb_heaviest_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
if (!st->gb.util)
return 0;
return st->gb.heaviest_util * 100 / st->gb.util;
}
static u64
gb_threshold_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->gb.threshold;
}
static int
gb_threshold_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 threshold)
{
struct schedtune *st = css_st(css);
struct group_balancer *gb = &st->gb;
raw_spin_lock(&gb->lock);
gb->threshold = threshold;
check_need_group_balance(st->idx, gb);
raw_spin_unlock(&gb->lock);
return 0;
}
static u64
gb_imbalance_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->gb.imbalance_ratio;
}
static int
gb_imbalance_ratio_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 ratio)
{
struct schedtune *st = css_st(css);
struct group_balancer *gb = &st->gb;
ratio = min_t(u64, ratio, 100);
raw_spin_lock(&gb->lock);
gb->imbalance_ratio = ratio;
check_need_group_balance(st->idx, gb);
raw_spin_unlock(&gb->lock);
return 0;
}
static u64
gb_balance_ratio_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->gb.balance_ratio;
}
static int
gb_balance_ratio_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 ratio)
{
struct schedtune *st = css_st(css);
struct group_balancer *gb = &st->gb;
ratio = min_t(u64, ratio, 100);
raw_spin_lock(&gb->lock);
gb->balance_ratio = ratio;
check_need_group_balance(st->idx, gb);
raw_spin_unlock(&gb->lock);
return 0;
}
static u64
gb_interval_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->gb.update_interval / NSEC_PER_USEC;
}
static int
gb_interval_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 interval_us)
{
struct schedtune *st = css_st(css);
struct group_balancer *gb = &st->gb;
raw_spin_lock(&gb->lock);
gb->update_interval = interval_us * NSEC_PER_USEC;
if (!interval_us) {
gb->util = 0;
gb->need_balance = false;
}
raw_spin_unlock(&gb->lock);
return 0;
}
static u64
gb_duration_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->gb.imbalance_duration;
}
static int
gb_duration_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 duration)
{
struct schedtune *st = css_st(css);
struct group_balancer *gb = &st->gb;
raw_spin_lock(&gb->lock);
gb->imbalance_duration = duration;
check_need_group_balance(st->idx, gb);
raw_spin_unlock(&gb->lock);
return 0;
}
static u64
gb_window_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->gb.window / NSEC_PER_MSEC;
}
static int
gb_window_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 window)
{
struct schedtune *st = css_st(css);
struct group_balancer *gb = &st->gb;
raw_spin_lock(&gb->lock);
gb->window = window * NSEC_PER_MSEC;
raw_spin_unlock(&gb->lock);
return 0;
}
static u64
prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
@@ -642,6 +1028,24 @@ prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
return 0;
}
static u64
prefer_perf_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
return st->prefer_perf;
}
static int
prefer_perf_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 prefer_perf)
{
struct schedtune *st = css_st(css);
st->prefer_perf = prefer_perf;
return 0;
}
static s64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
@@ -698,6 +1102,49 @@ static struct cftype files[] = {
.read_u64 = prefer_idle_read,
.write_u64 = prefer_idle_write,
},
{
.name = "prefer_perf",
.read_u64 = prefer_perf_read,
.write_u64 = prefer_perf_write,
},
{
.name = "gb_util",
.read_u64 = gb_util_read,
},
{
.name = "gb_heaviest_ratio",
.read_u64 = gb_heaviest_ratio_read,
},
{
.name = "gb_threshold",
.read_u64 = gb_threshold_read,
.write_u64 = gb_threshold_write,
},
{
.name = "gb_imbalance_ratio",
.read_u64 = gb_imbalance_ratio_read,
.write_u64 = gb_imbalance_ratio_write,
},
{
.name = "gb_balance_ratio",
.read_u64 = gb_balance_ratio_read,
.write_u64 = gb_balance_ratio_write,
},
{
.name = "gb_interval_us",
.read_u64 = gb_interval_read,
.write_u64 = gb_interval_write,
},
{
.name = "gb_duration",
.read_u64 = gb_duration_read,
.write_u64 = gb_duration_write,
},
{
.name = "gb_window_ms",
.read_u64 = gb_window_read,
.write_u64 = gb_window_write,
},
{ } /* terminate */
};
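The new entries above surface as per-group attribute files of the schedtune cgroup controller. A hedged userspace sketch; the /dev/stune mount point (the usual Android convention), the schedtune. file prefix, and the values written are assumptions:

/* Hypothetical userspace configuration, not part of this patch */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_attr(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* sample the top-app group every 16 ms over a 100 ms tracking window */
	write_attr("/dev/stune/top-app/schedtune.gb_interval_us", "16000");
	write_attr("/dev/stune/top-app/schedtune.gb_window_ms", "100");
	/* flag imbalance once group util exceeds 600 and one task owns over 80% of it */
	write_attr("/dev/stune/top-app/schedtune.gb_threshold", "600");
	write_attr("/dev/stune/top-app/schedtune.gb_imbalance_ratio", "80");
	return 0;
}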
@@ -719,6 +1166,22 @@ schedtune_boostgroup_init(struct schedtune *st, int idx)
st->idx = idx;
}
static void
schedtune_group_balancer_init(struct schedtune *st)
{
raw_spin_lock_init(&st->gb.lock);
st->gb.threshold = ULONG_MAX;
st->gb.imbalance_ratio = 0; /* 0% */
st->gb.update_interval = 0; /* disable update */
st->gb.next_update_time = cpu_rq(0)->clock_task;
st->gb.imbalance_duration = 0;
st->gb.imbalance_count = 0;
st->gb.window = 100 * NSEC_PER_MSEC; /* 100ms */
}
static struct cgroup_subsys_state *
schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
{
@@ -748,6 +1211,8 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
if (!st)
goto out;
schedtune_group_balancer_init(st);
/* Initialize per CPUs boost group support */
schedtune_boostgroup_init(st, idx);
@@ -1015,6 +1480,8 @@ schedtune_init(void)
pr_info("schedtune: configured to support global boosting only\n");
#endif
perf_threshold = find_second_max_cap();
schedtune_spc_rdiv = reciprocal_value(100);
return 0;

kernel/sched/tune.h

@@ -17,7 +17,13 @@ struct target_nrg {
int schedtune_cpu_boost(int cpu);
int schedtune_task_boost(struct task_struct *tsk);
void schedtune_group_util_update(void);
int schedtune_need_group_balance(struct task_struct *p);
int schedtune_perf_threshold(void);
int schedtune_prefer_idle(struct task_struct *tsk);
int schedtune_prefer_perf(struct task_struct *tsk);
void schedtune_exit_task(struct task_struct *tsk);
@@ -45,6 +51,14 @@ int schedtune_accept_deltas(int nrg_delta, int cap_delta,
#define schedtune_cpu_boost(cpu) 0
#define schedtune_task_boost(tsk) 0
#define schedtune_group_util_update() do { } while (0)
#define schedtune_need_group_balance(task) 0
#define schedtune_perf_threshold() 0
#define schedtune_prefer_idle(tsk) 0
#define schedtune_prefer_perf(tsk) 0
#define schedtune_exit_task(task) do { } while (0)
#define schedtune_enqueue_task(task, cpu) do { } while (0)