FROMLIST: psi: introduce psi monitor

Psi monitor aims to provide a low-latency short-term pressure
detection mechanism configurable by users. It allows users to
monitor psi metrics growth and trigger events whenever a metric
rises above a user-defined threshold within a user-defined time window.

Time window and threshold are both expressed in usecs. Multiple psi
resources with different thresholds and window sizes can be monitored
concurrently.

Psi monitors activate when the system enters a stall state for the monitored
psi metric and deactivate upon exit from the stall state. While the system
is in the stall state, psi signal growth is monitored at a rate of 10 times
per tracking window. Min window size is 500ms, therefore the min monitoring
interval is 50ms. Max window size is 10s with a monitoring interval of 1s.

Once activated, a psi monitor stays active for at least the duration of one
tracking window to avoid repeated activations/deactivations when the psi
signal is bouncing.

Notifications to the users are rate-limited to one per tracking window.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

(not upstream yet, latest version published at: https://lore.kernel.org/patchwork/patch/1052418/)

Conflicts:
        include/linux/psi.h
        kernel/cgroup.c
        kernel/sched/psi.c

(1. replaced __poll_t with unsigned int
2. replaced EPOLLERR/EPOLLPRI with POLLERR/POLLPRI (values are the same)
3. include <linux/cgroup-defs.h> in include/linux/psi.h)

Bug: 127712811
Bug: 129157727
Test: lmkd in PSI mode
Change-Id: I1688f047e98e1f109627dad72a33d2f70e575268
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
This commit is contained in:
Suren Baghdasaryan 2018-12-03 17:36:42 -08:00
parent c405bfbb74
commit a163d3fb8a
5 changed files with 742 additions and 20 deletions

View file

@ -3499,7 +3499,65 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
{
return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_CPU);
}
#endif
/*
 * Common write handler shared by the cgroup *.pressure files.
 *
 * Resolves the cgroup behind @of, hands the @nbytes of data in @buf to
 * psi_trigger_create() for resource @res and, if that succeeds, stores the
 * resulting trigger in of->priv through psi_trigger_replace().
 *
 * Returns @nbytes on success, -ENODEV when cgroup_kn_lock_live() does not
 * return a cgroup, or the error encoded in the pointer returned by
 * psi_trigger_create().
 */
static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
				     size_t nbytes, enum psi_res res)
{
	struct psi_trigger *trigger;
	struct cgroup *cgrp;
	ssize_t ret;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;

	/*
	 * Take a reference of our own before unlocking; it is dropped again
	 * at the single exit below, after the trigger has been dealt with.
	 */
	cgroup_get(cgrp);
	cgroup_kn_unlock(of->kn);

	trigger = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
	if (IS_ERR(trigger)) {
		ret = PTR_ERR(trigger);
	} else {
		/* Install the new trigger in this open file's private slot. */
		psi_trigger_replace(&of->priv, trigger);
		ret = nbytes;
	}

	cgroup_put(cgrp);
	return ret;
}
/*
 * Write handler for the IO pressure file: forwards to
 * cgroup_pressure_write() with the PSI_IO resource selected.
 * @off is accepted to match the handler signature but is not used.
 */
static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	ssize_t written;

	written = cgroup_pressure_write(of, buf, nbytes, PSI_IO);
	return written;
}
/*
 * Write handler for the memory pressure file: forwards to
 * cgroup_pressure_write() with the PSI_MEM resource selected.
 * @off is accepted to match the handler signature but is not used.
 */
static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes,
					    loff_t off)
{
	ssize_t written;

	written = cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
	return written;
}
/*
 * Write handler for the CPU pressure file: forwards to
 * cgroup_pressure_write() with the PSI_CPU resource selected.
 * @off is accepted to match the handler signature but is not used.
 */
static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
					 char *buf, size_t nbytes,
					 loff_t off)
{
	ssize_t written;

	written = cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
	return written;
}
/*
 * Poll handler shared by the cgroup *.pressure files.
 *
 * Passes the open file's private slot (of->priv), its struct file and the
 * caller's poll table @pt to psi_trigger_poll() and returns that function's
 * result unchanged.
 */
static unsigned int cgroup_pressure_poll(struct kernfs_open_file *of,
					 poll_table *pt)
{
	struct file *filp = of->file;

	return psi_trigger_poll(&of->priv, filp, pt);
}
/*
 * Release handler shared by the cgroup *.pressure files.
 *
 * Replaces whatever is stored in of->priv with NULL via
 * psi_trigger_replace().
 */
static void cgroup_pressure_release(struct kernfs_open_file *of)
{
	struct psi_trigger *none = NULL;

	psi_trigger_replace(&of->priv, none);
}
#endif /* CONFIG_PSI */
static int cgroup_file_open(struct kernfs_open_file *of)
{
@ -4955,18 +5013,27 @@ static struct cftype cgroup_dfl_base_files[] = {
.name = "io.pressure",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = cgroup_io_pressure_show,
.write = cgroup_io_pressure_write,
.poll = cgroup_pressure_poll,
.release = cgroup_pressure_release,
},
{
.name = "memory.pressure",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = cgroup_memory_pressure_show,
.write = cgroup_memory_pressure_write,
.poll = cgroup_pressure_poll,
.release = cgroup_pressure_release,
},
{
.name = "cpu.pressure",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = cgroup_cpu_pressure_show,
.write = cgroup_cpu_pressure_write,
.poll = cgroup_pressure_poll,
.release = cgroup_pressure_release,
},
#endif
#endif /* CONFIG_PSI */
{ } /* terminate */
};