commit 2fd1d2c4ceb2248a727696962cf3370dc9f5a0a4 upstream. Andrei Vagin writes: FYI: This bug has been reproduced on 4.11.7 > BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo} still in use (1) [unmount of proc proc] > ------------[ cut here ]------------ > WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80 > CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1 > Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015 > Workqueue: events proc_cleanup_work > Call Trace: > dump_stack+0x63/0x86 > __warn+0xcb/0xf0 > warn_slowpath_null+0x1d/0x20 > umount_check+0x6e/0x80 > d_walk+0xc6/0x270 > ? dentry_free+0x80/0x80 > do_one_tree+0x26/0x40 > shrink_dcache_for_umount+0x2d/0x90 > generic_shutdown_super+0x1f/0xf0 > kill_anon_super+0x12/0x20 > proc_kill_sb+0x40/0x50 > deactivate_locked_super+0x43/0x70 > deactivate_super+0x5a/0x60 > cleanup_mnt+0x3f/0x90 > mntput_no_expire+0x13b/0x190 > kern_unmount+0x3e/0x50 > pid_ns_release_proc+0x15/0x20 > proc_cleanup_work+0x15/0x20 > process_one_work+0x197/0x450 > worker_thread+0x4e/0x4a0 > kthread+0x109/0x140 > ? process_one_work+0x450/0x450 > ? kthread_park+0x90/0x90 > ret_from_fork+0x2c/0x40 > ---[ end trace e1c109611e5d0b41 ]--- > VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds. Have a nice day... > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: _raw_spin_lock+0xc/0x30 > PGD 0 Fix this by taking a reference to the super block in proc_sys_prune_dcache. The superblock reference is the core of the fix however the sysctl_inodes list is converted to a hlist so that hlist_del_init_rcu may be used. This allows proc_sys_prune_dache to remove inodes the sysctl_inodes list, while not causing problems for proc_sys_evict_inode when if it later choses to remove the inode from the sysctl_inodes list. Removing inodes from the sysctl_inodes list allows proc_sys_prune_dcache to have a progress guarantee, while still being able to drop all locks. The fact that head->unregistering is set in start_unregistering ensures that no more inodes will be added to the the sysctl_inodes list. Previously the code did a dance where it delayed calling iput until the next entry in the list was being considered to ensure the inode remained on the sysctl_inodes list until the next entry was walked to. The structure of the loop in this patch does not need that so is much easier to understand and maintain. Cc: stable@vger.kernel.org Reported-by: Andrei Vagin <avagin@gmail.com> Tested-by: Andrei Vagin <avagin@openvz.org> Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.") Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering") Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
228 lines
7 KiB
C
228 lines
7 KiB
C
/*
|
|
* sysctl.h: General linux system control interface
|
|
*
|
|
* Begun 24 March 1995, Stephen Tweedie
|
|
*
|
|
****************************************************************
|
|
****************************************************************
|
|
**
|
|
** WARNING:
|
|
** The values in this file are exported to user space via
|
|
** the sysctl() binary interface. Do *NOT* change the
|
|
** numbering of any existing values here, and do not change
|
|
** any numbers within any one set of values. If you have to
|
|
** redefine an existing interface, use a new number for it.
|
|
** The kernel will then return -ENOTDIR to any application using
|
|
** the old binary interface.
|
|
**
|
|
****************************************************************
|
|
****************************************************************
|
|
*/
|
|
#ifndef _LINUX_SYSCTL_H
|
|
#define _LINUX_SYSCTL_H
|
|
|
|
#include <linux/list.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/uidgid.h>
|
|
#include <uapi/linux/sysctl.h>
|
|
|
|
/* For the /proc/sys support */
|
|
struct completion;
|
|
struct ctl_table;
|
|
struct nsproxy;
|
|
struct ctl_table_root;
|
|
struct ctl_table_header;
|
|
struct ctl_dir;
|
|
|
|
typedef int proc_handler (struct ctl_table *ctl, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos);
|
|
|
|
extern int proc_dostring(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_dointvec(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_douintvec(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_dointvec_minmax(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_dointvec_jiffies(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_dointvec_ms_jiffies(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_doulongvec_minmax(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_do_large_bitmap(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
|
|
/*
|
|
* Register a set of sysctl names by calling register_sysctl_table
|
|
* with an initialised array of struct ctl_table's. An entry with
|
|
* NULL procname terminates the table. table->de will be
|
|
* set up by the registration and need not be initialised in advance.
|
|
*
|
|
* sysctl names can be mirrored automatically under /proc/sys. The
|
|
* procname supplied controls /proc naming.
|
|
*
|
|
* The table's mode will be honoured both for sys_sysctl(2) and
|
|
* proc-fs access.
|
|
*
|
|
* Leaf nodes in the sysctl tree will be represented by a single file
|
|
* under /proc; non-leaf nodes will be represented by directories. A
|
|
* null procname disables /proc mirroring at this node.
|
|
*
|
|
* sysctl(2) can automatically manage read and write requests through
|
|
* the sysctl table. The data and maxlen fields of the ctl_table
|
|
* struct enable minimal validation of the values being written to be
|
|
* performed, and the mode field allows minimal authentication.
|
|
*
|
|
* There must be a proc_handler routine for any terminal nodes
|
|
* mirrored under /proc/sys (non-terminals are handled by a built-in
|
|
* directory handler). Several default handlers are available to
|
|
* cover common cases.
|
|
*/
|
|
|
|
/* Support for userspace poll() to watch for changes */
|
|
struct ctl_table_poll {
|
|
atomic_t event;
|
|
wait_queue_head_t wait;
|
|
};
|
|
|
|
static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
|
|
{
|
|
return (void *)(unsigned long)atomic_read(&poll->event);
|
|
}
|
|
|
|
#define __CTL_TABLE_POLL_INITIALIZER(name) { \
|
|
.event = ATOMIC_INIT(0), \
|
|
.wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
|
|
|
|
#define DEFINE_CTL_TABLE_POLL(name) \
|
|
struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)
|
|
|
|
/* A sysctl table is an array of struct ctl_table: */
|
|
struct ctl_table
|
|
{
|
|
const char *procname; /* Text ID for /proc/sys, or zero */
|
|
void *data;
|
|
int maxlen;
|
|
umode_t mode;
|
|
struct ctl_table *child; /* Deprecated */
|
|
proc_handler *proc_handler; /* Callback for text formatting */
|
|
struct ctl_table_poll *poll;
|
|
void *extra1;
|
|
void *extra2;
|
|
};
|
|
|
|
struct ctl_node {
|
|
struct rb_node node;
|
|
struct ctl_table_header *header;
|
|
};
|
|
|
|
/* struct ctl_table_header is used to maintain dynamic lists of
|
|
struct ctl_table trees. */
|
|
struct ctl_table_header
|
|
{
|
|
union {
|
|
struct {
|
|
struct ctl_table *ctl_table;
|
|
int used;
|
|
int count;
|
|
int nreg;
|
|
};
|
|
struct rcu_head rcu;
|
|
};
|
|
struct completion *unregistering;
|
|
struct ctl_table *ctl_table_arg;
|
|
struct ctl_table_root *root;
|
|
struct ctl_table_set *set;
|
|
struct ctl_dir *parent;
|
|
struct ctl_node *node;
|
|
struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
|
|
};
|
|
|
|
struct ctl_dir {
|
|
/* Header must be at the start of ctl_dir */
|
|
struct ctl_table_header header;
|
|
struct rb_root root;
|
|
};
|
|
|
|
struct ctl_table_set {
|
|
int (*is_seen)(struct ctl_table_set *);
|
|
struct ctl_dir dir;
|
|
};
|
|
|
|
struct ctl_table_root {
|
|
struct ctl_table_set default_set;
|
|
struct ctl_table_set *(*lookup)(struct ctl_table_root *root);
|
|
void (*set_ownership)(struct ctl_table_header *head,
|
|
struct ctl_table *table,
|
|
kuid_t *uid, kgid_t *gid);
|
|
int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
|
|
};
|
|
|
|
/* struct ctl_path describes where in the hierarchy a table is added */
|
|
struct ctl_path {
|
|
const char *procname;
|
|
};
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
void proc_sys_poll_notify(struct ctl_table_poll *poll);
|
|
|
|
extern void setup_sysctl_set(struct ctl_table_set *p,
|
|
struct ctl_table_root *root,
|
|
int (*is_seen)(struct ctl_table_set *));
|
|
extern void retire_sysctl_set(struct ctl_table_set *set);
|
|
|
|
void register_sysctl_root(struct ctl_table_root *root);
|
|
struct ctl_table_header *__register_sysctl_table(
|
|
struct ctl_table_set *set,
|
|
const char *path, struct ctl_table *table);
|
|
struct ctl_table_header *__register_sysctl_paths(
|
|
struct ctl_table_set *set,
|
|
const struct ctl_path *path, struct ctl_table *table);
|
|
struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table);
|
|
struct ctl_table_header *register_sysctl_table(struct ctl_table * table);
|
|
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
|
|
struct ctl_table *table);
|
|
|
|
void unregister_sysctl_table(struct ctl_table_header * table);
|
|
|
|
extern int sysctl_init(void);
|
|
|
|
extern struct ctl_table sysctl_mount_point[];
|
|
|
|
#else /* CONFIG_SYSCTL */
|
|
static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
static inline struct ctl_table_header *register_sysctl_paths(
|
|
const struct ctl_path *path, struct ctl_table *table)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
static inline void unregister_sysctl_table(struct ctl_table_header * table)
|
|
{
|
|
}
|
|
|
|
static inline void setup_sysctl_set(struct ctl_table_set *p,
|
|
struct ctl_table_root *root,
|
|
int (*is_seen)(struct ctl_table_set *))
|
|
{
|
|
}
|
|
|
|
#endif /* CONFIG_SYSCTL */
|
|
|
|
int sysctl_max_threads(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *lenp, loff_t *ppos);
|
|
|
|
#endif /* _LINUX_SYSCTL_H */
|