Merge 4.9.212 branch 'android-4.9-q' into tw10-android-4.9-q

Documentation/filesystems/fscrypt.rst
	arch/arm/common/Kconfig
	arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
	arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi
	arch/arm64/boot/dts/arm/juno-clocks.dtsi
	arch/arm64/boot/dts/broadcom/ns2.dtsi
	arch/arm64/boot/dts/lg/lg1312.dtsi
	arch/arm64/boot/dts/lg/lg1313.dtsi
	arch/arm64/boot/dts/marvell/armada-37xx.dtsi
	arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi
	arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
	arch/arm64/boot/dts/nvidia/tegra210.dtsi
	arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
	arch/arm64/boot/dts/qcom/msm8996.dtsi
	arch/arm64/configs/ranchu64_defconfig
	arch/arm64/include/asm/cpucaps.h
	arch/arm64/kernel/cpufeature.c
	arch/arm64/kernel/traps.c
	arch/arm64/mm/mmu.c
	crypto/Makefile
	crypto/ablkcipher.c
	crypto/blkcipher.c
	crypto/testmgr.h
	crypto/zstd.c
	drivers/android/binder.c
	drivers/android/binder_alloc.c
	drivers/char/random.c
	drivers/clocksource/exynos_mct.c
	drivers/dma/pl330.c
	drivers/hid/hid-sony.c
	drivers/hid/uhid.c
	drivers/hid/usbhid/hiddev.c
	drivers/i2c/i2c-core.c
	drivers/md/dm-crypt.c
	drivers/media/v4l2-core/videobuf2-v4l2.c
	drivers/mmc/host/dw_mmc.c
	drivers/net/ethernet/broadcom/tg3.c
	drivers/net/usb/r8152.c
	drivers/scsi/scsi_logging.c
	drivers/scsi/sd.c
	drivers/scsi/ufs/ufshcd-pci.c
	drivers/scsi/ufs/ufshcd-pltfrm.c
	drivers/staging/android/Kconfig
	drivers/staging/android/ion/ion.c
	drivers/staging/android/ion/ion_priv.h
	drivers/staging/android/ion/ion_system_heap.c
	drivers/staging/android/lowmemorykiller.c
	drivers/tty/serial/samsung.c
	drivers/usb/dwc3/core.c
	drivers/usb/dwc3/gadget.c
	drivers/usb/host/xhci-hub.c
	drivers/video/fbdev/core/fbmon.c
	drivers/video/fbdev/core/modedb.c
	fs/crypto/fname.c
	fs/crypto/fscrypt_private.h
	fs/crypto/keyinfo.c
	fs/ext4/ialloc.c
	fs/ext4/namei.c
	fs/ext4/xattr.c
	fs/f2fs/checkpoint.c
	fs/f2fs/data.c
	fs/f2fs/debug.c
	fs/f2fs/dir.c
	fs/f2fs/f2fs.h
	fs/f2fs/file.c
	fs/f2fs/gc.c
	fs/f2fs/inline.c
	fs/f2fs/inode.c
	fs/f2fs/namei.c
	fs/f2fs/node.c
	fs/f2fs/recovery.c
	fs/f2fs/segment.c
	fs/f2fs/segment.h
	fs/f2fs/super.c
	fs/f2fs/sysfs.c
	fs/fat/dir.c
	fs/fat/fatent.c
	fs/file.c
	fs/namespace.c
	fs/pnode.c
	fs/proc/inode.c
	fs/proc/root.c
	fs/proc/task_mmu.c
	fs/sdcardfs/dentry.c
	fs/sdcardfs/derived_perm.c
	fs/sdcardfs/file.c
	fs/sdcardfs/inode.c
	fs/sdcardfs/lookup.c
	fs/sdcardfs/main.c
	fs/sdcardfs/sdcardfs.h
	fs/sdcardfs/super.c
	include/linux/blk_types.h
	include/linux/cpuhotplug.h
	include/linux/cred.h
	include/linux/fb.h
	include/linux/power_supply.h
	include/linux/sched.h
	include/linux/zstd.h
	include/trace/events/sched.h
	include/uapi/linux/android/binder.h
	init/Kconfig
	init/main.c
	kernel/bpf/hashtab.c
	kernel/cpu.c
	kernel/cred.c
	kernel/fork.c
	kernel/locking/spinlock_debug.c
	kernel/panic.c
	kernel/printk/printk.c
	kernel/sched/Makefile
	kernel/sched/core.c
	kernel/sched/fair.c
	kernel/sched/rt.c
	kernel/sched/walt.c
	kernel/sched/walt.h
	kernel/trace/trace.c
	lib/bug.c
	lib/list_debug.c
	lib/vsprintf.c
	lib/zstd/bitstream.h
	lib/zstd/compress.c
	lib/zstd/decompress.c
	lib/zstd/fse.h
	lib/zstd/fse_compress.c
	lib/zstd/fse_decompress.c
	lib/zstd/huf_compress.c
	lib/zstd/huf_decompress.c
	lib/zstd/zstd_internal.h
	mm/debug.c
	mm/filemap.c
	mm/rmap.c
	net/core/filter.c
	net/ipv4/sysctl_net_ipv4.c
	net/ipv4/sysfs_net_ipv4.c
	net/ipv4/tcp_input.c
	net/ipv4/tcp_output.c
	net/ipv4/udp.c
	net/ipv6/netfilter/nf_conntrack_reasm.c
	net/netfilter/Kconfig
	net/netfilter/Makefile
	net/netfilter/xt_qtaguid.c
	net/netfilter/xt_qtaguid_internal.h
	net/xfrm/xfrm_policy.c
	net/xfrm/xfrm_state.c
	scripts/checkpatch.pl
	security/selinux/hooks.c
	sound/core/compress_offload.c
Author: FAROVITUS
Date:   2020-02-09 15:34:27 +02:00
Commit: af1d3ae977

4823 changed files with 128774 additions and 44493 deletions


@ -1,119 +0,0 @@
What: /sys/block/zram<id>/num_reads
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The num_reads file is read-only and specifies the number of
reads (failed or successful) done on this device.
Now accessible via zram<id>/stat node.
What: /sys/block/zram<id>/num_writes
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The num_writes file is read-only and specifies the number of
writes (failed or successful) done on this device.
Now accessible via zram<id>/stat node.
What: /sys/block/zram<id>/invalid_io
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The invalid_io file is read-only and specifies the number of
non-page-size-aligned I/O requests issued to this device.
Now accessible via zram<id>/io_stat node.
What: /sys/block/zram<id>/failed_reads
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The failed_reads file is read-only and specifies the number of
failed reads that have happened on this device.
Now accessible via zram<id>/io_stat node.
What: /sys/block/zram<id>/failed_writes
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The failed_writes file is read-only and specifies the number of
failed writes that have happened on this device.
Now accessible via zram<id>/io_stat node.
What: /sys/block/zram<id>/notify_free
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The notify_free file is read-only. Depending on device usage
scenario it may account a) the number of pages freed because
of swap slot free notifications or b) the number of pages freed
because of REQ_DISCARD requests sent by bio. The former ones
are sent to a swap block device when a swap slot is freed, which
implies that this disk is being used as a swap disk. The latter
ones are sent by filesystem mounted with discard option,
whenever some data blocks are getting discarded.
Now accessible via zram<id>/io_stat node.
What: /sys/block/zram<id>/zero_pages
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The zero_pages file is read-only and specifies number of zero
filled pages written to this disk. No memory is allocated for
such pages.
Now accessible via zram<id>/mm_stat node.
What: /sys/block/zram<id>/orig_data_size
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The orig_data_size file is read-only and specifies uncompressed
size of data stored in this disk. This excludes zero-filled
pages (zero_pages) since no memory is allocated for them.
Unit: bytes
Now accessible via zram<id>/mm_stat node.
What: /sys/block/zram<id>/compr_data_size
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The compr_data_size file is read-only and specifies compressed
size of data stored in this disk. So, compression ratio can be
calculated using orig_data_size and this statistic.
Unit: bytes
Now accessible via zram<id>/mm_stat node.
What: /sys/block/zram<id>/mem_used_total
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The mem_used_total file is read-only and specifies the amount
of memory, including allocator fragmentation and metadata
overhead, allocated for this disk. So, allocator space
efficiency can be calculated using compr_data_size and this
statistic.
Unit: bytes
Now accessible via zram<id>/mm_stat node.
What: /sys/block/zram<id>/mem_used_max
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The mem_used_max file is read/write and specifies the maximum
amount of memory zram has consumed to store compressed data.
To reset the value, write "0"; otherwise, you will see -EINVAL.
Unit: bytes
Downgraded to write-only node: so it's possible to set new
value only; its current value is stored in zram<id>/mm_stat
node.
What: /sys/block/zram<id>/mem_limit
Date: August 2015
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The mem_limit file is read/write and specifies the maximum
amount of memory ZRAM can use to store the compressed data.
The limit could be changed in run time and "0" means disable
the limit. No limit is the initial state. Unit: bytes
Downgraded to write-only node: so it's possible to set new
value only; its current value is stored in zram<id>/mm_stat
node.


@ -22,41 +22,6 @@ Description:
device. The reset operation frees all the memory associated
with this device.
What: /sys/block/zram<id>/num_reads
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
Description:
The num_reads file is read-only and specifies the number of
reads (failed or successful) done on this device.
What: /sys/block/zram<id>/num_writes
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
Description:
The num_writes file is read-only and specifies the number of
writes (failed or successful) done on this device.
What: /sys/block/zram<id>/invalid_io
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
Description:
The invalid_io file is read-only and specifies the number of
non-page-size-aligned I/O requests issued to this device.
What: /sys/block/zram<id>/failed_reads
Date: February 2014
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The failed_reads file is read-only and specifies the number of
failed reads that have happened on this device.
What: /sys/block/zram<id>/failed_writes
Date: February 2014
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Description:
The failed_writes file is read-only and specifies the number of
failed writes that have happened on this device.
What: /sys/block/zram<id>/max_comp_streams
Date: February 2014
Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
@ -73,74 +38,24 @@ Description:
available and selected compression algorithms, change
compression algorithm selection.
What: /sys/block/zram<id>/notify_free
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
Description:
The notify_free file is read-only. Depending on device usage
scenario it may account a) the number of pages freed because
of swap slot free notifications or b) the number of pages freed
because of REQ_DISCARD requests sent by bio. The former ones
are sent to a swap block device when a swap slot is freed, which
implies that this disk is being used as a swap disk. The latter
ones are sent by filesystem mounted with discard option,
whenever some data blocks are getting discarded.
What: /sys/block/zram<id>/zero_pages
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
Description:
The zero_pages file is read-only and specifies number of zero
filled pages written to this disk. No memory is allocated for
such pages.
What: /sys/block/zram<id>/orig_data_size
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
Description:
The orig_data_size file is read-only and specifies uncompressed
size of data stored in this disk. This excludes zero-filled
pages (zero_pages) since no memory is allocated for them.
Unit: bytes
What: /sys/block/zram<id>/compr_data_size
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
Description:
The compr_data_size file is read-only and specifies compressed
size of data stored in this disk. So, compression ratio can be
calculated using orig_data_size and this statistic.
Unit: bytes
What: /sys/block/zram<id>/mem_used_total
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
Description:
The mem_used_total file is read-only and specifies the amount
of memory, including allocator fragmentation and metadata
overhead, allocated for this disk. So, allocator space
efficiency can be calculated using compr_data_size and this
statistic.
Unit: bytes
What: /sys/block/zram<id>/mem_used_max
Date: August 2014
Contact: Minchan Kim <minchan@kernel.org>
Description:
The mem_used_max file is read/write and specifies the maximum
amount of memory zram has consumed to store compressed data.
To reset the value, write "0"; otherwise, you will see -EINVAL.
The mem_used_max file is write-only and is used to reset
the counter of the maximum memory zram has consumed to store
compressed data. To reset the value, write "0"; otherwise,
you will see -EINVAL.
Unit: bytes
What: /sys/block/zram<id>/mem_limit
Date: August 2014
Contact: Minchan Kim <minchan@kernel.org>
Description:
The mem_limit file is read/write and specifies the maximum
amount of memory ZRAM can use to store the compressed data. The
limit could be changed in run time and "0" means disable the
limit. No limit is the initial state. Unit: bytes
The mem_limit file is write-only and specifies the maximum
amount of memory ZRAM can use to store the compressed data.
The limit could be changed in run time and "0" means disable
the limit. No limit is the initial state. Unit: bytes
What: /sys/block/zram<id>/compact
Date: August 2015
@ -175,3 +90,50 @@ Description:
device's debugging info useful for kernel developers. Its
format is not documented intentionally and may change
anytime without any notice.
What: /sys/block/zram<id>/backing_dev
Date: June 2017
Contact: Minchan Kim <minchan@kernel.org>
Description:
The backing_dev file is read-write and sets up the backing
device for zram to write incompressible pages to.
To use it, the user should enable CONFIG_ZRAM_WRITEBACK.
What: /sys/block/zram<id>/idle
Date: November 2018
Contact: Minchan Kim <minchan@kernel.org>
Description:
The idle file is write-only and marks zram slots as idle.
If the system has debugfs mounted, the user can see which slots
are idle via /sys/kernel/debug/zram/zram<id>/block_state
What: /sys/block/zram<id>/writeback
Date: November 2018
Contact: Minchan Kim <minchan@kernel.org>
Description:
The writeback file is write-only and triggers idle and/or
huge page writeback to the backing device.
What: /sys/block/zram<id>/bd_stat
Date: November 2018
Contact: Minchan Kim <minchan@kernel.org>
Description:
The bd_stat file is read-only and represents backing device's
statistics (bd_count, bd_reads, bd_writes) in a format
similar to block layer statistics file format.
What: /sys/block/zram<id>/writeback_limit_enable
Date: November 2018
Contact: Minchan Kim <minchan@kernel.org>
Description:
The writeback_limit_enable file is read-write and specifies
enabling of the writeback_limit feature. "1" means enable the
feature. "0" (no limit) is the initial state.
What: /sys/block/zram<id>/writeback_limit
Date: November 2018
Contact: Minchan Kim <minchan@kernel.org>
Description:
The writeback_limit file is read-write and specifies the maximum
amount of writeback ZRAM can do. The limit could be changed
in run time.


@ -4,7 +4,7 @@ KernelVersion: 3.10
Contact: Samuel Ortiz <sameo@linux.intel.com>
linux-mei@linux.intel.com
Description: Stores the same MODALIAS value emitted by uevent
Format: mei:<mei device name>:<device uuid>:
Format: mei:<mei device name>:<device uuid>:<protocol version>
What: /sys/bus/mei/devices/.../name
Date: May 2015


@ -356,6 +356,10 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/l1tf
/sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
@ -367,3 +371,25 @@ Description: Information about CPU vulnerabilities
"Not affected" CPU is not affected by the vulnerability
"Vulnerable" CPU is affected and no mitigation in effect
"Mitigation: $M" CPU is affected and mitigation $M is in effect
See also: Documentation/hw-vuln/index.rst
What: /sys/devices/system/cpu/smt
/sys/devices/system/cpu/smt/active
/sys/devices/system/cpu/smt/control
Date: June 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Control Symmetric Multi Threading (SMT)
active: Tells whether SMT is active (enabled and siblings online)
control: Read/write interface to control SMT. Possible
values:
"on" SMT is enabled
"off" SMT is disabled
"forceoff" SMT is force disabled. Cannot be changed.
"notsupported" SMT is not supported by the CPU
If control status is "forceoff" or "notsupported" writes
are rejected.


@ -51,6 +51,14 @@ Description:
Controls the dirty page count condition for the in-place-update
policies.
What: /sys/fs/f2fs/<disk>/min_seq_blocks
Date: August 2018
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the dirty page count condition for batched sequential
writes in ->writepages.
What: /sys/fs/f2fs/<disk>/min_hot_blocks
Date: March 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
@ -78,12 +86,28 @@ Description:
The unit size is one block, now only support configuring in range
of [1, 512].
What: /sys/fs/f2fs/<disk>/umount_discard_timeout
Date: January 2019
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Set timeout to issue discard commands during umount.
Default: 5 secs
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the number of trials to find a victim segment.
What: /sys/fs/f2fs/<disk>/migration_granularity
Date: October 2018
Contact: "Chao Yu" <yuchao0@huawei.com>
Description:
Controls the migration granularity of garbage collection on a large
section. It lets GC move partial segment(s) of one section
in one GC cycle, dispersing the heavy GC overhead across
multiple lightweight GC cycles.
What: /sys/fs/f2fs/<disk>/dir_level
Date: March 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@ -113,7 +137,22 @@ What: /sys/fs/f2fs/<disk>/idle_interval
Date: January 2016
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the idle timing.
Controls the idle timing for all paths other than
discard and gc path.
What: /sys/fs/f2fs/<disk>/discard_idle_interval
Date: September 2018
Contact: "Chao Yu" <yuchao0@huawei.com>
Contact: "Sahitya Tummala" <stummala@codeaurora.org>
Description:
Controls the idle timing for discard path.
What: /sys/fs/f2fs/<disk>/gc_idle_interval
Date: September 2018
Contact: "Chao Yu" <yuchao0@huawei.com>
Contact: "Sahitya Tummala" <stummala@codeaurora.org>
Description:
Controls the idle timing for gc path.
What: /sys/fs/f2fs/<disk>/iostat_enable
Date: August 2017


@ -33,7 +33,7 @@ GNU C 3.2 gcc --version
GNU make 3.80 make --version
binutils 2.12 ld -v
util-linux 2.10o fdformat --version
module-init-tools 0.9.10 depmod -V
kmod 13 depmod -V
e2fsprogs 1.41.4 e2fsck -V
jfsutils 1.1.3 fsck.jfs -V
reiserfsprogs 3.6.3 reiserfsck -V
@ -143,12 +143,6 @@ is not build with ``CONFIG_KALLSYMS`` and you have no way to rebuild and
reproduce the Oops with that option, then you can still decode that Oops
with ksymoops.
Module-Init-Tools
-----------------
A new module loader is now in the kernel that requires ``module-init-tools``
to use. It is backward compatible with the 2.4.x series kernels.
Mkinitrd
--------
@ -363,16 +357,17 @@ Util-linux
- <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>
Kmod
----
- <https://www.kernel.org/pub/linux/utils/kernel/kmod/>
- <https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git>
Ksymoops
--------
- <ftp://ftp.kernel.org/pub/linux/utils/kernel/ksymoops/v2.4/>
Module-Init-Tools
-----------------
- <ftp://ftp.kernel.org/pub/linux/kernel/people/rusty/modules/>
Mkinitrd
--------


@ -0,0 +1,180 @@
================================
PSI - Pressure Stall Information
================================
:Date: April, 2018
:Author: Johannes Weiner <hannes@cmpxchg.org>
When CPU, memory or IO devices are contended, workloads experience
latency spikes, throughput losses, and run the risk of OOM kills.
Without an accurate measure of such contention, users are forced to
either play it safe and under-utilize their hardware resources, or
roll the dice and frequently suffer the disruptions resulting from
excessive overcommit.
The psi feature identifies and quantifies the disruptions caused by
such resource crunches and the time impact it has on complex workloads
or even entire systems.
Having an accurate measure of productivity losses caused by resource
scarcity aids users in sizing workloads to hardware--or provisioning
hardware according to workload demand.
As psi aggregates this information in realtime, systems can be managed
dynamically using techniques such as load shedding, migrating jobs to
other systems or data centers, or strategically pausing or killing low
priority or restartable batch jobs.
This allows maximizing hardware utilization without sacrificing
workload health or risking major disruptions such as OOM kills.
Pressure interface
==================
Pressure information for each resource is exported through the
respective file in /proc/pressure/ -- cpu, memory, and io.
The format for CPU is as such:
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
and for memory and IO:
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
The "some" line indicates the share of time in which at least some
tasks are stalled on a given resource.
The "full" line indicates the share of time in which all non-idle
tasks are stalled on a given resource simultaneously. In this state
actual CPU cycles are going to waste, and a workload that spends
extended time in this state is considered to be thrashing. This has
severe impact on performance, and it's useful to distinguish this
situation from a state where some tasks are stalled but the CPU is
still doing productive work. As such, time spent in this subset of the
stall state is tracked separately and exported in the "full" averages.
The ratios are tracked as recent trends over ten, sixty, and three
hundred second windows, which gives insight into short term events as
well as medium and long term trends. The total absolute stall time is
tracked and exported as well, to allow detection of latency spikes
which wouldn't necessarily make a dent in the time averages, or to
average trends over custom time frames.
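
As a quick illustration of consuming this interface (not part of the original
document), the fields can be read with ordinary file I/O. The sketch below
assumes the "some" line format shown above and omits most error handling.

#include <stdio.h>

int main(void)
{
	float avg10, avg60, avg300;
	unsigned long long total;
	FILE *f = fopen("/proc/pressure/memory", "r");

	if (!f)
		return 1;
	/* Parse the "some" line: some avg10=... avg60=... avg300=... total=... */
	if (fscanf(f, "some avg10=%f avg60=%f avg300=%f total=%llu",
		   &avg10, &avg60, &avg300, &total) == 4)
		printf("memory some: avg10=%.2f total=%llu\n", avg10, total);
	fclose(f);
	return 0;
}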
Monitoring for pressure thresholds
==================================
Users can register triggers and use poll() to be woken up when resource
pressure exceeds certain thresholds.
A trigger describes the maximum cumulative stall time over a specific
time window, e.g. 100ms of total stall time within any 500ms window to
generate a wakeup event.
To register a trigger user has to open psi interface file under
/proc/pressure/ representing the resource to be monitored and write the
desired threshold and time window. The open file descriptor should be
used to wait for trigger events using select(), poll() or epoll().
The following format is used:
<some|full> <stall amount in us> <time window in us>
For example writing "some 150000 1000000" into /proc/pressure/memory
would add 150ms threshold for partial memory stall measured within
1sec time window. Writing "full 50000 1000000" into /proc/pressure/io
would add 50ms threshold for full io stall measured within 1sec time window.
Triggers can be set on more than one psi metric and more than one trigger
for the same psi metric can be specified. However for each trigger a separate
file descriptor is required to be able to poll it separately from others,
therefore for each trigger a separate open() syscall should be made even
when opening the same psi interface file.
Monitors activate only when system enters stall state for the monitored
psi metric and deactivates upon exit from the stall state. While system is
in the stall state psi signal growth is monitored at a rate of 10 times per
tracking window.
The kernel accepts window sizes ranging from 500ms to 10s, therefore min
monitoring update interval is 50ms and max is 1s. Min limit is set to
prevent overly frequent polling. Max limit is chosen as a high enough number
after which monitors are most likely not needed and psi averages can be used
instead.
When activated, psi monitor stays active for at least the duration of one
tracking window to avoid repeated activations/deactivations when system is
bouncing in and out of the stall state.
Notifications to the userspace are rate-limited to one per tracking window.
The trigger will de-register when the file descriptor used to define the
trigger is closed.
Userspace monitor usage example
===============================
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <poll.h>
#include <string.h>
#include <unistd.h>

/*
 * Monitor memory partial stall with 1s tracking window size
 * and 150ms threshold.
 */
int main() {
	const char trig[] = "some 150000 1000000";
	struct pollfd fds;
	int n;

	fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
	if (fds.fd < 0) {
		printf("/proc/pressure/memory open error: %s\n",
			strerror(errno));
		return 1;
	}
	fds.events = POLLPRI;

	if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
		printf("/proc/pressure/memory write error: %s\n",
			strerror(errno));
		return 1;
	}

	printf("waiting for events...\n");
	while (1) {
		n = poll(&fds, 1, -1);
		if (n < 0) {
			printf("poll error: %s\n", strerror(errno));
			return 1;
		}
		if (fds.revents & POLLERR) {
			printf("got POLLERR, event source is gone\n");
			return 0;
		}
		if (fds.revents & POLLPRI) {
			printf("event triggered!\n");
		} else {
			printf("unknown event received: 0x%x\n", fds.revents);
			return 1;
		}
	}

	return 0;
}
Cgroup2 interface
=================
In a system with a CONFIG_CGROUPS=y kernel and the cgroup2 filesystem
mounted, pressure stall information is also tracked for tasks grouped
into cgroups. Each subdirectory in the cgroupfs mountpoint contains
cpu.pressure, memory.pressure, and io.pressure files; the format is
the same as the /proc/pressure/ files.
Per-cgroup psi monitors can be specified and used the same way as
system-wide ones.


@ -6,7 +6,7 @@ TL;DR summary
* Use only NEON instructions, or VFP instructions that don't rely on support
code
* Isolate your NEON code in a separate compilation unit, and compile it with
'-mfpu=neon -mfloat-abi=softfp'
'-march=armv7-a -mfpu=neon -mfloat-abi=softfp'
* Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your
NEON code
* Don't sleep in your NEON code, and be aware that it will be executed with
@ -87,7 +87,7 @@ instructions appearing in unexpected places if no special care is taken.
Therefore, the recommended and only supported way of using NEON/VFP in the
kernel is by adhering to the following rules:
* isolate the NEON code in a separate compilation unit and compile it with
'-mfpu=neon -mfloat-abi=softfp';
'-march=armv7-a -mfpu=neon -mfloat-abi=softfp';
* issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
into the unit containing the NEON code from a compilation unit which is *not*
built with the GCC flag '-mfpu=neon' set.


@ -156,47 +156,24 @@ Per-device statistics are exported as various nodes under /sys/block/zram<id>/
A brief description of exported device attributes. For more details please
read Documentation/ABI/testing/sysfs-block-zram.
Name access description
---- ------ -----------
disksize RW show and set the device's disk size
initstate RO shows the initialization state of the device
reset WO trigger device reset
num_reads RO the number of reads
failed_reads RO the number of failed reads
num_write RO the number of writes
failed_writes RO the number of failed writes
invalid_io RO the number of non-page-size-aligned I/O requests
max_comp_streams RW the number of possible concurrent compress operations
comp_algorithm RW show and change the compression algorithm
notify_free RO the number of notifications to free pages (either
slot free notifications or REQ_DISCARD requests)
zero_pages RO the number of zero filled pages written to this disk
orig_data_size RO uncompressed size of data stored in this disk
compr_data_size RO compressed size of data stored in this disk
mem_used_total RO the amount of memory allocated for this disk
mem_used_max RW the maximum amount of memory zram have consumed to
store the data (to reset this counter to the actual
current value, write 1 to this attribute)
mem_limit RW the maximum amount of memory ZRAM can use to store
the compressed data
pages_compacted RO the number of pages freed during compaction
(available only via zram<id>/mm_stat node)
compact WO trigger memory compaction
debug_stat RO this file is used for zram debugging purposes
Name access description
---- ------ -----------
disksize RW show and set the device's disk size
initstate RO shows the initialization state of the device
reset WO trigger device reset
mem_used_max WO reset the `mem_used_max' counter (see later)
mem_limit WO specifies the maximum amount of memory ZRAM can use
to store the compressed data
writeback_limit WO specifies the maximum amount of write IO zram can
write out to backing device as 4KB unit
writeback_limit_enable RW show and set writeback_limit feature
max_comp_streams RW the number of possible concurrent compress operations
comp_algorithm RW show and change the compression algorithm
compact WO trigger memory compaction
debug_stat RO this file is used for zram debugging purposes
backing_dev RW set up backend storage for zram to write out
idle WO mark allocated slot as idle
WARNING
=======
per-stat sysfs attributes are considered to be deprecated.
The basic strategy is:
-- the existing RW nodes will be downgraded to WO nodes (in linux 4.11)
-- deprecated RO sysfs nodes will eventually be removed (in linux 4.11)
The list of deprecated attributes can be found here:
Documentation/ABI/obsolete/sysfs-block-zram
Basically, every attribute that has its own read accessible sysfs node
(e.g. num_reads) *AND* is accessible via one of the stat files (zram<id>/stat
or zram<id>/io_stat or zram<id>/mm_stat) is considered to be deprecated.
User space is advised to use the following files to read the device statistics.
@ -211,22 +188,52 @@ The stat file represents device's I/O statistics not accounted by block
layer and, thus, not available in zram<id>/stat file. It consists of a
single line of text and contains the following stats separated by
whitespace:
failed_reads
failed_writes
invalid_io
notify_free
failed_reads the number of failed reads
failed_writes the number of failed writes
invalid_io the number of non-page-size-aligned I/O requests
notify_free Depending on device usage scenario it may account
a) the number of pages freed because of swap slot free
notifications or b) the number of pages freed because of
REQ_DISCARD requests sent by bio. The former ones are
sent to a swap block device when a swap slot is freed,
which implies that this disk is being used as a swap disk.
The latter ones are sent by filesystem mounted with
discard option, whenever some data blocks are getting
discarded.
File /sys/block/zram<id>/mm_stat
The stat file represents device's mm statistics. It consists of a single
line of text and contains the following stats separated by whitespace:
orig_data_size
compr_data_size
mem_used_total
mem_limit
mem_used_max
zero_pages
num_migrated
orig_data_size uncompressed size of data stored in this disk.
This excludes same-element-filled pages (same_pages) since
no memory is allocated for them.
Unit: bytes
compr_data_size compressed size of data stored in this disk
mem_used_total the amount of memory allocated for this disk, including
allocator fragmentation and metadata overhead. So, allocator
space efficiency can be calculated using compr_data_size and
this statistic.
Unit: bytes
mem_limit the maximum amount of memory ZRAM can use to store
the compressed data
mem_used_max the maximum amount of memory zram has consumed to
store the data
same_pages the number of same element filled pages written to this disk.
No memory is allocated for such pages.
pages_compacted the number of pages freed during compaction
huge_pages the number of incompressible pages
File /sys/block/zram<id>/bd_stat
The stat file represents device's backing device statistics. It consists of
a single line of text and contains the following stats separated by whitespace:
bd_count size of data written in backing device.
Unit: 4K bytes
bd_reads the number of reads from backing device
Unit: 4K bytes
bd_writes the number of writes to backing device
Unit: 4K bytes
9) Deactivate:
swapoff /dev/zram0
@ -241,5 +248,108 @@ line of text and contains the following stats separated by whitespace:
resets the disksize to zero. You must set the disksize again
before reusing the device.
* Optional Feature
= writeback
With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible pages
to backing storage rather than keeping them in memory.
To use the feature, the admin should set up the backing device via
"echo /dev/sda5 > /sys/block/zramX/backing_dev"
before setting the disksize. Only a partition is supported at the moment.
If the admin wants to use incompressible page writeback, it can be done via
"echo huge > /sys/block/zramX/writeback"
To use idle page writeback, the user first needs to declare zram pages
as idle.
"echo all > /sys/block/zramX/idle"
From now on, all pages on zram are idle pages. The idle mark
is removed when someone requests access to a block.
IOW, unless there is an access request, those pages remain idle pages.
The admin can request writeback of those idle pages at the right time via
"echo idle > /sys/block/zramX/writeback"
With this command, zram writes back idle pages from memory to the storage.
If there is a lot of write IO to a flash device, it can potentially cause
flash wearout, so the admin needs to design a write limitation
to guarantee storage health for the entire product life.
To address this concern, zram supports the "writeback_limit" feature.
The default value of "writeback_limit_enable" is 0, so it doesn't limit
any writeback. IOW, if the admin wants to apply a writeback budget, they
should enable writeback_limit_enable via
$ echo 1 > /sys/block/zramX/writeback_limit_enable
Once writeback_limit_enable is set, zram doesn't allow any writeback
until the admin sets the budget via /sys/block/zramX/writeback_limit.
(If the admin doesn't enable writeback_limit_enable, the writeback_limit
value assigned via /sys/block/zramX/writeback_limit is meaningless.)
If the admin wants to limit writeback to 400M per day, it could be done
like below.
$ MB_SHIFT=20
$ FOURK_SHIFT=12
$ echo $((400<<MB_SHIFT>>FOURK_SHIFT)) > \
	/sys/block/zram0/writeback_limit
$ echo 1 > /sys/block/zram0/writeback_limit_enable
If the admin wants to allow further writeback again once the budget is
exhausted, it could be done like below:
$ echo $((400<<MB_SHIFT>>FOURK_SHIFT)) > \
	/sys/block/zram0/writeback_limit
To see the remaining writeback budget since it was set:
$ cat /sys/block/zramX/writeback_limit
To disable the writeback limit:
$ echo 0 > /sys/block/zramX/writeback_limit_enable
The writeback_limit count will reset whenever you reset zram (e.g.,
system reboot, echo 1 > /sys/block/zramX/reset), so keeping track of how
much writeback happened before the reset, in order to allocate an extra
writeback budget in the next setting, is the user's job.
To measure the writeback count over a certain period, read the 3rd column
of /sys/block/zram0/bd_stat.
= memory tracking
With CONFIG_ZRAM_MEMORY_TRACKING, the user can inspect the state of
zram blocks. It can be useful for catching cold or incompressible
pages of a process with pagemap.
If you enable the feature, you can see the block state via
/sys/kernel/debug/zram/zram0/block_state. The output is as follows:
300 75.033841 .wh.
301 63.806904 s...
302 63.806919 ..hi
First column is zram's block index.
Second column is access time since the system was booted
Third column is state of the block.
(s: same page
w: written page to backing store
h: huge page
i: idle page)
The first line of the above example says the 300th block was accessed at
75.033841 sec and the block's state is huge, so it was written back to the
backing storage. It's a debugging feature, so no one should rely on it to
work properly.
Nitin Gupta
ngupta@vflare.org


@ -717,6 +717,12 @@ All time durations are in microseconds.
$PERIOD duration. If only one number is written, $MAX is
updated.
cpu.pressure
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for CPU. See
Documentation/accounting/psi.txt for details.
5-2. Memory
@ -925,6 +931,12 @@ PAGE_SIZE multiple when read back.
Swap usage hard limit. If a cgroup's swap usage reaches this
limit, anonymous memory of the cgroup will not be swapped out.
memory.pressure
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for memory. See
Documentation/accounting/psi.txt for details.
5-2-2. Usage Guidelines
@ -1055,6 +1067,12 @@ blk-mq devices.
8:16 rbps=2097152 wbps=max riops=max wiops=max
io.pressure
A read-only nested-key file which exists on non-root cgroups.
Shows pressure stall information for IO. See
Documentation/accounting/psi.txt for details.
5-3-2. Writeback


@ -37,7 +37,7 @@ from load_config import loadConfig
extensions = ['kernel-doc', 'rstFlatTable', 'kernel_include', 'cdomain']
# The name of the math extension changed on Sphinx 1.4
if major == 1 and minor > 3:
if (major == 1 and minor > 3) or (major > 1):
extensions.append("sphinx.ext.imgmath")
else:
extensions.append("sphinx.ext.pngmath")


@ -0,0 +1,99 @@
dm_bow (backup on write)
========================
dm_bow is a device mapper driver that uses the free space on a device to back up
data that is overwritten. The changes can then be committed by a simple state
change, or rolled back by removing the dm_bow device and running a command line
utility over the underlying device.
dm_bow has three states, set by writing 1 or 2 to /sys/block/dm-?/bow/state.
It is only possible to go from state 0 (initial state) to state 1, and then from
state 1 to state 2.
State 0: dm_bow collects all trims to the device and assumes that these mark
free space on the overlying file system that can be safely used. Typically the
mount code would create the dm_bow device, mount the file system, call the
FITRIM ioctl on the file system then switch to state 1. These trims are not
propagated to the underlying device.
State 1: All writes to the device cause the underlying data to be backed up to
the free (trimmed) area as needed in such a way as they can be restored.
However, the writes, with one exception, then happen exactly as they would
without dm_bow, so the device is always in a good final state. The exception is
that sector 0 is used to keep a log of the latest changes, both to indicate that
we are in this state and to allow rollback. See below for all details. If there
isn't enough free space, writes are failed with -ENOSPC.
State 2: The transition to state 2 triggers replacing the special sector 0 with
the normal sector 0, and the freeing of all state information. dm_bow then
becomes a pass-through driver, allowing the device to continue to be used with
minimal performance impact.
Usage
=====
dm-bow takes one command line parameter, the name of the underlying device.
dm-bow will typically be used in the following way. dm-bow will be loaded with a
suitable underlying device and the resultant device will be mounted. A file
system trim will be issued via the FITRIM ioctl, then the device will be
switched to state 1. The file system will now be used as normal. At some point,
the changes can either be committed by switching to state 2, or rolled back by
unmounting the file system, removing the dm-bow device and running the command
line utility. Note that rebooting the device will be equivalent to unmounting
and removing, but the command line utility must still be run.
Details of operation in state 1
===============================
dm_bow maintains a type for all sectors. A sector can be any of:
SECTOR0
SECTOR0_CURRENT
UNCHANGED
FREE
CHANGED
BACKUP
SECTOR0 is the first sector on the device, and is used to hold the log of
changes. This is the one exception.
SECTOR0_CURRENT is a sector picked from the FREE sectors, and is where reads and
writes from the true sector zero are redirected to. Note that like any backup
sector, if the sector is written to directly, it must be moved again.
UNCHANGED means that the sector has not been changed since we entered state 1.
Thus if it is written to or trimmed, the contents must first be backed up.
FREE means that the sector was trimmed in state 0 and has not yet been written
to or used for backup. On being written to, a FREE sector is changed to CHANGED.
CHANGED means that the sector has been modified, and can be further modified
without further backup.
BACKUP means that this is a free sector being used as a backup. On being written
to, the contents must first be backed up again.
All backup operations are logged to the first sector. The log sector has the
format:
--------------------------------------------------------
| Magic | Count | Sequence | Log entry | Log entry | …
--------------------------------------------------------
Magic is a magic number. Count is the number of log entries. Sequence is 0
initially. A log entry is
-----------------------------------
| Source | Dest | Size | Checksum |
-----------------------------------
When SECTOR0 is full, the log sector is backed up and another empty log sector
created with a sequence number one higher. The first entry in any log sector with
sequence > 0 therefore must be the log of the backing up of the previous log
sector. Note that sequence is not strictly needed, but is a useful sanity check
and potentially limits the time spent trying to restore a corrupted snapshot.
On entering state 1, dm_bow has a list of free sectors. All other sectors are
unchanged. Sector0_current is selected from the free sectors and the contents of
sector 0 are copied there. The sector 0 is backed up, which triggers the first
log entry to be written.


@ -6,7 +6,8 @@ Required properties:
"atmel,24c00", "atmel,24c01", "atmel,24c02", "atmel,24c04",
"atmel,24c08", "atmel,24c16", "atmel,24c32", "atmel,24c64",
"atmel,24c128", "atmel,24c256", "atmel,24c512", "atmel,24c1024"
"atmel,24c128", "atmel,24c256", "atmel,24c512", "atmel,24c1024",
"atmel,24c2048"
"catalyst,24c32"
@ -17,7 +18,7 @@ Required properties:
If there is no specific driver for <manufacturer>, a generic
driver based on <type> is selected. Possible types are:
"24c00", "24c01", "24c02", "24c04", "24c08", "24c16", "24c32", "24c64",
"24c128", "24c256", "24c512", "24c1024", "spd"
"24c128", "24c256", "24c512", "24c1024", "24c2048", "spd"
- reg : the I2C address of the EEPROM


@ -4,6 +4,7 @@ Required properties:
- compatible: Should be one of the following:
- "microchip,mcp2510" for MCP2510.
- "microchip,mcp2515" for MCP2515.
- "microchip,mcp25625" for MCP25625.
- reg: SPI chip select.
- clocks: The clock feeding the CAN controller.
- interrupt-parent: The parent interrupt controller.


@ -10,6 +10,7 @@ Required properties:
Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on
the Cadence GEM, or the generic form: "cdns,gem".
Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs.
Use "atmel,sama5d3-macb" for the 10/100Mbit IP available on Atmel sama5d3 SoCs.
Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs.
Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs.
Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC.


@ -27,4 +27,4 @@ and valid to enable charging:
- "abracon,tc-diode": should be "standard" (0.6V) or "schottky" (0.3V)
- "abracon,tc-resistor": should be <0>, <3>, <6> or <11>. 0 disables the output
resistor, the other values are in ohm.
resistor, the other values are in kOhm.


@ -8,6 +8,6 @@ Required properties:
Example:
serial@12000 {
compatible = "marvell,armada-3700-uart";
reg = <0x12000 0x400>;
reg = <0x12000 0x200>;
interrupts = <43>;
};


@ -125,6 +125,9 @@ active_logs=%u Support configuring the number of active logs. In the
disable_ext_identify Disable the extension list configured by mkfs, so f2fs
is not aware of cold files such as media files.
inline_xattr Enable the inline xattrs feature.
noinline_xattr Disable the inline xattrs feature.
inline_xattr_size=%u Support configuring inline xattr size, it depends on
flexible inline xattr feature.
inline_data Enable the inline data feature: New created small(<~3.4k)
files can be written into inode block.
inline_dentry Enable the inline dir feature: data in new created
@ -154,6 +157,27 @@ noinline_data Disable the inline data feature, inline data feature is
enabled by default.
data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
fault_injection=%d Enable fault injection in all supported types with
specified injection rate.
fault_type=%d Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
Type_Name Type_Value
FAULT_KMALLOC 0x000000001
FAULT_KVMALLOC 0x000000002
FAULT_PAGE_ALLOC 0x000000004
FAULT_PAGE_GET 0x000000008
FAULT_ALLOC_BIO 0x000000010
FAULT_ALLOC_NID 0x000000020
FAULT_ORPHAN 0x000000040
FAULT_BLOCK 0x000000080
FAULT_DIR_DEPTH 0x000000100
FAULT_EVICT_INODE 0x000000200
FAULT_TRUNCATE 0x000000400
FAULT_READ_IO 0x000000800
FAULT_CHECKPOINT 0x000001000
FAULT_DISCARD 0x000002000
FAULT_WRITE_IO 0x000004000
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
@ -190,6 +214,11 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix",
non-atomic files likewise "nobarrier" mount option.
test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt
context. The fake fscrypt context is used by xfstests.
checkpoint=%s Set to "disable" to turn off checkpointing. Set to "enable"
to reenable checkpointing. Is enabled by default. While
disabled, any unmounting or unexpected shutdowns will cause
the filesystem contents to appear as they did when the
filesystem was mounted with that option.
================================================================================
DEBUGFS ENTRIES


@ -0,0 +1,641 @@
=====================================
Filesystem-level encryption (fscrypt)
=====================================
Introduction
============
fscrypt is a library which filesystems can hook into to support
transparent encryption of files and directories.
Note: "fscrypt" in this document refers to the kernel-level portion,
implemented in ``fs/crypto/``, as opposed to the userspace tool
`fscrypt <https://github.com/google/fscrypt>`_. This document only
covers the kernel-level portion. For command-line examples of how to
use encryption, see the documentation for the userspace tool `fscrypt
<https://github.com/google/fscrypt>`_. Also, it is recommended to use
the fscrypt userspace tool, or other existing userspace tools such as
`fscryptctl <https://github.com/google/fscryptctl>`_ or `Android's key
management system
<https://source.android.com/security/encryption/file-based>`_, over
using the kernel's API directly. Using existing tools reduces the
chance of introducing your own security bugs. (Nevertheless, for
completeness this documentation covers the kernel's API anyway.)
Unlike dm-crypt, fscrypt operates at the filesystem level rather than
at the block device level. This allows it to encrypt different files
with different keys and to have unencrypted files on the same
filesystem. This is useful for multi-user systems where each user's
data-at-rest needs to be cryptographically isolated from the others.
However, except for filenames, fscrypt does not encrypt filesystem
metadata.
Unlike eCryptfs, which is a stacked filesystem, fscrypt is integrated
directly into supported filesystems --- currently ext4, F2FS, and
UBIFS. This allows encrypted files to be read and written without
caching both the decrypted and encrypted pages in the pagecache,
thereby nearly halving the memory used and bringing it in line with
unencrypted files. Similarly, half as many dentries and inodes are
needed. eCryptfs also limits encrypted filenames to 143 bytes,
causing application compatibility issues; fscrypt allows the full 255
bytes (NAME_MAX). Finally, unlike eCryptfs, the fscrypt API can be
used by unprivileged users, with no need to mount anything.
fscrypt does not support encrypting files in-place. Instead, it
supports marking an empty directory as encrypted. Then, after
userspace provides the key, all regular files, directories, and
symbolic links created in that directory tree are transparently
encrypted.
Threat model
============
Offline attacks
---------------
Provided that userspace chooses a strong encryption key, fscrypt
protects the confidentiality of file contents and filenames in the
event of a single point-in-time permanent offline compromise of the
block device content. fscrypt does not protect the confidentiality of
non-filename metadata, e.g. file sizes, file permissions, file
timestamps, and extended attributes. Also, the existence and location
of holes (unallocated blocks which logically contain all zeroes) in
files is not protected.
fscrypt is not guaranteed to protect confidentiality or authenticity
if an attacker is able to manipulate the filesystem offline prior to
an authorized user later accessing the filesystem.
Online attacks
--------------
fscrypt (and storage encryption in general) can only provide limited
protection, if any at all, against online attacks. In detail:
fscrypt is only resistant to side-channel attacks, such as timing or
electromagnetic attacks, to the extent that the underlying Linux
Cryptographic API algorithms are. If a vulnerable algorithm is used,
such as a table-based implementation of AES, it may be possible for an
attacker to mount a side channel attack against the online system.
Side channel attacks may also be mounted against applications
consuming decrypted data.
After an encryption key has been provided, fscrypt is not designed to
hide the plaintext file contents or filenames from other users on the
same system, regardless of the visibility of the keyring key.
Instead, existing access control mechanisms such as file mode bits,
POSIX ACLs, LSMs, or mount namespaces should be used for this purpose.
Also note that as long as the encryption keys are *anywhere* in
memory, an online attacker can necessarily compromise them by mounting
a physical attack or by exploiting any kernel security vulnerability
which provides an arbitrary memory read primitive.
While it is ostensibly possible to "evict" keys from the system,
recently accessed encrypted files will remain accessible at least
until the filesystem is unmounted or the VFS caches are dropped, e.g.
using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the
RAM is compromised before being powered off, it will likely still be
possible to recover portions of the plaintext file contents, if not
some of the encryption keys as well. (Since Linux v4.12, all
in-kernel keys related to fscrypt are sanitized before being freed.
However, userspace would need to do its part as well.)
Currently, fscrypt does not prevent a user from maliciously providing
an incorrect key for another user's existing encrypted files. A
protection against this is planned.
Key hierarchy
=============
Master Keys
-----------
Each encrypted directory tree is protected by a *master key*. Master
keys can be up to 64 bytes long, and must be at least as long as the
greater of the key length needed by the contents and filenames
encryption modes being used. For example, if AES-256-XTS is used for
contents encryption, the master key must be 64 bytes (512 bits). Note
that the XTS mode is defined to require a key twice as long as that
required by the underlying block cipher.
To "unlock" an encrypted directory tree, userspace must provide the
appropriate master key. There can be any number of master keys, each
of which protects any number of directory trees on any number of
filesystems.
Userspace should generate master keys either using a cryptographically
secure random number generator, or by using a KDF (Key Derivation
Function). Note that whenever a KDF is used to "stretch" a
lower-entropy secret such as a passphrase, it is critical that a KDF
designed for this purpose be used, such as scrypt, PBKDF2, or Argon2.
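
For example, a 64-byte master key suitable for AES-256-XTS contents
encryption could be generated with the getrandom() system call. This is a
minimal, illustrative sketch and not part of the original document::

    #include <sys/types.h>
    #include <sys/random.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned char master_key[64];  /* e.g. for AES-256-XTS contents encryption */

        if (getrandom(master_key, sizeof(master_key), 0) !=
            (ssize_t)sizeof(master_key)) {
            perror("getrandom");
            return 1;
        }
        /* The raw key would then be added to the kernel keyring and
         * referenced from an fscrypt policy; see the User API section. */
        return 0;
    }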
Per-file keys
-------------
Since each master key can protect many files, it is necessary to
"tweak" the encryption of each file so that the same plaintext in two
files doesn't map to the same ciphertext, or vice versa. In most
cases, fscrypt does this by deriving per-file keys. When a new
encrypted inode (regular file, directory, or symlink) is created,
fscrypt randomly generates a 16-byte nonce and stores it in the
inode's encryption xattr. Then, it uses a KDF (Key Derivation
Function) to derive the file's key from the master key and nonce.
The Adiantum encryption mode (see `Encryption modes and usage`_) is
special, since it accepts longer IVs and is suitable for both contents
and filenames encryption. For it, a "direct key" option is offered
where the file's nonce is included in the IVs and the master key is
used for encryption directly. This improves performance; however,
users must not use the same master key for any other encryption mode.
Below, the KDF and design considerations are described in more detail.
The current KDF works by encrypting the master key with AES-128-ECB,
using the file's nonce as the AES key. The output is used as the
derived key. If the output is longer than needed, then it is
truncated to the needed length.
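
As an illustration only, the derivation described above can be reproduced in
userspace roughly as follows. This is a minimal sketch, not the kernel's
implementation; the use of OpenSSL's EVP API and the helper name are
assumptions made for the example::

    /* Hypothetical sketch of the KDF described above:
     * derived = AES-128-ECB(key = nonce, data = master_key),
     * truncated to the needed length.  Build with -lcrypto.
     * Assumes master_len is a multiple of the AES block size.
     */
    #include <openssl/evp.h>
    #include <string.h>

    static int derive_key(const unsigned char nonce[16],
                          const unsigned char *master_key, int master_len,
                          unsigned char *derived, int derived_len)
    {
        unsigned char buf[64];
        int outlen = 0, tmplen = 0;
        EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();

        if (!ctx || master_len > (int)sizeof(buf) || derived_len > master_len)
            return -1;
        EVP_EncryptInit_ex(ctx, EVP_aes_128_ecb(), NULL, nonce, NULL);
        EVP_CIPHER_CTX_set_padding(ctx, 0);
        EVP_EncryptUpdate(ctx, buf, &outlen, master_key, master_len);
        EVP_EncryptFinal_ex(ctx, buf + outlen, &tmplen);
        EVP_CIPHER_CTX_free(ctx);

        memcpy(derived, buf, derived_len);  /* truncate if longer than needed */
        return 0;
    }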
Note: this KDF meets the primary security requirement, which is to
produce unique derived keys that preserve the entropy of the master
key, assuming that the master key is already a good pseudorandom key.
However, it is nonstandard and has some problems such as being
reversible, so it is generally considered to be a mistake! It may be
replaced with HKDF or another more standard KDF in the future.
Key derivation was chosen over key wrapping because wrapped keys would
require larger xattrs which would be less likely to fit in-line in the
filesystem's inode table, and there didn't appear to be any
significant advantages to key wrapping. In particular, currently
there is no requirement to support unlocking a file with multiple
alternative master keys or to support rotating master keys. Instead,
the master keys may be wrapped in userspace, e.g. as is done by the
`fscrypt <https://github.com/google/fscrypt>`_ tool.
Including the inode number in the IVs was considered. However, it was
rejected as it would have prevented ext4 filesystems from being
resized, and by itself still wouldn't have been sufficient to prevent
the same key from being directly reused for both XTS and CTS-CBC.
Encryption modes and usage
==========================
fscrypt allows one encryption mode to be specified for file contents
and one encryption mode to be specified for filenames. Different
directory trees are permitted to use different encryption modes.
Currently, the following pairs of encryption modes are supported:
- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
- Adiantum for both contents and filenames
If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
AES-128-CBC was added only for low-powered embedded devices with
crypto accelerators such as CAAM or CESA that do not support XTS.
Adiantum is a (primarily) stream cipher-based mode that is fast even
on CPUs without dedicated crypto instructions. It's also a true
wide-block mode, unlike XTS. It can also eliminate the need to derive
per-file keys. However, it depends on the security of two primitives,
XChaCha12 and AES-256, rather than just one. See the paper
"Adiantum: length-preserving encryption for entry-level processors"
(https://eprint.iacr.org/2018/720.pdf) for more details. To use
Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast
implementations of ChaCha and NHPoly1305 should be enabled, e.g.
CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
New encryption modes can be added relatively easily, without changes
to individual filesystems. However, authenticated encryption (AE)
modes are not currently supported because of the difficulty of dealing
with ciphertext expansion.
Contents encryption
-------------------
For file contents, each filesystem block is encrypted independently.
Currently, only the case where the filesystem block size is equal to
the system's page size (usually 4096 bytes) is supported.
Each block's IV is set to the logical block number within the file as
a little endian number, except that:
- With CBC mode encryption, ESSIV is also used. Specifically, each IV
is encrypted with AES-256 where the AES-256 key is the SHA-256 hash
of the file's data encryption key.
- In the "direct key" configuration (FS_POLICY_FLAG_DIRECT_KEY set in
the fscrypt_policy), the file's nonce is also appended to the IV.
Currently this is only allowed with the Adiantum encryption mode.
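To make the basic construction above concrete, here is a small sketch
of filling in the IV from the logical block number; the 16-byte IV size
and the helper name are assumptions made for the example, and the ESSIV
and direct-key adjustments are omitted::

    #include <stdint.h>
    #include <string.h>

    /* Fill a 16-byte IV with the logical block number in little endian
     * byte order; the remaining bytes stay zero. */
    static void fscrypt_basic_iv_sketch(uint64_t lblk_num, uint8_t iv[16])
    {
            int i;

            memset(iv, 0, 16);
            for (i = 0; i < 8; i++)
                    iv[i] = (uint8_t)(lblk_num >> (8 * i));
    }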
Filenames encryption
--------------------
For filenames, each full filename is encrypted at once. Because of
the requirements to retain support for efficient directory lookups and
filenames of up to 255 bytes, the same IV is used for every filename
in a directory.
However, each encrypted directory still uses a unique key; or
alternatively (for the "direct key" configuration) has the file's
nonce included in the IVs. Thus, IV reuse is limited to within a
single directory.
With CTS-CBC, the IV reuse means that when the plaintext filenames
share a common prefix at least as long as the cipher block size (16
bytes for AES), the corresponding encrypted filenames will also share
a common prefix. This is undesirable. Adiantum does not have this
weakness, as it is a wide-block encryption mode.
All supported filenames encryption modes accept any plaintext length
>= 16 bytes; cipher block alignment is not required. However,
filenames shorter than 16 bytes are NUL-padded to 16 bytes before
being encrypted. In addition, to reduce leakage of filename lengths
via their ciphertexts, all filenames are NUL-padded to the next 4, 8,
16, or 32-byte boundary (configurable). 32 is recommended since this
provides the best confidentiality, at the cost of making directory
entries consume slightly more space. Note that since NUL (``\0``) is
not otherwise a valid character in filenames, the padding will never
produce duplicate plaintexts.
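For example, the padded length of a filename can be computed as in the
following sketch; the helper name is made up, and the padding value is
the 4, 8, 16 or 32 chosen via the policy flags::

    #include <stddef.h>

    /* Round a plaintext filename length up to the configured padding
     * boundary, with a minimum of 16 bytes. */
    static size_t fscrypt_padded_len_sketch(size_t name_len, size_t padding)
    {
            if (name_len < 16)
                    name_len = 16;
            return (name_len + padding - 1) & ~(padding - 1);
    }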
Symbolic link targets are considered a type of filename and are
encrypted in the same way as filenames in directory entries, except
that IV reuse is not a problem as each symlink has its own inode.
User API
========
Setting an encryption policy
----------------------------
The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an
empty directory or verifies that a directory or regular file already
has the specified encryption policy. It takes in a pointer to a
:c:type:`struct fscrypt_policy`, defined as follows::
    #define FS_KEY_DESCRIPTOR_SIZE 8

    struct fscrypt_policy {
            __u8 version;
            __u8 contents_encryption_mode;
            __u8 filenames_encryption_mode;
            __u8 flags;
            __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
    };
This structure must be initialized as follows:
- ``version`` must be 0.
- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must
be set to constants from ``<linux/fs.h>`` which identify the
encryption modes to use. If unsure, use
FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode``
and FS_ENCRYPTION_MODE_AES_256_CTS (4) for
``filenames_encryption_mode``.
- ``flags`` must contain a value from ``<linux/fs.h>`` which
identifies the amount of NUL-padding to use when encrypting
filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3).
In addition, if the chosen encryption modes are both
FS_ENCRYPTION_MODE_ADIANTUM, this can contain
FS_POLICY_FLAG_DIRECT_KEY to specify that the master key should be
used directly, without key derivation.
- ``master_key_descriptor`` specifies how to find the master key in
the keyring; see `Adding keys`_. It is up to userspace to choose a
unique ``master_key_descriptor`` for each master key. The e4crypt
and fscrypt tools use the first 8 bytes of
``SHA-512(SHA-512(master_key))``, but this particular scheme is not
required. Also, the master key need not be in the keyring yet when
FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added
before any files can be created in the encrypted directory.
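Putting this together, a minimal userspace sketch of setting a policy
on an empty directory might look like the following. The directory path
and the key descriptor bytes are made-up examples, and the program
assumes kernel UAPI headers that expose :c:type:`struct fscrypt_policy`
via ``<linux/fs.h>``::

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(void)
    {
            struct fscrypt_policy policy = {
                    .version = 0,
                    .contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS,
                    .filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS,
                    .flags = FS_POLICY_FLAGS_PAD_32,
            };
            int fd;

            /* example descriptor; real programs derive it from the master key */
            memcpy(policy.master_key_descriptor,
                   "\x01\x23\x45\x67\x89\xab\xcd\xef", FS_KEY_DESCRIPTOR_SIZE);

            fd = open("/mnt/ext4/dir", O_RDONLY | O_DIRECTORY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (ioctl(fd, FS_IOC_SET_ENCRYPTION_POLICY, &policy) != 0) {
                    perror("FS_IOC_SET_ENCRYPTION_POLICY");
                    return 1;
            }
            return 0;
    }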
If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY
verifies that the file is an empty directory. If so, the specified
encryption policy is assigned to the directory, turning it into an
encrypted directory. After that, and after providing the
corresponding master key as described in `Adding keys`_, all regular
files, directories (recursively), and symlinks created in the
directory will be encrypted, inheriting the same encryption policy.
The filenames in the directory's entries will be encrypted as well.
Alternatively, if the file is already encrypted, then
FS_IOC_SET_ENCRYPTION_POLICY validates that the specified encryption
policy exactly matches the actual one. If they match, then the ioctl
returns 0. Otherwise, it fails with EEXIST. This works on both
regular files and directories, including nonempty directories.
Note that the ext4 filesystem does not allow the root directory to be
encrypted, even if it is empty. Users who want to encrypt an entire
filesystem with one key should consider using dm-crypt instead.
FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
- ``EACCES``: the file is not owned by the process's uid, nor does the
process have the CAP_FOWNER capability in a namespace with the file
owner's uid mapped
- ``EEXIST``: the file is already encrypted with an encryption policy
different from the one specified
- ``EINVAL``: an invalid encryption policy was specified (invalid
version, mode(s), or flags)
- ``ENOTDIR``: the file is unencrypted and is a regular file, not a
directory
- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory
- ``ENOTTY``: this type of filesystem does not implement encryption
- ``EOPNOTSUPP``: the kernel was not configured with encryption
support for this filesystem, or the filesystem superblock has not
had encryption enabled on it. (For example, to use encryption on an
ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the
kernel config, and the superblock must have had the "encrypt"
feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O
encrypt``.)
- ``EPERM``: this directory may not be encrypted, e.g. because it is
the root directory of an ext4 filesystem
- ``EROFS``: the filesystem is readonly
Getting an encryption policy
----------------------------
The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct
fscrypt_policy`, if any, for a directory or regular file. See above
for the struct definition. No additional permissions are required
beyond the ability to open the file.
FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors:
- ``EINVAL``: the file is encrypted, but it uses an unrecognized
encryption context format
- ``ENODATA``: the file is not encrypted
- ``ENOTTY``: this type of filesystem does not implement encryption
- ``EOPNOTSUPP``: the kernel was not configured with encryption
support for this filesystem
Note: if you only need to know whether a file is encrypted or not, on
most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl
and check for FS_ENCRYPT_FL, or to use the statx() system call and
check for STATX_ATTR_ENCRYPTED in stx_attributes.
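For instance, a quick check with statx() might look like the following
sketch, assuming a libc that exposes the statx() wrapper::

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>

    int main(int argc, char **argv)
    {
            struct statx stx;

            if (argc < 2)
                    return 1;
            if (statx(AT_FDCWD, argv[1], 0, STATX_BASIC_STATS, &stx) != 0) {
                    perror("statx");
                    return 1;
            }
            printf("%s: %sencrypted\n", argv[1],
                   (stx.stx_attributes & STATX_ATTR_ENCRYPTED) ? "" : "not ");
            return 0;
    }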
Getting the per-filesystem salt
-------------------------------
Some filesystems, such as ext4 and F2FS, also support the deprecated
ioctl FS_IOC_GET_ENCRYPTION_PWSALT. This ioctl retrieves a randomly
generated 16-byte value stored in the filesystem superblock. This
value is intended to be used as a salt when deriving an encryption key
from a passphrase or other low-entropy user credential.
FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. Instead, prefer to
generate and manage any needed salt(s) in userspace.
Adding keys
-----------
To provide a master key, userspace must add it to an appropriate
keyring using the add_key() system call (see:
``Documentation/security/keys/core.rst``). The key type must be
"logon"; keys of this type are kept in kernel memory and cannot be
read back by userspace. The key description must be "fscrypt:"
followed by the 16-character lower case hex representation of the
``master_key_descriptor`` that was set in the encryption policy. The
key payload must conform to the following structure::
    #define FS_MAX_KEY_SIZE 64

    struct fscrypt_key {
            u32 mode;
            u8 raw[FS_MAX_KEY_SIZE];
            u32 size;
    };
``mode`` is ignored; just set it to 0. The actual key is provided in
``raw`` with ``size`` indicating its size in bytes. That is, the
bytes ``raw[0..size-1]`` (inclusive) are the actual key.
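For illustration, providing a key from a C program might look like the
following sketch. The key bytes and the descriptor are placeholders,
the struct mirrors the payload layout documented above, and add_key()
comes from libkeyutils::

    #include <keyutils.h>
    #include <linux/types.h>
    #include <stdio.h>
    #include <string.h>

    #define FS_MAX_KEY_SIZE 64
    struct fscrypt_key {
            __u32 mode;
            __u8 raw[FS_MAX_KEY_SIZE];
            __u32 size;
    };

    int main(void)
    {
            struct fscrypt_key key = { .mode = 0, .size = 64 };
            key_serial_t kid;

            /* placeholder key material; a real key must be a high-entropy secret */
            memset(key.raw, 0xAA, key.size);

            /* description: "fscrypt:" + 16 lower case hex characters */
            kid = add_key("logon", "fscrypt:0123456789abcdef",
                          &key, sizeof(key), KEY_SPEC_SESSION_KEYRING);
            if (kid < 0) {
                    perror("add_key");
                    return 1;
            }
            return 0;
    }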
The key description prefix "fscrypt:" may alternatively be replaced
with a filesystem-specific prefix such as "ext4:". However, the
filesystem-specific prefixes are deprecated and should not be used in
new programs.
There are several different types of keyrings in which encryption keys
may be placed, such as a session keyring, a user session keyring, or a
user keyring. Each key must be placed in a keyring that is "attached"
to all processes that might need to access files encrypted with it, in
the sense that request_key() will find the key. Generally, if only
processes belonging to a specific user need to access a given
encrypted directory and no session keyring has been installed, then
that directory's key should be placed in that user's user session
keyring or user keyring. Otherwise, a session keyring should be
installed if needed, and the key should be linked into that session
keyring, or in a keyring linked into that session keyring.
Note: introducing the complex visibility semantics of keyrings here
was arguably a mistake --- especially given that by design, after any
process successfully opens an encrypted file (thereby setting up the
per-file key), possessing the keyring key is not actually required for
any process to read/write the file until its in-memory inode is
evicted. In the future there probably should be a way to provide keys
directly to the filesystem instead, which would make the intended
semantics clearer.
Access semantics
================
With the key
------------
With the encryption key, encrypted regular files, directories, and
symlinks behave very similarly to their unencrypted counterparts ---
after all, the encryption is intended to be transparent. However,
astute users may notice some differences in behavior:
- Unencrypted files, or files encrypted with a different encryption
policy (i.e. different key, modes, or flags), cannot be renamed or
linked into an encrypted directory; see `Encryption policy
enforcement`_. Attempts to do so will fail with EPERM. However,
encrypted files can be renamed within an encrypted directory, or
into an unencrypted directory.
- Direct I/O is not supported on encrypted files. Attempts to use
direct I/O on such files will fall back to buffered I/O.
- The fallocate operations FALLOC_FL_COLLAPSE_RANGE,
FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported
on encrypted files and will fail with EOPNOTSUPP.
- Online defragmentation of encrypted files is not supported. The
EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with
EOPNOTSUPP.
- The ext4 filesystem does not support data journaling with encrypted
regular files. It will fall back to ordered data mode instead.
- DAX (Direct Access) is not supported on encrypted files.
- The st_size of an encrypted symlink will not necessarily give the
length of the symlink target as required by POSIX. It will actually
give the length of the ciphertext, which will be slightly longer
than the plaintext due to NUL-padding and an extra 2-byte overhead.
- The maximum length of an encrypted symlink is 2 bytes shorter than
the maximum length of an unencrypted symlink. For example, on an
EXT4 filesystem with a 4K block size, unencrypted symlinks can be up
to 4095 bytes long, while encrypted symlinks can only be up to 4093
bytes long (both lengths excluding the terminating null).
Note that mmap *is* supported. This is possible because the pagecache
for an encrypted file contains the plaintext, not the ciphertext.
Without the key
---------------
Some filesystem operations may be performed on encrypted regular
files, directories, and symlinks even before their encryption key has
been provided:
- File metadata may be read, e.g. using stat().
- Directories may be listed, in which case the filenames will be
listed in an encoded form derived from their ciphertext. The
current encoding algorithm is described in `Filename hashing and
encoding`_. The algorithm is subject to change, but it is
guaranteed that the presented filenames will be no longer than
NAME_MAX bytes, will not contain the ``/`` or ``\0`` characters, and
will uniquely identify directory entries.
The ``.`` and ``..`` directory entries are special. They are always
present and are not encrypted or encoded.
- Files may be deleted. That is, nondirectory files may be deleted
with unlink() as usual, and empty directories may be deleted with
rmdir() as usual. Therefore, ``rm`` and ``rm -r`` will work as
expected.
- Symlink targets may be read and followed, but they will be presented
in encrypted form, similar to filenames in directories. Hence, they
are unlikely to point to anywhere useful.
Without the key, regular files cannot be opened or truncated.
Attempts to do so will fail with ENOKEY. This implies that any
regular file operations that require a file descriptor, such as
read(), write(), mmap(), fallocate(), and ioctl(), are also forbidden.
Also without the key, files of any type (including directories) cannot
be created or linked into an encrypted directory, nor can a name in an
encrypted directory be the source or target of a rename, nor can an
O_TMPFILE temporary file be created in an encrypted directory. All
such operations will fail with ENOKEY.
It is not currently possible to backup and restore encrypted files
without the encryption key. This would require special APIs which
have not yet been implemented.
Encryption policy enforcement
=============================
After an encryption policy has been set on a directory, all regular
files, directories, and symbolic links created in that directory
(recursively) will inherit that encryption policy. Special files ---
that is, named pipes, device nodes, and UNIX domain sockets --- will
not be encrypted.
Except for those special files, it is forbidden to have unencrypted
files, or files encrypted with a different encryption policy, in an
encrypted directory tree. Attempts to link or rename such a file into
an encrypted directory will fail with EPERM. This is also enforced
during ->lookup() to provide limited protection against offline
attacks that try to disable or downgrade encryption in known locations
where applications may later write sensitive data. It is recommended
that systems implementing a form of "verified boot" take advantage of
this by validating all top-level encryption policies prior to access.
Implementation details
======================
Encryption context
------------------
An encryption policy is represented on-disk by a :c:type:`struct
fscrypt_context`. It is up to individual filesystems to decide where
to store it, but normally it would be stored in a hidden extended
attribute. It should *not* be exposed by the xattr-related system
calls such as getxattr() and setxattr() because of the special
semantics of the encryption xattr. (In particular, there would be
much confusion if an encryption policy were to be added to or removed
from anything other than an empty directory.) The struct is defined
as follows::
    #define FS_KEY_DESCRIPTOR_SIZE 8
    #define FS_KEY_DERIVATION_NONCE_SIZE 16

    struct fscrypt_context {
            u8 format;
            u8 contents_encryption_mode;
            u8 filenames_encryption_mode;
            u8 flags;
            u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
            u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
    };
Note that :c:type:`struct fscrypt_context` contains the same
information as :c:type:`struct fscrypt_policy` (see `Setting an
encryption policy`_), except that :c:type:`struct fscrypt_context`
also contains a nonce. The nonce is randomly generated by the kernel
and is used to derive the inode's encryption key as described in
`Per-file keys`_.
Data path changes
-----------------
For the read path (->readpage()) of regular files, filesystems can
read the ciphertext into the page cache and decrypt it in-place. The
page lock must be held until decryption has finished, to prevent the
page from becoming visible to userspace prematurely.
For the write path (->writepage()) of regular files, filesystems
cannot encrypt data in-place in the page cache, since the cached
plaintext must be preserved. Instead, filesystems must encrypt into a
temporary buffer or "bounce page", then write out the temporary
buffer. Some filesystems, such as UBIFS, already use temporary
buffers regardless of encryption. Other filesystems, such as ext4 and
F2FS, have to allocate bounce pages specially for encryption.
Filename hashing and encoding
-----------------------------
Modern filesystems accelerate directory lookups by using indexed
directories. An indexed directory is organized as a tree keyed by
filename hashes. When a ->lookup() is requested, the filesystem
normally hashes the filename being looked up so that it can quickly
find the corresponding directory entry, if any.
With encryption, lookups must be supported and efficient both with and
without the encryption key. Clearly, it would not work to hash the
plaintext filenames, since the plaintext filenames are unavailable
without the key. (Hashing the plaintext filenames would also make it
impossible for the filesystem's fsck tool to optimize encrypted
directories.) Instead, filesystems hash the ciphertext filenames,
i.e. the bytes actually stored on-disk in the directory entries. When
asked to do a ->lookup() with the key, the filesystem just encrypts
the user-supplied name to get the ciphertext.
Lookups without the key are more complicated. The raw ciphertext may
contain the ``\0`` and ``/`` characters, which are illegal in
filenames. Therefore, readdir() must base64-encode the ciphertext for
presentation. For most filenames, this works fine; on ->lookup(), the
filesystem just base64-decodes the user-supplied name to get back to
the raw ciphertext.
However, for very long filenames, base64 encoding would cause the
filename length to exceed NAME_MAX. To prevent this, readdir()
actually presents long filenames in an abbreviated form which encodes
a strong "hash" of the ciphertext filename, along with the optional
filesystem-specific hash(es) needed for directory lookups. This
allows the filesystem to still, with a high degree of confidence, map
the filename given in ->lookup() back to a particular directory entry
that was previously listed by readdir(). See :c:type:`struct
fscrypt_digested_name` in the source for more details.
Note that the precise way that filenames are presented to userspace
without the key is subject to change in the future. It is only meant
as a way to temporarily present valid filenames so that commands like
``rm -r`` work as expected on encrypted directories.


@@ -82,6 +82,29 @@ Only the lists of names from directories are merged. Other content
such as metadata and extended attributes are reported for the upper
directory only. These attributes of the lower directory are hidden.
credentials
-----------
By default, all access to the upper, lower and work directories is
performed with the mounter's MAC and DAC credentials, which are
recorded at mount time. Incoming accesses are checked against the
caller's credentials.
In the case where the caller's MAC or DAC credentials do not overlap
with the mounter's, a use case available in older versions of the
driver, the override_creds mount flag can be turned off. This helps
when the caller has legitimate credentials that the mounter does not.
Several unintended side effects will occur, though. A caller without
certain key capabilities, or with lower privilege, will not always be
able to delete files or directories, create nodes, or search some
restricted directories. The ability to search and read a directory
entry is inconsistent, because the cache mechanism does not retest the
credentials; the assumption is that a privileged caller can fill the
cache, after which a lower-privileged caller can read the directory
cache. This uneven security model, where the cache, upperdir and
workdir are opened at the mounter's privilege but accessed without
creating a form of privilege escalation, should only be used with a
strict understanding of the side effects and of the security policies.
whiteouts and opaque directories
--------------------------------


@@ -487,7 +487,9 @@ manner. The codes are the following:
Note that there is no guarantee that every flag and associated mnemonic will
be present in all further kernel releases. Things get changed; the flags may
vanish or, conversely, new ones may be added. Interpretation of their meaning
might change in the future as well, so each consumer of these flags has to
follow each specific kernel version for the exact semantics.
The "Name" field will only be present on a mapping that has been named by
userspace, and will show the name passed in by userspace.


@@ -160,7 +160,7 @@ them but you should handle them according to your needs.
UHID_OUTPUT:
This is sent if the HID device driver wants to send raw data to the I/O
device on the interrupt channel. You should read the payload and forward it to
the device. The payload is of type "struct uhid_output_req".
This may be received even though you haven't received UHID_OPEN, yet.
UHID_GET_REPORT:


@@ -0,0 +1,15 @@
========================
Hardware vulnerabilities
========================
This section describes CPU vulnerabilities and provides an overview of the
possible mitigations along with guidance for selecting mitigations if they
are configurable at compile, boot or run time.
.. toctree::
   :maxdepth: 1

   l1tf
   mds
   tsx_async_abort
   multihit


@@ -0,0 +1,615 @@
L1TF - L1 Terminal Fault
========================
L1 Terminal Fault is a hardware vulnerability which allows unprivileged
speculative access to data which is available in the Level 1 Data Cache
when the page table entry controlling the virtual address, which is used
for the access, has the Present bit cleared or other reserved bits set.
Affected processors
-------------------
This vulnerability affects a wide range of Intel processors. The
vulnerability is not present on:
- Processors from AMD, Centaur and other non Intel vendors
- Older processor models, where the CPU family is < 6
- A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
Penwell, Pineview, Silvermont, Airmont, Merrifield)
- The Intel XEON PHI family
- Intel processors which have the ARCH_CAP_RDCL_NO bit set in the
IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected
by the Meltdown vulnerability either. These CPUs should become
available by end of 2018.
Whether a processor is affected or not can be read out from the L1TF
vulnerability file in sysfs. See :ref:`l1tf_sys_info`.
Related CVEs
------------
The following CVE entries are related to the L1TF vulnerability:
============= ================= ==============================
CVE-2018-3615 L1 Terminal Fault SGX related aspects
CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects
CVE-2018-3646 L1 Terminal Fault Virtualization related aspects
============= ================= ==============================
Problem
-------
If an instruction accesses a virtual address for which the relevant page
table entry (PTE) has the Present bit cleared or other reserved bits set,
then speculative execution ignores the invalid PTE and loads the referenced
data if it is present in the Level 1 Data Cache, as if the page referenced
by the address bits in the PTE was still present and accessible.
While this is a purely speculative mechanism and the instruction will raise
a page fault when it is retired eventually, the pure act of loading the
data and making it available to other speculative instructions opens up the
opportunity for side channel attacks to unprivileged malicious code,
similar to the Meltdown attack.
While Meltdown breaks the user space to kernel space protection, L1TF
allows attacking any physical memory address in the system and the attack
works across all protection domains. It allows attacks against SGX and also
works from inside virtual machines because the speculation bypasses the
extended page table (EPT) protection mechanism.
Attack scenarios
----------------
1. Malicious user space
^^^^^^^^^^^^^^^^^^^^^^^
Operating Systems store arbitrary information in the address bits of a
PTE which is marked non present. This allows a malicious user space
application to attack the physical memory to which these PTEs resolve.
In some cases user-space can maliciously influence the information
encoded in the address bits of the PTE, thus making attacks more
deterministic and more practical.
The Linux kernel contains a mitigation for this attack vector, PTE
inversion, which is permanently enabled and has no performance
impact. The kernel ensures that the address bits of PTEs, which are not
marked present, never point to cacheable physical memory space.
A system with an up to date kernel is protected against attacks from
malicious user space applications.
2. Malicious guest in a virtual machine
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The fact that L1TF breaks all domain protections allows malicious guest
OSes, which can control the PTEs directly, and malicious guest user
space applications, which run on an unprotected guest kernel lacking the
PTE inversion mitigation for L1TF, to attack physical host memory.
A special aspect of L1TF in the context of virtualization is symmetric
multi threading (SMT). The Intel implementation of SMT is called
HyperThreading. The fact that Hyperthreads on the affected processors
share the L1 Data Cache (L1D) is important for this. As the flaw only
allows attacking data which is present in L1D, a malicious guest running
on one Hyperthread can attack the data which is brought into the L1D by
the context which runs on the sibling Hyperthread of the same physical
core. This context can be the host OS, host user space or a different guest.
If the processor does not support Extended Page Tables, the attack is
only possible when the hypervisor does not sanitize the content of the
effective (shadow) page tables.
While solutions exist to mitigate these attack vectors fully, these
mitigations are not enabled by default in the Linux kernel because they
can affect performance significantly. The kernel provides several
mechanisms which can be utilized to address the problem depending on the
deployment scenario. The mitigations, their protection scope and impact
are described in the next sections.
The default mitigations and the rationale for choosing them are explained
at the end of this document. See :ref:`default_mitigations`.
.. _l1tf_sys_info:
L1TF system information
-----------------------
The Linux kernel provides a sysfs interface to enumerate the current L1TF
status of the system: whether the system is vulnerable, and which
mitigations are active. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/l1tf
The possible values in this file are:
=========================== ===============================
'Not affected' The processor is not vulnerable
'Mitigation: PTE Inversion' The host protection is active
=========================== ===============================
If KVM/VMX is enabled and the processor is vulnerable then the following
information is appended to the 'Mitigation: PTE Inversion' part:
- SMT status:
===================== ================
'VMX: SMT vulnerable' SMT is enabled
'VMX: SMT disabled' SMT is disabled
===================== ================
- L1D Flush mode:
================================ ====================================
'L1D vulnerable' L1D flushing is disabled
'L1D conditional cache flushes' L1D flush is conditionally enabled
'L1D cache flushes' L1D flush is unconditionally enabled
================================ ====================================
The resulting grade of protection is discussed in the following sections.
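The file contents can also be read programmatically; a trivial,
illustrative sketch::

    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/l1tf", "r");

            if (!f || !fgets(line, sizeof(line), f)) {
                    perror("l1tf");
                    return 1;
            }
            printf("L1TF status: %s", line);
            fclose(f);
            return 0;
    }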
Host mitigation mechanism
-------------------------
The kernel is unconditionally protected against L1TF attacks from malicious
user space running on the host.
Guest mitigation mechanisms
---------------------------
.. _l1d_flush:
1. L1D flush on VMENTER
^^^^^^^^^^^^^^^^^^^^^^^
To make sure that a guest cannot attack data which is present in the L1D
the hypervisor flushes the L1D before entering the guest.
Flushing the L1D evicts not only the data which should not be accessed
by a potentially malicious guest, it also flushes the guest
data. Flushing the L1D has a performance impact as the processor has to
bring the flushed guest data back into the L1D. Depending on the
frequency of VMEXIT/VMENTER and the type of computations in the guest
performance degradation in the range of 1% to 50% has been observed. For
scenarios where guest VMEXIT/VMENTER are rare the performance impact is
minimal. Virtio and mechanisms like posted interrupts are designed to
confine the VMEXITs to a bare minimum, but specific configurations and
application scenarios might still suffer from a high VMEXIT rate.
The kernel provides two L1D flush modes:
- conditional ('cond')
- unconditional ('always')
The conditional mode avoids L1D flushing after VMEXITs which execute
only audited code paths before the corresponding VMENTER. These code
paths have been verified not to expose secrets or other interesting
data to an attacker, but they can leak information about the address
space layout of the hypervisor.
Unconditional mode flushes L1D on all VMENTER invocations and provides
maximum protection. It has a higher overhead than the conditional
mode. The overhead cannot be quantified correctly as it depends on the
workload scenario and the resulting number of VMEXITs.
The general recommendation is to enable L1D flush on VMENTER. The kernel
defaults to conditional mode on affected processors.
**Note** that L1D flush does not prevent the SMT problem, because the
sibling thread will also bring its data back into the L1D, which makes it
attackable again.
L1D flush can be controlled by the administrator via the kernel command
line and sysfs control files. See :ref:`mitigation_control_command_line`
and :ref:`mitigation_control_kvm`.
.. _guest_confinement:
2. Guest VCPU confinement to dedicated physical cores
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To address the SMT problem, it is possible to make a guest or a group of
guests affine to one or more physical cores. The proper mechanism for
that is to utilize exclusive cpusets to ensure that no other guest or
host tasks can run on these cores.
If only a single guest or related guests run on sibling SMT threads on
the same physical core then they can only attack their own memory and
restricted parts of the host memory.
Host memory is attackable when one of the sibling SMT threads runs in
host OS (hypervisor) context and the other in guest context. The amount
of valuable information from the host OS context depends on the context
which the host OS executes, i.e. interrupts, soft interrupts and kernel
threads. The amount of valuable data from these contexts cannot be
declared as non-interesting for an attacker without deep inspection of
the code.
**Note**, that assigning guests to a fixed set of physical cores affects
the ability of the scheduler to do load balancing and might have
negative effects on CPU utilization depending on the hosting
scenario. Disabling SMT might be a viable alternative for particular
scenarios.
For further information about confining guests to a single or to a group
of cores consult the cpusets documentation:
https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
.. _interrupt_isolation:
3. Interrupt affinity
^^^^^^^^^^^^^^^^^^^^^
Interrupts can be made affine to logical CPUs. This is not universally
true because there are types of interrupts which are truly per CPU
interrupts, e.g. the local timer interrupt. Aside from that, multi queue
devices affine their interrupts to single CPUs or groups of CPUs per
queue without allowing the administrator to control the affinities.
Moving the interrupts, which can be affinity controlled, away from CPUs
which run untrusted guests, reduces the attack vector space.
Whether the interrupts which are affine to CPUs running untrusted
guests provide interesting data for an attacker depends on the system
configuration and the scenarios which run on the system. While for some
of the interrupts it can be assumed that they won't expose interesting
information beyond exposing hints about the host OS memory layout, there
is no way to make general assumptions.
Interrupt affinity can be controlled by the administrator via the
/proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
available at:
https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
.. _smt_control:
4. SMT control
^^^^^^^^^^^^^^
To prevent the SMT issues of L1TF it might be necessary to disable SMT
completely. Disabling SMT can have a significant performance impact, but
the impact depends on the hosting scenario and the type of workloads.
The impact of disabling SMT also needs to be weighed against the impact
of other mitigation solutions like confining guests to dedicated cores.
The kernel provides a sysfs interface to retrieve the status of SMT and
to control it. It also provides a kernel command line interface to
control SMT.
The kernel command line interface consists of the following options:
=========== ==========================================================
nosmt Affects the bring up of the secondary CPUs during boot. The
kernel tries to bring all present CPUs online during the
boot process. "nosmt" makes sure that from each physical
core only one - the so called primary (hyper) thread is
activated. Due to a design flaw of Intel processors related
to Machine Check Exceptions the non primary siblings have
to be brought up at least partially and are then shut down
again. "nosmt" can be undone via the sysfs interface.
nosmt=force Has the same effect as "nosmt" but it does not allow to
undo the SMT disable via the sysfs interface.
=========== ==========================================================
The sysfs interface provides two files:
- /sys/devices/system/cpu/smt/control
- /sys/devices/system/cpu/smt/active
/sys/devices/system/cpu/smt/control:
This file allows the SMT control state to be read out and provides the
ability to disable or (re)enable SMT. The possible states are:
============== ===================================================
on SMT is supported by the CPU and enabled. All
logical CPUs can be onlined and offlined without
restrictions.
off SMT is supported by the CPU and disabled. Only
the so called primary SMT threads can be onlined
and offlined without restrictions. An attempt to
online a non-primary sibling is rejected
forceoff Same as 'off' but the state cannot be controlled.
Attempts to write to the control file are rejected.
notsupported The processor does not support SMT. It's therefore
not affected by the SMT implications of L1TF.
Attempts to write to the control file are rejected.
============== ===================================================
The possible states which can be written into this file to control SMT
state are:
- on
- off
- forceoff
/sys/devices/system/cpu/smt/active:
This file reports whether SMT is enabled and active, i.e. if on any
physical core two or more sibling threads are online.
SMT control is also possible at boot time via the l1tf kernel command
line parameter in combination with L1D flush control. See
:ref:`mitigation_control_command_line`.
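As an illustration, SMT can be disabled at run time by writing "off" to
the control file, e.g. with a sketch like the following (root required)::

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/devices/system/cpu/smt/control", "w");

            if (!f || fputs("off", f) == EOF || fclose(f) == EOF) {
                    perror("smt/control");
                    return 1;
            }
            return 0;
    }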
5. Disabling EPT
^^^^^^^^^^^^^^^^
Disabling EPT for virtual machines provides full mitigation for L1TF even
with SMT enabled, because the effective page tables for guests are
managed and sanitized by the hypervisor. However, disabling EPT has a
significant performance impact, especially when the Meltdown mitigation
KPTI is enabled.
EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
There is ongoing research and development for new mitigation mechanisms to
address the performance impact of disabling SMT or EPT.
.. _mitigation_control_command_line:
Mitigation control on the kernel command line
---------------------------------------------
The kernel command line allows the L1TF mitigations to be controlled at boot
time with the option "l1tf=". The valid arguments for this option are:
============ =============================================================
full Provides all available mitigations for the L1TF
vulnerability. Disables SMT and enables all mitigations in
the hypervisors, i.e. unconditional L1D flushing
SMT control and L1D flush control via the sysfs interface
is still possible after boot. Hypervisors will issue a
warning when the first VM is started in a potentially
insecure configuration, i.e. SMT enabled or L1D flush
disabled.
full,force Same as 'full', but disables SMT and L1D flush runtime
control. Implies the 'nosmt=force' command line option.
(i.e. sysfs control of SMT is disabled.)
flush Leaves SMT enabled and enables the default hypervisor
mitigation, i.e. conditional L1D flushing
SMT control and L1D flush control via the sysfs interface
is still possible after boot. Hypervisors will issue a
warning when the first VM is started in a potentially
insecure configuration, i.e. SMT enabled or L1D flush
disabled.
flush,nosmt Disables SMT and enables the default hypervisor mitigation,
i.e. conditional L1D flushing.
SMT control and L1D flush control via the sysfs interface
is still possible after boot. Hypervisors will issue a
warning when the first VM is started in a potentially
insecure configuration, i.e. SMT enabled or L1D flush
disabled.
flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is
started in a potentially insecure configuration.
off Disables hypervisor mitigations and doesn't emit any
warnings.
It also drops the swap size and available RAM limit restrictions
on both hypervisor and bare metal.
============ =============================================================
The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
.. _mitigation_control_kvm:
Mitigation control for KVM - module parameter
-------------------------------------------------------------
The KVM hypervisor mitigation mechanism, flushing the L1D cache when
entering a guest, can be controlled with a module parameter.
The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the
following arguments:
============ ==============================================================
always L1D cache flush on every VMENTER.
cond Flush L1D on VMENTER only when the code between VMEXIT and
VMENTER can leak host memory which is considered
interesting for an attacker. This still can leak host memory
which allows e.g. determining the host's address space layout.
never Disables the mitigation
============ ==============================================================
The parameter can be provided on the kernel command line, as a module
parameter when loading the modules and at runtime modified via the sysfs
file:
/sys/module/kvm_intel/parameters/vmentry_l1d_flush
The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
module parameter is ignored and writes to the sysfs file are rejected.
.. _mitigation_selection:
Mitigation selection guide
--------------------------
1. No virtualization in use
^^^^^^^^^^^^^^^^^^^^^^^^^^^
The system is protected by the kernel unconditionally and no further
action is required.
2. Virtualization with trusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the guest comes from a trusted source and the guest OS kernel is
guaranteed to have the L1TF mitigations in place the system is fully
protected against L1TF and no further action is required.
To avoid the overhead of the default L1D flushing on VMENTER the
administrator can disable the flushing via the kernel command line and
sysfs control files. See :ref:`mitigation_control_command_line` and
:ref:`mitigation_control_kvm`.
3. Virtualization with untrusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3.1. SMT not supported or disabled
""""""""""""""""""""""""""""""""""
If SMT is not supported by the processor or disabled in the BIOS or by
the kernel, it's only required to enforce L1D flushing on VMENTER.
Conditional L1D flushing is the default behaviour and can be tuned. See
:ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
3.2. EPT not supported or disabled
""""""""""""""""""""""""""""""""""
If EPT is not supported by the processor or disabled in the hypervisor,
the system is fully protected. SMT can stay enabled and L1D flushing on
VMENTER is not required.
EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
3.3. SMT and EPT supported and active
"""""""""""""""""""""""""""""""""""""
If SMT and EPT are supported and active then various degrees of
mitigations can be employed:
- L1D flushing on VMENTER:
L1D flushing on VMENTER is the minimal protection requirement, but it
is only potent in combination with other mitigation methods.
Conditional L1D flushing is the default behaviour and can be tuned. See
:ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
- Guest confinement:
Confinement of guests to a single or a group of physical cores which
are not running any other processes, can reduce the attack surface
significantly, but interrupts, soft interrupts and kernel threads can
still expose valuable data to a potential attacker. See
:ref:`guest_confinement`.
- Interrupt isolation:
Isolating the guest CPUs from interrupts can reduce the attack surface
further, but still allows a malicious guest to explore a limited amount
of host physical memory. This can at least be used to gain knowledge
about the host address space layout. The interrupts which have a fixed
affinity to the CPUs which run the untrusted guests can depending on
the scenario still trigger soft interrupts and schedule kernel threads
which might expose valuable information. See
:ref:`interrupt_isolation`.
The above three mitigation methods combined can provide protection to a
certain degree, but the risk of the remaining attack surface has to be
carefully analyzed. For full protection the following methods are
available:
- Disabling SMT:
Disabling SMT and enforcing the L1D flushing provides the maximum
amount of protection. This mitigation is not depending on any of the
above mitigation methods.
SMT control and L1D flushing can be tuned by the command line
parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run
time with the matching sysfs control files. See :ref:`smt_control`,
:ref:`mitigation_control_command_line` and
:ref:`mitigation_control_kvm`.
- Disabling EPT:
Disabling EPT provides the maximum amount of protection as well. It is
not depending on any of the above mitigation methods. SMT can stay
enabled and L1D flushing is not required, but the performance impact is
significant.
EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
parameter.
3.4. Nested virtual machines
""""""""""""""""""""""""""""
When nested virtualization is in use, three operating systems are involved:
the bare metal hypervisor, the nested hypervisor and the nested virtual
machine. VMENTER operations from the nested hypervisor into the nested
guest will always be processed by the bare metal hypervisor. If KVM is the
bare metal hypervisor it will:
- Flush the L1D cache on every switch from the nested hypervisor to the
nested virtual machine, so that the nested hypervisor's secrets are not
exposed to the nested virtual machine;
- Flush the L1D cache on every switch from the nested virtual machine to
the nested hypervisor; this is a complex operation, and flushing the L1D
cache avoids that the bare metal hypervisor's secrets are exposed to the
nested virtual machine;
- Instruct the nested hypervisor to not perform any L1D cache flush. This
is an optimization to avoid double L1D flushing.
.. _default_mitigations:
Default mitigations
-------------------
The kernel default mitigations for vulnerable processors are:
- PTE inversion to protect against malicious user space. This is done
unconditionally and cannot be controlled. The swap storage is limited
to ~16TB.
- L1D conditional flushing on VMENTER when EPT is enabled for
a guest.
The kernel does not by default enforce the disabling of SMT, which leaves
SMT systems vulnerable when running untrusted guests with EPT enabled.
The rationale for this choice is:
- Force disabling SMT can break existing setups, especially with
unattended updates.
- If regular users run untrusted guests on their machine, then L1TF is
just an add on to other malware which might be embedded in an untrusted
guest, e.g. spam-bots or attacks on the local network.
There is no technical way to prevent a user from running untrusted code
on their machines blindly.
- It's technically extremely unlikely and, from today's knowledge, even
impossible that L1TF can be exploited via the most popular attack
mechanisms like JavaScript, because these mechanisms have no way to
control PTEs. If this were possible and no other mitigation were
available, then the default might be different.
- The administrators of cloud and hosting setups have to carefully
analyze the risk for their scenarios and make the appropriate
mitigation choices, which might even vary across their deployed
machines and also result in other changes of their overall setup.
There is no way for the kernel to provide a sensible default for this
kind of scenario.


@@ -0,0 +1,311 @@
MDS - Microarchitectural Data Sampling
======================================
Microarchitectural Data Sampling is a hardware vulnerability which allows
unprivileged speculative access to data which is available in various CPU
internal buffers.
Affected processors
-------------------
This vulnerability affects a wide range of Intel processors. The
vulnerability is not present on:
- Processors from AMD, Centaur and other non Intel vendors
- Older processor models, where the CPU family is < 6
- Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus)
- Intel processors which have the ARCH_CAP_MDS_NO bit set in the
IA32_ARCH_CAPABILITIES MSR.
Whether a processor is affected or not can be read out from the MDS
vulnerability file in sysfs. See :ref:`mds_sys_info`.
Not all processors are affected by all variants of MDS, but the mitigation
is identical for all of them so the kernel treats them as a single
vulnerability.
Related CVEs
------------
The following CVE entries are related to the MDS vulnerability:
============== ===== ===================================================
CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling
CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling
CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling
CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory
============== ===== ===================================================
Problem
-------
When performing store, load, L1 refill operations, processors write data
into temporary microarchitectural structures (buffers). The data in the
buffer can be forwarded to load operations as an optimization.
Under certain conditions, usually a fault/assist caused by a load
operation, data unrelated to the load memory address can be speculatively
forwarded from the buffers. Because the load operation causes a fault or
assist and its result will be discarded, the forwarded data will not cause
incorrect program execution or state changes. But a malicious operation
may be able to forward this speculative data to a disclosure gadget which
allows in turn to infer the value via a cache side channel attack.
Because the buffers are potentially shared between Hyper-Threads cross
Hyper-Thread attacks are possible.
Deeper technical information is available in the MDS specific x86
architecture section: :ref:`Documentation/x86/mds.rst <mds>`.
Attack scenarios
----------------
Attacks against the MDS vulnerabilities can be mounted from malicious
non-privileged user space applications running on hosts or guests. Malicious
guest OSes can obviously mount attacks as well.
Contrary to other speculation-based vulnerabilities, the MDS vulnerability
does not allow the attacker to control the memory target address. As a
consequence the attacks are purely sampling based, but, as demonstrated with
the TLBleed attack, samples can be postprocessed successfully.
Web-Browsers
^^^^^^^^^^^^
It's unclear whether attacks through Web-Browsers are possible at
all. The exploitation through JavaScript is considered very unlikely,
but other widely used web technologies like WebAssembly could possibly be
abused.
.. _mds_sys_info:
MDS system information
-----------------------
The Linux kernel provides a sysfs interface to enumerate the current MDS
status of the system: whether the system is vulnerable, and which
mitigations are active. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/mds
The possible values in this file are:
.. list-table::

   * - 'Not affected'
     - The processor is not vulnerable
   * - 'Vulnerable'
     - The processor is vulnerable, but no mitigation enabled
   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
     - The processor is vulnerable but microcode is not updated.
       The mitigation is enabled on a best effort basis. See :ref:`vmwerv`
   * - 'Mitigation: Clear CPU buffers'
     - The processor is vulnerable and the CPU buffer clearing mitigation is
       enabled.
If the processor is vulnerable then the following information is appended
to the above information:
======================== ============================================
'SMT vulnerable' SMT is enabled
'SMT mitigated' SMT is enabled and mitigated
'SMT disabled' SMT is disabled
'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
======================== ============================================
.. _vmwerv:
Best effort mitigation mode
^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the processor is vulnerable, but the availability of the microcode based
mitigation mechanism is not advertised via CPUID the kernel selects a best
effort mitigation mode. This mode invokes the mitigation instructions
without a guarantee that they clear the CPU buffers.
This is done to address virtualization scenarios where the host has the
microcode update applied, but the hypervisor is not yet updated to expose
the CPUID to the guest. If the host has updated microcode, the protection
takes effect; otherwise a few CPU cycles are wasted pointlessly.
The state in the mds sysfs file reflects this situation accordingly.
Mitigation mechanism
-------------------------
The kernel detects the affected CPUs and the presence of the microcode
which is required.
If a CPU is affected and the microcode is available, then the kernel
enables the mitigation by default. The mitigation can be controlled at boot
time via a kernel command line option. See
:ref:`mds_mitigation_control_command_line`.
.. _cpu_buffer_clear:
CPU buffer clearing
^^^^^^^^^^^^^^^^^^^
The mitigation for MDS clears the affected CPU buffers on return to user
space and when entering a guest.
If SMT is enabled it also clears the buffers on idle entry when the CPU
is only affected by MSBDS and not any other MDS variant, because the
other variants cannot be protected against cross Hyper-Thread attacks.
For CPUs which are only affected by MSBDS the user space, guest and idle
transition mitigations are sufficient and SMT is not affected.
.. _virt_mechanism:
Virtualization mitigation
^^^^^^^^^^^^^^^^^^^^^^^^^
The protection for host to guest transition depends on the L1TF
vulnerability of the CPU:
- CPU is affected by L1TF:
If the L1D flush mitigation is enabled and up to date microcode is
available, the L1D flush mitigation is automatically protecting the
guest transition.
If the L1D flush mitigation is disabled then the MDS mitigation is
invoked explicitly when the host MDS mitigation is enabled.
For details on L1TF and virtualization see:
:ref:`Documentation/hw-vuln/l1tf.rst <mitigation_control_kvm>`.
- CPU is not affected by L1TF:
CPU buffers are flushed before entering the guest when the host MDS
mitigation is enabled.
The resulting MDS protection matrix for the host to guest transition:
============ ===== ============= ============ =================
L1TF MDS VMX-L1FLUSH Host MDS MDS-State
Don't care No Don't care N/A Not affected
Yes Yes Disabled Off Vulnerable
Yes Yes Disabled Full Mitigated
Yes Yes Enabled Don't care Mitigated
No Yes N/A Off Vulnerable
No Yes N/A Full Mitigated
============ ===== ============= ============ =================
This only covers the host to guest transition, i.e. prevents leakage from
host to guest, but does not protect the guest internally. Guests need to
have their own protections.
.. _xeon_phi:
XEON PHI specific considerations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The XEON PHI processor family is affected by MSBDS which can be exploited
cross Hyper-Threads when entering idle states. Some XEON PHI variants allow
the use of MWAIT in user space (Ring 3), which opens a potential attack vector
for malicious user space. The exposure can be disabled on the kernel
command line with the 'ring3mwait=disable' command line option.
XEON PHI is not affected by the other MDS variants and MSBDS is mitigated
before the CPU enters an idle state. As XEON PHI is not affected by L1TF
either, disabling SMT is not required for full protection.
.. _mds_smt_control:
SMT control
^^^^^^^^^^^
All MDS variants except MSBDS can be attacked cross Hyper-Threads. That
means on CPUs which are affected by MFBDS or MLPDS it is necessary to
disable SMT for full protection. These are most of the affected CPUs; the
exception is XEON PHI, see :ref:`xeon_phi`.
Disabling SMT can have a significant performance impact, but the impact
depends on the type of workloads.
See the relevant chapter in the L1TF mitigation documentation for details:
:ref:`Documentation/hw-vuln/l1tf.rst <smt_control>`.
.. _mds_mitigation_control_command_line:
Mitigation control on the kernel command line
---------------------------------------------
The kernel command line allows the MDS mitigations to be controlled at boot
time with the option "mds=". The valid arguments for this option are:
============ =============================================================
full If the CPU is vulnerable, enable all available mitigations
for the MDS vulnerability, CPU buffer clearing on exit to
userspace and when entering a VM. Idle transitions are
protected as well if SMT is enabled.
It does not automatically disable SMT.
full,nosmt The same as mds=full, with SMT disabled on vulnerable
CPUs. This is the complete mitigation.
off Disables MDS mitigations completely.
============ =============================================================
Not specifying this option is equivalent to "mds=full". For processors
that are affected by both TAA (TSX Asynchronous Abort) and MDS,
specifying just "mds=off" without an accompanying "tsx_async_abort=off"
will have no effect as the same mitigation is used for both
vulnerabilities.
Mitigation selection guide
--------------------------
1. Trusted userspace
^^^^^^^^^^^^^^^^^^^^
If all userspace applications are from a trusted source and do not
execute untrusted code which is supplied externally, then the mitigation
can be disabled.
2. Virtualization with trusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The same considerations as above versus trusted user space apply.
3. Virtualization with untrusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The protection depends on the state of the L1TF mitigations.
See :ref:`virt_mechanism`.
If the MDS mitigation is enabled and SMT is disabled, guest to host and
guest to guest attacks are prevented.
.. _mds_default_mitigations:
Default mitigations
-------------------
The kernel default mitigations for vulnerable processors are:
- Enable CPU buffer clearing
The kernel does not by default enforce the disabling of SMT, which leaves
SMT systems vulnerable when running untrusted code. The same rationale as
for L1TF applies.
See :ref:`Documentation/hw-vuln/l1tf.rst <default_mitigations>`.


@@ -0,0 +1,163 @@
iTLB multihit
=============
iTLB multihit is an erratum where some processors may incur a machine check
error, possibly resulting in an unrecoverable CPU lockup, when an
instruction fetch hits multiple entries in the instruction TLB. This can
occur when the page size is changed along with either the physical address
or cache type. A malicious guest running on a virtualized system can
exploit this erratum to perform a denial of service attack.
Affected processors
-------------------
Variations of this erratum are present on most Intel Core and Xeon processor
models. The erratum is not present on:
- non-Intel processors
- Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
- Intel processors that have the PSCHANGE_MC_NO bit set in the
IA32_ARCH_CAPABILITIES MSR.
Related CVEs
------------
The following CVE entry is related to this issue:
============== =================================================
CVE-2018-12207 Machine Check Error Avoidance on Page Size Change
============== =================================================
Problem
-------
Privileged software, including the OS and virtual machine managers (VMM), is
in charge of memory management. A key component of memory management is the
control of the page tables. Modern processors use virtual memory, a technique
that creates the illusion of a very large memory space. This virtual space is
split into pages of a given size. Page tables translate virtual addresses to
physical addresses.
To reduce latency when performing a virtual to physical address translation,
processors include a structure, called TLB, that caches recent translations.
There are separate TLBs for instruction (iTLB) and data (dTLB).
Under this erratum, instructions are fetched from a linear address translated
using a 4 KB translation cached in the iTLB. Privileged software then modifies
the paging structure so that the same linear address is mapped using a large
page size (2 MB, 4 MB, 1 GB) with a different physical address or memory type.
After the paging structure modification, but before the software invalidates
any iTLB entries for the linear address, a code fetch from the same linear
address may cause a machine-check error which can result in a system hang or
shutdown.
Attack scenarios
----------------
Attacks against the iTLB multihit erratum can be mounted from malicious
guests in a virtualized system.
iTLB multihit system information
--------------------------------
The Linux kernel provides a sysfs interface to enumerate the current iTLB
multihit status of the system: whether the system is vulnerable and which
mitigations are active. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
The possible values in this file are:
.. list-table::
* - Not affected
- The processor is not vulnerable.
* - KVM: Mitigation: Split huge pages
- Software changes mitigate this issue.
* - KVM: Vulnerable
- The processor is vulnerable, but no mitigation is enabled
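For illustration, the file can be read like any other sysfs attribute. A
minimal user-space sketch (the program below is only an example, not an
existing tool)::

  #include <stdio.h>

  int main(void)
  {
      const char *path = "/sys/devices/system/cpu/vulnerabilities/itlb_multihit";
      char line[256];
      FILE *f = fopen(path, "r");

      if (!f) {
          perror(path);   /* missing on kernels without this sysfs entry */
          return 1;
      }
      if (fgets(line, sizeof(line), f))
          printf("iTLB multihit: %s", line);
      fclose(f);
      return 0;
  }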
Enumeration of the erratum
--------------------------------
A new bit has been allocated in the IA32_ARCH_CAPABILITIES MSR (PSCHANGE_MC_NO)
and will be set on CPUs which are mitigated against this issue.
======================================= =========== ===============================
IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable, check model
IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable, check model
IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable
======================================= =========== ===============================
Mitigation mechanism
-------------------------
This erratum can be mitigated by restricting the use of large page sizes to
non-executable pages. This forces all iTLB entries to be 4K, and removes
the possibility of multiple hits.
In order to mitigate the vulnerability, KVM initially marks all huge pages
as non-executable. If the guest attempts to execute in one of those pages,
the page is broken down into 4K pages, which are then marked executable.
If EPT is disabled or not available on the host, KVM is in control of TLB
flushes and the problematic situation cannot happen. However, the shadow
EPT paging mechanism used by nested virtualization is vulnerable, because
the nested guest can trigger multiple iTLB hits by modifying its own
(non-nested) page tables. For simplicity, KVM will make large pages
non-executable in all shadow paging modes.
Mitigation control on the kernel command line and KVM - module parameter
------------------------------------------------------------------------
The KVM hypervisor mitigation mechanism for marking huge pages as
non-executable can be controlled with a module parameter "nx_huge_pages=".
The kernel command line allows control of the iTLB multihit mitigations at
boot time with the option "kvm.nx_huge_pages=".
The valid arguments for these options are:
========== ================================================================
force Mitigation is enabled. In this case, the mitigation implements
non-executable huge pages in Linux kernel KVM module. All huge
pages in the EPT are marked as non-executable.
If a guest attempts to execute in one of those pages, the page is
broken down into 4K pages, which are then marked executable.
off Mitigation is disabled.
auto Enable mitigation only if the platform is affected and the kernel
was not booted with the "mitigations=off" command line parameter.
This is the default option.
========== ================================================================
Mitigation selection guide
--------------------------
1. No virtualization in use
^^^^^^^^^^^^^^^^^^^^^^^^^^^
The system is protected by the kernel unconditionally and no further
action is required.
2. Virtualization with trusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the guest comes from a trusted source, you may assume that the guest will
not attempt to maliciously exploit these errata and no further action is
required.
3. Virtualization with untrusted guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the guest comes from an untrusted source, the host kernel will need
to apply the iTLB multihit mitigation via the kernel command line or the kvm
module parameter.


@ -0,0 +1,279 @@
.. SPDX-License-Identifier: GPL-2.0
TAA - TSX Asynchronous Abort
======================================
TAA is a hardware vulnerability that allows unprivileged speculative access to
data which is available in various CPU internal buffers by using asynchronous
aborts within an Intel TSX transactional region.
Affected processors
-------------------
This vulnerability only affects Intel processors that support Intel
Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit
(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
also mitigate against TAA.
Whether a processor is affected or not can be read out from the TAA
vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
Related CVEs
------------
The following CVE entry is related to this TAA issue:
============== ===== ===================================================
CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some
microprocessors utilizing speculative execution may
allow an authenticated user to potentially enable
information disclosure via a side channel with
local access.
============== ===== ===================================================
Problem
-------
When performing store, load or L1 refill operations, processors write
data into temporary microarchitectural structures (buffers). The data in
those buffers can be forwarded to load operations as an optimization.
Intel TSX is an extension to the x86 instruction set architecture that adds
hardware transactional memory support to improve performance of multi-threaded
software. TSX lets the processor expose and exploit concurrency hidden in an
application by dynamically avoiding unnecessary synchronization.
TSX supports atomic memory transactions that are either committed (success) or
aborted. During an abort, operations that happened within the transactional region
are rolled back. An asynchronous abort takes place, among other options, when a
different thread accesses a cache line that is also used within the transactional
region when that access might lead to a data race.
Immediately after an uncompleted asynchronous abort, certain speculatively
executed loads may read data from those internal buffers and pass it to dependent
operations. This can be then used to infer the value via a cache side channel
attack.
Because the buffers are potentially shared between Hyper-Threads, cross
Hyper-Thread attacks are possible.
The victim of a malicious actor does not need to make use of TSX. Only the
attacker needs to begin a TSX transaction and raise an asynchronous abort
which in turn potentially leaks data stored in the buffers.
More detailed technical information is available in the TAA specific x86
architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
Attack scenarios
----------------
Attacks against the TAA vulnerability can be implemented from unprivileged
applications running on hosts or guests.
As for MDS, the attacker has no control over the memory addresses that can
be leaked. Only the victim is responsible for bringing data to the CPU. As
a result, the malicious actor has to sample as much data as possible and
then postprocess it to try to infer any useful information from it.
A potential attacker only has read access to the data. Also, there is no direct
privilege escalation by using this technique.
.. _tsx_async_abort_sys_info:
TAA system information
-----------------------
The Linux kernel provides a sysfs interface to enumerate the current TAA status
of mitigated systems. The relevant sysfs file is:
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
The possible values in this file are:
.. list-table::
* - 'Vulnerable'
- The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
* - 'Vulnerable: Clear CPU buffers attempted, no microcode'
- The system tries to clear the buffers but the microcode might not support the operation.
* - 'Mitigation: Clear CPU buffers'
- The microcode has been updated to clear the buffers. TSX is still enabled.
* - 'Mitigation: TSX disabled'
- TSX is disabled.
* - 'Not affected'
- The CPU is not affected by this issue.
.. _ucode_needed:
Best effort mitigation mode
^^^^^^^^^^^^^^^^^^^^^^^^^^^
If the processor is vulnerable, but the availability of the microcode-based
mitigation mechanism is not advertised via CPUID the kernel selects a best
effort mitigation mode. This mode invokes the mitigation instructions
without a guarantee that they clear the CPU buffers.
This is done to address virtualization scenarios where the host has the
microcode update applied, but the hypervisor is not yet updated to expose the
CPUID to the guest. If the host has updated microcode the protection takes
effect; otherwise a few CPU cycles are wasted pointlessly.
The state in the tsx_async_abort sysfs file reflects this situation
accordingly.
Mitigation mechanism
--------------------
The kernel detects the affected CPUs and the presence of the microcode which is
required. If a CPU is affected and the microcode is available, then the kernel
enables the mitigation by default.
The mitigation can be controlled at boot time via a kernel command line option.
See :ref:`taa_mitigation_control_command_line`.
.. _virt_mechanism:
Virtualization mitigation
^^^^^^^^^^^^^^^^^^^^^^^^^
Affected systems where the host has the TAA microcode and TAA is mitigated by
having previously disabled TSX are not vulnerable, regardless of the status
of the VMs.
In all other cases, if the host either does not have the TAA microcode or
the kernel is not mitigated, the system might be vulnerable.
.. _taa_mitigation_control_command_line:
Mitigation control on the kernel command line
---------------------------------------------
The kernel command line allows control of the TAA mitigations at boot time with
the option "tsx_async_abort=". The valid arguments for this option are:
============ =============================================================
off This option disables the TAA mitigation on affected platforms.
If the system has TSX enabled (see next parameter) and the CPU
is affected, the system is vulnerable.
full TAA mitigation is enabled. If TSX is enabled, on an affected
system it will clear CPU buffers on ring transitions. On
systems which are MDS-affected and deploy MDS mitigation,
TAA is also mitigated. Specifying this option on those
systems will have no effect.
full,nosmt The same as tsx_async_abort=full, with SMT disabled on
vulnerable CPUs that have TSX enabled. This is the complete
mitigation. When TSX is disabled, SMT is not disabled because
CPU is not vulnerable to cross-thread TAA attacks.
============ =============================================================
Not specifying this option is equivalent to "tsx_async_abort=full". For
processors that are affected by both TAA and MDS, specifying just
"tsx_async_abort=off" without an accompanying "mds=off" will have no
effect as the same mitigation is used for both vulnerabilities.
The kernel command line also allows control of the TSX feature using the
parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
to control the TSX feature and the enumeration of the TSX feature bits (RTM
and HLE) in CPUID.
The valid options are:
============ =============================================================
off Disables TSX on the system.
Note that this option takes effect only on newer CPUs which are
not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
and which get the new IA32_TSX_CTRL MSR through a microcode
update. This new MSR allows for the reliable deactivation of
the TSX functionality.
on Enables TSX.
Although there are mitigations for all known security
vulnerabilities, TSX has been known to be an accelerator for
several previous speculation-related CVEs, and so there may be
unknown security risks associated with leaving it enabled.
auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
on the system.
============ =============================================================
Not specifying this option is equivalent to "tsx=off".
The following combinations of the "tsx_async_abort" and "tsx" options are
possible. For affected platforms, tsx=auto is equivalent to tsx=off and the
result will be:
========= ========================== =========================================
tsx=on tsx_async_abort=full The system will use VERW to clear CPU
buffers. Cross-thread attacks are still
possible on SMT machines.
tsx=on tsx_async_abort=full,nosmt As above, but cross-thread attacks on SMT
are mitigated.
tsx=on tsx_async_abort=off The system is vulnerable.
tsx=off tsx_async_abort=full TSX might be disabled if microcode
provides a TSX control MSR. If so,
system is not vulnerable.
tsx=off tsx_async_abort=full,nosmt Ditto
tsx=off tsx_async_abort=off ditto
========= ========================== =========================================
For unaffected platforms "tsx=on" and "tsx_async_abort=full" do not clear CPU
buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
the "tsx" command line argument has no effect.
For the affected platforms, the table below indicates the mitigation status for
the combinations of the CPUID bit MD_CLEAR and the IA32_ARCH_CAPABILITIES MSR
bits MDS_NO and TSX_CTRL_MSR.
======= ========= ============= ========================================
MDS_NO MD_CLEAR TSX_CTRL_MSR Status
======= ========= ============= ========================================
0 0 0 Vulnerable (needs microcode)
0 1 0 MDS and TAA mitigated via VERW
1 1 0 MDS fixed, TAA vulnerable if TSX enabled
because MD_CLEAR has no meaning and
VERW is not guaranteed to clear buffers
1 X 1 MDS fixed, TAA can be mitigated by
VERW or TSX_CTRL_MSR
======= ========= ============= ========================================
Mitigation selection guide
--------------------------
1. Trusted userspace and guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If all user space applications are from a trusted source and do not execute
untrusted code which is supplied externally, then the mitigation can be
disabled. The same applies to virtualized environments with trusted guests.
2. Untrusted userspace and guests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If there are untrusted applications or guests on the system, enabling TSX
might allow a malicious actor to leak data from the host or from other
processes running on the same physical core.
If the microcode is available and TSX is disabled on the host, attacks
are prevented in a virtualized environment as well, even if the VMs do not
explicitly enable the mitigation.
.. _taa_default_mitigations:
Default mitigations
-------------------
The kernel's default action for vulnerable processors is:
- Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).


@ -32,7 +32,7 @@ Supported chips:
Datasheet: Publicly available at the Texas Instruments website
http://www.ti.com/
Author: Lothar Felten <l-felten@ti.com>
Author: Lothar Felten <lothar.felten@gmail.com>
Description
-----------


@ -19,6 +19,24 @@ Contents:
gpu/index
80211/index
This section describes CPU vulnerabilities and their mitigations.
.. toctree::
:maxdepth: 1
hw-vuln/index
Architecture-specific documentation
-----------------------------------
These books provide programming details about architecture-specific
implementation.
.. toctree::
:maxdepth: 2
x86/index
Indices and tables
==================


@ -301,7 +301,10 @@ them as any other INPUT_PROP_BUTTONPAD device.
INPUT_PROP_ACCELEROMETER
-------------------------
Directional axes on this device (absolute and/or relative x, y, z) represent
accelerometer data. All other axes retain their meaning. A device must not mix
accelerometer data. Some devices also report gyroscope data, which devices
can report through the rotational axes (absolute and/or relative rx, ry, rz).
All other axes retain their meaning. A device must not mix
regular directional axes and accelerometer axes on the same event node.
Guidelines:


@ -314,7 +314,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
This facility can be used to prevent such uncontrolled
GPE floodings.
Format: <int>
Support masking of GPEs numbered from 0x00 to 0x7f.
acpi_no_auto_serialize [HW,ACPI]
Disable auto-serialization of AML methods
@ -1090,12 +1089,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nopku [X86] Disable Memory Protection Keys CPU feature found
in some Intel CPUs.
eagerfpu= [X86]
on enable eager fpu restore
off disable eager fpu restore
auto selects the default scheme, which automatically
enables eagerfpu restore for xsaveopt.
module.async_probe [KNL]
Enable asynchronous probe on this module.
@ -1984,6 +1977,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
kmemcheck=2 (one-shot mode)
Default: 2 (one-shot mode)
kpti= [ARM64] Control page table isolation of user
and kernel address spaces.
Default: enabled on cores which need mitigation.
0: force disabled
1: force enabled
kstack=N [X86] Print N words from the kernel stack
in oops dumps.
@ -1994,6 +1993,25 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
KVM MMU at runtime.
Default is 0 (off)
kvm.nx_huge_pages=
[KVM] Controls the software workaround for the
X86_BUG_ITLB_MULTIHIT bug.
force : Always deploy workaround.
off : Never deploy workaround.
auto : Deploy workaround based on the presence of
X86_BUG_ITLB_MULTIHIT.
Default is 'auto'.
If the software workaround is enabled for the host,
guests need not enable it for nested guests.
kvm.nx_huge_pages_recovery_ratio=
[KVM] Controls how many 4KiB pages are periodically zapped
back to huge pages. 0 disables the recovery, otherwise if
the value is N KVM will zap 1/Nth of the 4KiB pages every
minute. The default is 60.
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
Default is 1 (enabled)
@ -2022,10 +2040,87 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
(virtualized real and unpaged mode) on capable
Intel chips. Default is 1 (enabled)
kvm-intel.vmentry_l1d_flush=[KVM,Intel] Mitigation for L1 Terminal Fault
CVE-2018-3620.
Valid arguments: never, cond, always
always: L1D cache flush on every VMENTER.
cond: Flush L1D on VMENTER only when the code between
VMEXIT and VMENTER can leak host memory.
never: Disables the mitigation
Default is cond (do L1 cache flush in specific instances)
kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
feature (tagged TLBs) on capable Intel chips.
Default is 1 (enabled)
l1tf= [X86] Control mitigation of the L1TF vulnerability on
affected CPUs
The kernel PTE inversion protection is unconditionally
enabled and cannot be disabled.
full
Provides all available mitigations for the
L1TF vulnerability. Disables SMT and
enables all mitigations in the
hypervisors, i.e. unconditional L1D flush.
SMT control and L1D flush control via the
sysfs interface is still possible after
boot. Hypervisors will issue a warning
when the first VM is started in a
potentially insecure configuration,
i.e. SMT enabled or L1D flush disabled.
full,force
Same as 'full', but disables SMT and L1D
flush runtime control. Implies the
'nosmt=force' command line option.
(i.e. sysfs control of SMT is disabled.)
flush
Leaves SMT enabled and enables the default
hypervisor mitigation, i.e. conditional
L1D flush.
SMT control and L1D flush control via the
sysfs interface is still possible after
boot. Hypervisors will issue a warning
when the first VM is started in a
potentially insecure configuration,
i.e. SMT enabled or L1D flush disabled.
flush,nosmt
Disables SMT and enables the default
hypervisor mitigation.
SMT control and L1D flush control via the
sysfs interface is still possible after
boot. Hypervisors will issue a warning
when the first VM is started in a
potentially insecure configuration,
i.e. SMT enabled or L1D flush disabled.
flush,nowarn
Same as 'flush', but hypervisors will not
warn when a VM is started in a potentially
insecure configuration.
off
Disables hypervisor mitigations and doesn't
emit any warnings.
It also drops the swap size and available
RAM limit restriction on both hypervisor and
bare metal.
Default is 'flush'.
For details see: Documentation/hw-vuln/l1tf.rst
l2cr= [PPC]
l3cr= [PPC]
@ -2267,6 +2362,38 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Format: <first>,<last>
Specifies range of consoles to be captured by the MDA.
mds= [X86,INTEL]
Control mitigation for the Micro-architectural Data
Sampling (MDS) vulnerability.
Certain CPUs are vulnerable to an exploit against CPU
internal buffers which can forward information to a
disclosure gadget under certain conditions.
In vulnerable processors, the speculatively
forwarded data can be used in a cache side channel
attack, to access data to which the attacker does
not have direct access.
This parameter controls the MDS mitigation. The
options are:
full - Enable MDS mitigation on vulnerable CPUs
full,nosmt - Enable MDS mitigation and disable
SMT on vulnerable CPUs
off - Unconditionally disable MDS mitigation
On TAA-affected machines, mds=off can be prevented by
an active TAA mitigation, as both vulnerabilities are
mitigated with the same mechanism. To disable this
mitigation you therefore need to specify
tsx_async_abort=off too.
Not specifying this option is equivalent to
mds=full.
For details see: Documentation/hw-vuln/mds.rst
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
Amount of memory to be used when the kernel is not able
to see the whole system memory or for test.
@ -2389,6 +2516,47 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
in the "bleeding edge" mini2440 support kernel at
http://repo.or.cz/w/linux-2.6/mini2440.git
mitigations=
[X86] Control optional mitigations for CPU
vulnerabilities. This is a set of curated,
arch-independent options, each of which is an
aggregation of existing arch-specific options.
off
Disable all optional CPU mitigations. This
improves system performance, but it may also
expose users to several CPU vulnerabilities.
Equivalent to: nopti [X86]
nospectre_v1 [X86]
nospectre_v2 [X86]
spectre_v2_user=off [X86]
spec_store_bypass_disable=off [X86]
l1tf=off [X86]
mds=off [X86]
tsx_async_abort=off [X86]
kvm.nx_huge_pages=off [X86]
Exceptions:
This does not have any effect on
kvm.nx_huge_pages when
kvm.nx_huge_pages=force.
auto (default)
Mitigate all CPU vulnerabilities, but leave SMT
enabled, even if it's vulnerable. This is for
users who don't want to be surprised by SMT
getting disabled across kernel upgrades, or who
have other ways of avoiding SMT-based attacks.
Equivalent to: (default behavior)
auto,nosmt
Mitigate all CPU vulnerabilities, disabling SMT
if needed. This is for users who always want to
be fully mitigated, even if it means losing SMT.
Equivalent to: l1tf=flush,nosmt [X86]
mds=full,nosmt [X86]
tsx_async_abort=full,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
parameter allows control of the logging verbosity for
@ -2706,7 +2874,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
[KNL,x86] Disable symmetric multithreading (SMT).
nosmt=force: Force disable SMT, cannot be undone
via the sysfs control file.
nospectre_v1 [X86,PPC] Disable mitigations for Spectre Variant 1
(bounds check bypass). With this option data leaks are
possible in the system.
nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2
(indirect branch prediction) vulnerability. System may
allow data leaks with this option, which is equivalent
to spectre_v2=off.
@ -3328,6 +3504,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
before loading.
See Documentation/blockdev/ramdisk.txt.
psi= [KNL] Enable or disable pressure stall information
tracking.
Format: <bool>
psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to
probe for; one of (bare|imps|exps|lifebook|any).
psmouse.rate= [HW,MOUSE] Set desired mouse report rate, in reports
@ -3704,6 +3884,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Run specified binary instead of /init from the ramdisk,
used for early userspace startup. See initrd.
rdrand= [X86]
force - Override the decision by the kernel to hide the
advertisement of RDRAND support (this affects
certain AMD processors because of buggy BIOS
support, specifically around the suspend/resume
path).
reboot= [KNL]
Format (x86 or x86_64):
[w[arm] | c[old] | h[ard] | s[oft] | g[pio]] \
@ -3908,6 +4095,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
last alloc / free. For more information see
Documentation/vm/slub.txt.
slub_memcg_sysfs= [MM, SLUB]
Determines whether to enable sysfs directories for
memory cgroup sub-caches. 1 to enable, 0 to disable.
The default is determined by CONFIG_SLUB_MEMCG_SYSFS_ON.
Enabling this can lead to a very high number of debug
directories and files being created under
/sys/kernel/slub.
slub_max_order= [MM, SLUB]
Determines the maximum allowed order for slabs.
A high setting may cause OOMs due to memory
@ -3967,9 +4162,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
spectre_v2= [X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
user space attacks.
on - unconditionally enable
off - unconditionally disable
on - unconditionally enable, implies
spectre_v2_user=on
off - unconditionally disable, implies
spectre_v2_user=off
auto - kernel detects whether your CPU model is
vulnerable
@ -3979,6 +4178,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
CONFIG_RETPOLINE configuration option, and the
compiler with which the kernel was built.
Selecting 'on' will also enable the mitigation
against user space to user space task attacks.
Selecting 'off' will disable both the kernel and
the user space protections.
Specific mitigations can also be selected manually:
retpoline - replace indirect branches
@ -3988,6 +4193,48 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Not specifying this option is equivalent to
spectre_v2=auto.
spectre_v2_user=
[X86] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability between
user space tasks
on - Unconditionally enable mitigations. Is
enforced by spectre_v2=on
off - Unconditionally disable mitigations. Is
enforced by spectre_v2=off
prctl - Indirect branch speculation is enabled,
but mitigation can be enabled via prctl
per thread. The mitigation control state
is inherited on fork.
prctl,ibpb
- Like "prctl" above, but only STIBP is
controlled per thread. IBPB is issued
always when switching between different user
space processes.
seccomp
- Same as "prctl" above, but all seccomp
threads will enable the mitigation unless
they explicitly opt out.
seccomp,ibpb
- Like "seccomp" above, but only STIBP is
controlled per thread. IBPB is issued
always when switching between different
user space processes.
auto - Kernel selects the mitigation depending on
the available CPU features and vulnerability.
Default mitigation:
If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
Not specifying this option is equivalent to
spectre_v2_user=auto.
spec_store_bypass_disable=
[HW] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)
@ -4332,6 +4579,76 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
platforms where RDTSC is slow and this accounting
can add overhead.
tsx= [X86] Control Transactional Synchronization
Extensions (TSX) feature in Intel processors that
support TSX control.
This parameter controls the TSX feature. The options are:
on - Enable TSX on the system. Although there are
mitigations for all known security vulnerabilities,
TSX has been known to be an accelerator for
several previous speculation-related CVEs, and
so there may be unknown security risks associated
with leaving it enabled.
off - Disable TSX on the system. (Note that this
option takes effect only on newer CPUs which are
not vulnerable to MDS, i.e., have
MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
the new IA32_TSX_CTRL MSR through a microcode
update. This new MSR allows for the reliable
deactivation of the TSX functionality.)
auto - Disable TSX if X86_BUG_TAA is present,
otherwise enable TSX on the system.
Not specifying this option is equivalent to tsx=off.
See Documentation/hw-vuln/tsx_async_abort.rst
for more details.
tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
Abort (TAA) vulnerability.
Similar to Micro-architectural Data Sampling (MDS),
certain CPUs that support Transactional
Synchronization Extensions (TSX) are vulnerable to an
exploit against CPU internal buffers which can forward
information to a disclosure gadget under certain
conditions.
In vulnerable processors, the speculatively forwarded
data can be used in a cache side channel attack, to
access data to which the attacker does not have direct
access.
This parameter controls the TAA mitigation. The
options are:
full - Enable TAA mitigation on vulnerable CPUs
if TSX is enabled.
full,nosmt - Enable TAA mitigation and disable SMT on
vulnerable CPUs. If TSX is disabled, SMT
is not disabled because CPU is not
vulnerable to cross-thread TAA attacks.
off - Unconditionally disable TAA mitigation
On MDS-affected machines, tsx_async_abort=off can be
prevented by an active MDS mitigation, as both vulnerabilities
are mitigated with the same mechanism. To disable this
mitigation you therefore need to specify mds=off too.
Not specifying this option is equivalent to
tsx_async_abort=full. On CPUs which are MDS affected
and deploy MDS mitigation, TAA mitigation is not
required and doesn't provide any additional
mitigation.
For details see:
Documentation/hw-vuln/tsx_async_abort.rst
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
Format:


@ -122,14 +122,11 @@ min_adv_mss - INTEGER
IP Fragmentation:
ipfrag_high_thresh - INTEGER
Maximum memory used to reassemble IP fragments. When
ipfrag_high_thresh bytes of memory is allocated for this purpose,
the fragment handler will toss packets until ipfrag_low_thresh
is reached. This also serves as a maximum limit to namespaces
different from the initial one.
ipfrag_high_thresh - LONG INTEGER
Maximum memory used to reassemble IP fragments.
ipfrag_low_thresh - INTEGER
ipfrag_low_thresh - LONG INTEGER
(Obsolete since linux-4.17)
Maximum memory used to reassemble IP fragments before the kernel
begins to remove incomplete fragment queues to free up resources.
The kernel still accepts new fragments for defragmentation.
@ -233,6 +230,14 @@ tcp_base_mss - INTEGER
Path MTU discovery (MTU probing). If MTU probing is enabled,
this is the initial MSS used by the connection.
tcp_min_snd_mss - INTEGER
TCP SYN and SYNACK messages usually advertise an ADVMSS option,
as described in RFC 1122 and RFC 6691.
If this ADVMSS option is smaller than tcp_min_snd_mss,
it is silently capped to tcp_min_snd_mss.
Default : 48 (at least 8 bytes of payload per segment)
tcp_congestion_control - STRING
Set the congestion control algorithm to be used for new
connections. The algorithm "reno" is always available, but
@ -408,6 +413,7 @@ tcp_min_rtt_wlen - INTEGER
minimum RTT when it is moved to a longer path (e.g., due to traffic
engineering). A longer window makes the filter more resistant to RTT
inflations such as transient congestion. The unit is seconds.
Possible values: 0 - 86400 (1 day)
Default: 300
tcp_moderate_rcvbuf - BOOLEAN


@ -31,6 +31,15 @@ return from vsnprintf.
Raw pointer value SHOULD be printed with %p. The kernel supports
the following extended format specifiers for pointer types:
Pointer Types:
Pointers printed without a specifier extension (i.e. unadorned %p) are
hashed to give a unique identifier without leaking kernel addresses to user
space. On 64 bit machines the first 32 bits are zeroed. If you _really_
want the address see %px below.
%p abcdef12 or 00000000abcdef12
Symbols/Function Pointers:
%pF versatile_init+0x0/0x110
@ -58,12 +67,24 @@ Symbols/Function Pointers:
Kernel Pointers:
%pK 0x01234567 or 0x0123456789abcdef
%pK 01234567 or 0123456789abcdef
For printing kernel pointers which should be hidden from unprivileged
users. The behaviour of %pK depends on the kptr_restrict sysctl - see
Documentation/sysctl/kernel.txt for more details.
Unmodified Addresses:
%px 01234567 or 0123456789abcdef
For printing pointers when you _really_ want to print the address. Please
consider whether or not you are leaking sensitive information about the
Kernel layout in memory before printing pointers with %px. %px is
functionally equivalent to %lx. %px is preferred to %lx because it is more
uniquely grep'able. If, in the future, we need to modify the way the Kernel
handles printing pointers it will be nice to be able to find the call
sites.
Struct Resources:
%pr [mem 0x60000000-0x6fffffff flags 0x2200] or

Documentation/siphash.txt Normal file

@ -0,0 +1,175 @@
SipHash - a short input PRF
-----------------------------------------------
Written by Jason A. Donenfeld <jason@zx2c4.com>
SipHash is a cryptographically secure PRF -- a keyed hash function -- that
performs very well for short inputs, hence the name. It was designed by
cryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended
as a replacement for some uses of: `jhash`, `md5_transform`, `sha_transform`,
and so forth.
SipHash takes a secret key filled with randomly generated numbers and either
an input buffer or several input integers. It spits out an integer that is
indistinguishable from random. You may then use that integer as part of secure
sequence numbers, secure cookies, or mask it off for use in a hash table.
1. Generating a key
Keys should always be generated from a cryptographically secure source of
random numbers, either using get_random_bytes or get_random_once:
siphash_key_t key;
get_random_bytes(&key, sizeof(key));
If you're not deriving your key from here, you're doing it wrong.
2. Using the functions
There are two variants of the function, one that takes a list of integers, and
one that takes a buffer:
u64 siphash(const void *data, size_t len, const siphash_key_t *key);
And:
u64 siphash_1u64(u64, const siphash_key_t *key);
u64 siphash_2u64(u64, u64, const siphash_key_t *key);
u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key);
u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key);
u64 siphash_1u32(u32, const siphash_key_t *key);
u64 siphash_2u32(u32, u32, const siphash_key_t *key);
u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key);
u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key);
If you pass the generic siphash function something of a constant length, it
will constant fold at compile-time and automatically choose one of the
optimized functions.
3. Hashtable key function usage:
struct some_hashtable {
DECLARE_HASHTABLE(hashtable, 8);
siphash_key_t key;
};
void init_hashtable(struct some_hashtable *table)
{
get_random_bytes(&table->key, sizeof(table->key));
}
static inline struct hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
{
return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
}
You may then iterate like usual over the returned hash bucket.
4. Security
SipHash has a very high security margin, with its 128-bit key. So long as the
key is kept secret, it is impossible for an attacker to guess the outputs of
the function, even when able to observe many outputs, since 2^128 outputs
is significant.
Linux implements the "2-4" variant of SipHash.
5. Struct-passing Pitfalls
Oftentimes the XuY functions will not be large enough, and instead you'll
want to pass a pre-filled struct to siphash. When doing this, it's important
to always ensure the struct has no padding holes. The easiest way to do this
is to simply arrange the members of the struct in descending order of size,
and to use offsetofend() instead of sizeof() for getting the size. For
performance reasons, if possible, it's probably a good thing to align the
struct to the right boundary. Here's an example:
const struct {
struct in6_addr saddr;
u32 counter;
u16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.counter = counter,
.dport = dport
};
u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret);
6. Resources
Read the SipHash paper if you're interested in learning more:
https://131002.net/siphash/siphash.pdf
~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~
HalfSipHash - SipHash's insecure younger cousin
-----------------------------------------------
Written by Jason A. Donenfeld <jason@zx2c4.com>
On the off-chance that SipHash is not fast enough for your needs, you might be
able to justify using HalfSipHash, a terrifying but potentially useful
possibility. HalfSipHash cuts SipHash's rounds down from "2-4" to "1-3" and,
even scarier, uses an easily brute-forceable 64-bit key (with a 32-bit output)
instead of SipHash's 128-bit key. However, this may appeal to some
high-performance `jhash` users.
Danger!
Do not ever use HalfSipHash except as a hashtable key function, and only
then when you can be absolutely certain that the outputs will never be
transmitted out of the kernel. This is only remotely useful over `jhash` as a
means of mitigating hashtable flooding denial of service attacks.
1. Generating a key
Keys should always be generated from a cryptographically secure source of
random numbers, either using get_random_bytes or get_random_once:
hsiphash_key_t key;
get_random_bytes(&key, sizeof(key));
If you're not deriving your key from here, you're doing it wrong.
2. Using the functions
There are two variants of the function, one that takes a list of integers, and
one that takes a buffer:
u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key);
And:
u32 hsiphash_1u32(u32, const hsiphash_key_t *key);
u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key);
u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key);
u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key);
If you pass the generic hsiphash function something of a constant length, it
will constant fold at compile-time and automatically choose one of the
optimized functions.
3. Hashtable key function usage:
struct some_hashtable {
DECLARE_HASHTABLE(hashtable, 8);
hsiphash_key_t key;
};
void init_hashtable(struct some_hashtable *table)
{
get_random_bytes(&table->key, sizeof(table->key));
}
static inline struct hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input)
{
return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)];
}
You may then iterate like usual over the returned hash bucket.
4. Performance
HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements,
this will not be a problem, as the hashtable lookup isn't the bottleneck. And
in general, this is probably a good sacrifice to make for the security and DoS
resistance of HalfSipHash.


@ -92,3 +92,12 @@ Speculation misfeature controls
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
- PR_SPEC_INDIRECT_BRANCH: Indirect Branch Speculation in User Processes
(Mitigate Spectre V2 style attacks against user processes)
Invocations:
* prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
* prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
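A sketch of how an application might combine these calls (error handling is
minimal, the constants are assumed to come from reasonably recent
<sys/prctl.h>/<linux/prctl.h> headers, and the PR_SET call only succeeds when
the kernel runs in a prctl-capable mitigation mode)::

  #include <stdio.h>
  #include <sys/prctl.h>

  int main(void)
  {
      int state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

      if (state < 0) {
          perror("PR_GET_SPECULATION_CTRL");
          return 1;
      }
      printf("indirect branch speculation state: 0x%x\n", (unsigned int)state);

      /* Opt this task out of indirect branch speculation. */
      if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
                PR_SPEC_DISABLE, 0, 0) < 0) {
          perror("PR_SET_SPECULATION_CTRL");
          return 1;
      }
      return 0;
  }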


@ -34,7 +34,9 @@ Currently, these files are in /proc/sys/fs:
- overflowgid
- pipe-user-pages-hard
- pipe-user-pages-soft
- protected_fifos
- protected_hardlinks
- protected_regular
- protected_symlinks
- suid_dumpable
- super-max
@ -182,6 +184,24 @@ applied.
==============================================================
protected_fifos:
The intent of this protection is to avoid unintentional writes to
an attacker-controlled FIFO, where a program expected to create a regular
file.
When set to "0", writing to FIFOs is unrestricted.
When set to "1" don't allow O_CREAT open on FIFOs that we don't own
in world writable sticky directories, unless they are owned by the
owner of the directory.
When set to "2" it also applies to group writable sticky directories.
This protection is based on the restrictions in Openwall.
==============================================================
protected_hardlinks:
A long-standing class of security issues is the hardlink-based
@ -202,6 +222,22 @@ This protection is based on the restrictions in Openwall and grsecurity.
==============================================================
protected_regular:
This protection is similar to protected_fifos, but it
avoids writes to an attacker-controlled regular file, where a program
expected to create one.
When set to "0", writing to regular files is unrestricted.
When set to "1" don't allow O_CREAT open on regular files that we
don't own in world writable sticky directories, unless they are
owned by the owner of the directory.
When set to "2" it also applies to group writable sticky directories.
==============================================================
protected_symlinks:
A long-standing class of security issues is the symlink-based


@ -54,6 +54,14 @@ Values :
1 - enable JIT hardening for unprivileged users only
2 - enable JIT hardening for all users
bpf_jit_limit
-------------
This enforces a global limit for memory allocations to the BPF JIT
compiler in order to reject unprivileged JIT requests once it has
been surpassed. bpf_jit_limit contains the value of the global limit
in bytes.
dev_weight
--------------


@ -365,11 +365,15 @@ autosuspend the interface's device. When the usage counter is = 0
then the interface is considered to be idle, and the kernel may
autosuspend the device.
Drivers need not be concerned about balancing changes to the usage
counter; the USB core will undo any remaining "get"s when a driver
is unbound from its interface. As a corollary, drivers must not call
any of the usb_autopm_* functions after their disconnect() routine has
returned.
Drivers must be careful to balance their overall changes to the usage
counter. Unbalanced "get"s will remain in effect when a driver is
unbound from its interface, preventing the device from going into
runtime suspend should the interface be bound to a driver again. On
the other hand, drivers are allowed to achieve this balance by calling
the ``usb_autopm_*`` functions even after their ``disconnect`` routine
has returned -- say from within a work-queue routine -- provided they
retain an active reference to the interface (via ``usb_get_intf`` and
``usb_put_intf``).
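A sketch of the pattern described above, using hypothetical demo_* names
(only the reference and usage-counter handling is the point; a real driver
would embed this in its own probe/open/disconnect logic)::

  #include <linux/usb.h>
  #include <linux/workqueue.h>

  struct demo_dev {                       /* hypothetical driver state */
      struct usb_interface *intf;
      struct work_struct release_work;
  };

  /* Take a PM usage reference plus an interface reference before doing I/O. */
  static int demo_start(struct demo_dev *dev)
  {
      int ret = usb_autopm_get_interface(dev->intf);

      if (ret)
          return ret;
      usb_get_intf(dev->intf);    /* keeps the interface valid for the work item */
      return 0;
  }

  /*
   * Queued by the driver when it is finished with the interface.  It may
   * legitimately run after disconnect() has returned, because the driver
   * still holds the reference taken with usb_get_intf() above.
   */
  static void demo_release_work(struct work_struct *work)
  {
      struct demo_dev *dev = container_of(work, struct demo_dev, release_work);

      usb_autopm_put_interface(dev->intf);    /* balance the earlier "get" */
      usb_put_intf(dev->intf);
  }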
Drivers using the async routines are responsible for their own
synchronization and mutual exclusion.


@ -1,138 +0,0 @@
Copyright (C) 1999, 2000 Bruce Tenison
Portions Copyright (C) 1999, 2000 David Nelson
Thanks to David Nelson for guidance and the usage of the scanner.txt
and scanner.c files to model our driver and this informative file.
Mar. 2, 2000
CHANGES
- Initial Revision
OVERVIEW
This README will address issues regarding how to configure the kernel
to access a RIO 500 mp3 player.
Before I explain how to use this to access the Rio500 please be warned:
W A R N I N G:
--------------
Please note that this software is still under development. The authors
are in no way responsible for any damage that may occur, no matter how
inconsequential.
It seems that the Rio has a problem when sending .mp3 with low batteries.
I suggest when the batteries are low and you want to transfer stuff that you
replace it with a fresh one. In my case, what happened is I lost two 16kb
blocks (they are no longer usable to store information to it). But I don't
know if that's normal or not; it could simply be a problem with the flash
memory.
In an extreme case, I left my Rio playing overnight and the batteries wore
down to nothing and appear to have corrupted the flash memory. My RIO
needed to be replaced as a result. Diamond tech support is aware of the
problem. Do NOT allow your batteries to wear down to nothing before
changing them. It appears RIO 500 firmware does not handle low battery
power well at all.
On systems with OHCI controllers, the kernel OHCI code appears to have
power on problems with some chipsets. If you are having problems
connecting to your RIO 500, try turning it on first and then plugging it
into the USB cable.
Contact information:
--------------------
The main page for the project is hosted at sourceforge.net in the following
URL: <http://rio500.sourceforge.net>. You can also go to the project's
sourceforge home page at: <http://sourceforge.net/projects/rio500/>.
There is also a mailing list: rio500-users@lists.sourceforge.net
Authors:
-------
Most of the code was written by Cesar Miquel <miquel@df.uba.ar>. Keith
Clayton <kclayton@jps.net> is in charge of the PPC port and making sure
things work there. Bruce Tenison <btenison@dibbs.net> is adding support
for .fon files and also does testing. The program will most likely be
re-written and Pete Ikusz along with the rest will re-design it. I would
also like to thank Tri Nguyen <tmn_3022000@hotmail.com> who provided us
with some important information regarding the communication with the Rio.
ADDITIONAL INFORMATION and Userspace tools
http://rio500.sourceforge.net/
REQUIREMENTS
A host with a USB port. Ideally, either a UHCI (Intel) or OHCI
(Compaq and others) hardware port should work.
A Linux development kernel (2.3.x) with USB support enabled or a
backported version to linux-2.2.x. See http://www.linux-usb.org for
more information on accomplishing this.
A Linux kernel with RIO 500 support enabled.
'lspci' which is only needed to determine the type of USB hardware
available in your machine.
CONFIGURATION
Using `lspci -v`, determine the type of USB hardware available.
If you see something like:
USB Controller: ......
Flags: .....
I/O ports at ....
Then you have a UHCI based controller.
If you see something like:
USB Controller: .....
Flags: ....
Memory at .....
Then you have a OHCI based controller.
Using `make menuconfig` or your preferred method for configuring the
kernel, select 'Support for USB', 'OHCI/UHCI' depending on your
hardware (determined from the steps above), 'USB Diamond Rio500 support', and
'Preliminary USB device filesystem'. Compile and install the modules
(you may need to execute `depmod -a` to update the module
dependencies).
Add a device for the USB rio500:
`mknod /dev/usb/rio500 c 180 64`
Set appropriate permissions for /dev/usb/rio500 (don't forget about
group and world permissions). Both read and write permissions are
required for proper operation.
Load the appropriate modules (if compiled as modules):
OHCI:
modprobe usbcore
modprobe usb-ohci
modprobe rio500
UHCI:
modprobe usbcore
modprobe usb-uhci (or uhci)
modprobe rio500
That's it. The Rio500 Utils at: http://rio500.sourceforge.net should
be able to access the rio500.
BUGS
If you encounter any problems feel free to drop me an email.
Bruce Tenison
btenison@dibbs.net


@ -13,7 +13,7 @@ of a virtual machine. The ioctls belong to three classes
- VM ioctls: These query and set attributes that affect an entire virtual
machine, for example memory layout. In addition a VM ioctl is used to
create virtual cpus (vcpus).
create virtual cpus (vcpus) and devices.
Only run VM ioctls from the same process (address space) that was used
to create the VM.
@ -24,6 +24,11 @@ of a virtual machine. The ioctls belong to three classes
Only run vcpu ioctls from the same thread that was used to create the
vcpu.
- device ioctls: These query and set attributes that control the operation
of a single device.
device ioctls must be issued from the same process (address space) that
was used to create the VM.
2. File descriptors
-------------------
@ -32,10 +37,11 @@ The kvm API is centered around file descriptors. An initial
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
handle will create a VM file descriptor which can be used to issue VM
ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
and return a file descriptor pointing to it. Finally, ioctls on a vcpu
fd can be used to control the vcpu, including the important task of
actually running guest code.
ioctls. A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will
create a virtual cpu or device and return a file descriptor pointing to
the new resource. Finally, ioctls on a vcpu or device fd can be used
to control the vcpu or device. For vcpus, this includes the important
task of actually running guest code.
In general file descriptors can be migrated among processes by means
of fork() and the SCM_RIGHTS facility of unix domain socket. These
@ -122,14 +128,15 @@ KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
privileged user (CAP_SYS_ADMIN).
4.3 KVM_GET_MSR_INDEX_LIST
4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
Capability: basic
Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
Architectures: x86
Type: system
Type: system ioctl
Parameters: struct kvm_msr_list (in/out)
Returns: 0 on success; -1 on error
Errors:
EFAULT: the msr index list cannot be read from or written to
E2BIG: the msr index list is too big to fit in the array specified by
the user.
@ -138,16 +145,23 @@ struct kvm_msr_list {
__u32 indices[0];
};
This ioctl returns the guest msrs that are supported. The list varies
by kvm version and host processor, but does not change otherwise. The
user fills in the size of the indices array in nmsrs, and in return
kvm adjusts nmsrs to reflect the actual number of msrs and fills in
the indices array with their numbers.
The user fills in the size of the indices array in nmsrs, and in return
kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
indices array with their numbers.
KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported. The list
varies by kvm version and host processor, but does not change otherwise.
Note: if kvm indicates support for MCE (KVM_CAP_MCE), then the MCE bank MSRs are
not returned in the MSR list, as different vcpus can have a different number
of banks, as set via the KVM_X86_SETUP_MCE ioctl.
KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed
to the KVM_GET_MSRS system ioctl. This lets userspace probe host capabilities
and processor features that are exposed via MSRs (e.g., VMX capabilities).
This list also varies by kvm version and host processor, but does not change
otherwise.
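For illustration, user space could query the supported guest MSR indices
roughly like this (a sketch only; real code would retry with the nmsrs value
reported back when the ioctl fails with E2BIG)::

  #include <fcntl.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  int main(void)
  {
      unsigned int n = 256;               /* arbitrary first guess */
      struct kvm_msr_list *list;
      int kvm = open("/dev/kvm", O_RDWR);

      if (kvm < 0) {
          perror("/dev/kvm");
          return 1;
      }
      list = calloc(1, sizeof(*list) + n * sizeof(list->indices[0]));
      if (!list)
          return 1;
      list->nmsrs = n;
      if (ioctl(kvm, KVM_GET_MSR_INDEX_LIST, list) == 0)
          printf("kvm supports %u guest MSRs\n", list->nmsrs);
      else
          perror("KVM_GET_MSR_INDEX_LIST"); /* E2BIG: nmsrs now holds the needed size */
      free(list);
      return 0;
  }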
4.4 KVM_CHECK_EXTENSION
@ -474,14 +488,22 @@ Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
4.18 KVM_GET_MSRS
Capability: basic
Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
Architectures: x86
Type: vcpu ioctl
Type: system ioctl, vcpu ioctl
Parameters: struct kvm_msrs (in/out)
Returns: 0 on success, -1 on error
Returns: number of msrs successfully returned;
-1 on error
When used as a system ioctl:
Reads the values of MSR-based features that are available for the VM. This
is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values.
The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST
in a system ioctl.
When used as a vcpu ioctl:
Reads model-specific registers from the vcpu. Supported msr indices can
be obtained using KVM_GET_MSR_INDEX_LIST.
be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.
struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */


@ -13,8 +13,8 @@ The acquisition orders for mutexes are as follows:
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
them together is quite rare.
For spinlocks, kvm_lock is taken outside kvm->mmu_lock. Everything
else is a leaf: no other lock is taken inside the critical sections.
Everything else is a leaf: no other lock is taken inside the critical
sections.
2: Exception
------------
@ -142,7 +142,7 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
------------
Name: kvm_lock
Type: spinlock_t
Type: mutex
Arch: any
Protects: - vm_list

Documentation/x86/conf.py Normal file

@ -0,0 +1,10 @@
# -*- coding: utf-8; mode: python -*-
project = "X86 architecture specific documentation"
tags.add("subproject")
latex_documents = [
('index', 'x86.tex', project,
'The kernel development community', 'manual'),
]


@ -0,0 +1,9 @@
==========================
x86 architecture specifics
==========================
.. toctree::
:maxdepth: 1
mds
tsx_async_abort

Documentation/x86/mds.rst Normal file

@ -0,0 +1,193 @@
Microarchitectural Data Sampling (MDS) mitigation
=================================================
.. _mds:
Overview
--------
Microarchitectural Data Sampling (MDS) is a family of side channel attacks
on internal buffers in Intel CPUs. The variants are:
- Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126)
- Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130)
- Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127)
- Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091)
MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a
dependent load (store-to-load forwarding) as an optimization. The forward
can also happen to a faulting or assisting load operation for a different
memory address, which can be exploited under certain conditions. Store
buffers are partitioned between Hyper-Threads so cross thread forwarding is
not possible. But if a thread enters or exits a sleep state the store
buffer is repartitioned which can expose data from one thread to the other.
MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage
L1 miss situations and to hold data which is returned or sent in response
to a memory or I/O operation. Fill buffers can forward data to a load
operation and also write data to the cache. When the fill buffer is
deallocated it can retain the stale data of the preceding operations which
can then be forwarded to a faulting or assisting load operation, which can
be exploited under certain conditions. Fill buffers are shared between
Hyper-Threads so cross thread leakage is possible.
MLPDS leaks Load Port Data. Load ports are used to perform load operations
from memory or I/O. The received data is then forwarded to the register
file or a subsequent operation. In some implementations the Load Port can
contain stale data from a previous operation which can be forwarded to
faulting or assisting loads under certain conditions, which again can be
exploited eventually. Load ports are shared between Hyper-Threads so cross
thread leakage is possible.
MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from
memory that takes a fault or assist can leave data in a microarchitectural
structure that may later be observed using one of the same methods used by
MSBDS, MFBDS or MLPDS.
Exposure assumptions
--------------------
It is assumed that attack code resides in user space or in a guest with one
exception. The rationale behind this assumption is that the code construct
needed for exploiting MDS requires:
- to control the load to trigger a fault or assist
- to have a disclosure gadget which exposes the speculatively accessed
data for consumption through a side channel.
- to control the pointer through which the disclosure gadget exposes the
data
The existence of such a construct in the kernel cannot be excluded with
100% certainty, but the complexity involved makes it extremely unlikely.
There is one exception, which is untrusted BPF. The functionality of
untrusted BPF is limited, but it needs to be thoroughly investigated
whether it can be used to create such a construct.
Mitigation strategy
-------------------
All variants have the same mitigation strategy at least for the single CPU
thread case (SMT off): Force the CPU to clear the affected buffers.
This is achieved by using the otherwise unused and obsolete VERW
instruction in combination with a microcode update. The microcode clears
the affected CPU buffers when the VERW instruction is executed.
For virtualization there are two ways to achieve CPU buffer
clearing. Either the modified VERW instruction or via the L1D Flush
command. The latter is issued when L1TF mitigation is enabled so the extra
VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to
be issued.
If the VERW instruction with the supplied segment selector argument is
executed on a CPU without the microcode update there is no side effect
other than a small number of pointlessly wasted CPU cycles.
This does not protect against cross Hyper-Thread attacks except for MSBDS
which is only exploitable cross Hyper-thread when one of the Hyper-Threads
enters a C-state.
The kernel provides a function to invoke the buffer clearing:
mds_clear_cpu_buffers()
The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
(idle) transitions.
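For illustration, a hedged sketch of what this helper looks like on x86: VERW only needs a memory operand referencing a valid data segment selector, and the microcode side effect performs the actual clearing::

  static inline void mds_clear_cpu_buffers(void)
  {
          static const u16 ds = __KERNEL_DS;

          /* VERW modifies ZF, hence the "cc" clobber. */
          asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
  }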
As a special quirk to address virtualization scenarios where the host has
the microcode updated, but the hypervisor does not (yet) expose the
MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the
hope that it might actually clear the buffers. The state is reflected
accordingly.
According to current knowledge additional mitigations inside the kernel
itself are not required because the necessary gadgets to expose the leaked
data cannot be controlled in a way which allows exploitation from malicious
user space or VM guests.
Kernel internal mitigation modes
--------------------------------
======= ============================================================
off     Mitigation is disabled. Either the CPU is not affected or
        mds=off is supplied on the kernel command line
full    Mitigation is enabled. CPU is affected and MD_CLEAR is
        advertised in CPUID.
vmwerv  Mitigation is enabled. CPU is affected and MD_CLEAR is not
        advertised in CPUID. That is mainly for virtualization
        scenarios where the host has the updated microcode but the
        hypervisor does not expose MD_CLEAR in CPUID. It's a best
        effort approach without guarantee.
======= ============================================================
If the CPU is affected and mds=off is not supplied on the kernel command
line then the kernel selects the appropriate mitigation mode depending on
the availability of the MD_CLEAR CPUID bit.
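A hedged sketch of that selection (the cmdline flag name is illustrative; the feature and bug flags follow the usual x86 cpufeature naming)::

  static void mds_select_mitigation(void)
  {
          /* mds_cmdline_off stands for "mds=off" on the command line. */
          if (!boot_cpu_has_bug(X86_BUG_MDS) || mds_cmdline_off) {
                  mds_mitigation = MDS_MITIGATION_OFF;
                  return;
          }

          if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
                  mds_mitigation = MDS_MITIGATION_FULL;
          else
                  mds_mitigation = MDS_MITIGATION_VMWERV; /* best effort */
  }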
Mitigation points
-----------------
1. Return to user space
^^^^^^^^^^^^^^^^^^^^^^^
When transitioning from kernel to user space the CPU buffers are flushed
on affected CPUs when the mitigation is not disabled on the kernel
command line. The mitigation is enabled through the static key
mds_user_clear.
The mitigation is invoked in prepare_exit_to_usermode() which covers
all but one of the kernel to user space transitions. The exception
is when we return from a Non Maskable Interrupt (NMI), which is
handled directly in do_nmi().
(The reason that NMI is special is that prepare_exit_to_usermode() can
enable IRQs. In NMI context, NMIs are blocked, and we don't want to
enable IRQs with NMIs blocked.)
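A hedged sketch of the check on that path; the static key keeps the cost to a single patched branch when the mitigation is disabled::

  static inline void mds_user_clear_cpu_buffers(void)
  {
          if (static_branch_likely(&mds_user_clear))
                  mds_clear_cpu_buffers();
  }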
2. C-State transition
^^^^^^^^^^^^^^^^^^^^^
When a CPU goes idle and enters a C-State the CPU buffers need to be
cleared on affected CPUs when SMT is active. This addresses the
repartitioning of the store buffer when one of the Hyper-Threads enters
a C-State.
When SMT is inactive, i.e. either the CPU does not support it or all
sibling threads are offline, CPU buffer clearing is not required.
The idle clearing is enabled on CPUs which are only affected by MSBDS
and not by any other MDS variant. The other MDS variants cannot be
protected against cross Hyper-Thread attacks because the Fill Buffer and
the Load Ports are shared. So on CPUs affected by other variants, the
idle clearing would be a window dressing exercise and is therefore not
activated.
The invocation is controlled by the static key mds_idle_clear which is
switched depending on the chosen mitigation mode and the SMT state of
the system.
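A hedged, simplified sketch of how the idle entry path hooks this in before executing MWAIT (feature checks and polling details omitted)::

  static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
  {
          /* Clear the CPU buffers before going idle, as described above. */
          if (static_branch_likely(&mds_idle_clear))
                  mds_clear_cpu_buffers();

          __monitor((void *)&current_thread_info()->flags, 0, 0);
          if (!need_resched())
                  __mwait(eax, ecx);
  }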
The buffer clear is only invoked before entering the C-State, to prevent
stale data from the idling CPU from spilling to the Hyper-Thread
sibling after the store buffer is repartitioned and all entries become
available to the non-idle sibling.
When coming out of idle the store buffer is partitioned again so each
sibling has half of it available. The CPU coming back from idle could
then be speculatively exposed to the contents of the sibling. The buffers are
flushed either on exit to user space or on VMENTER so malicious code
in user space or the guest cannot speculatively access them.
The mitigation is hooked into all variants of halt()/mwait(), but does
not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver
has been superseded by the intel_idle driver around 2010 and is
preferred on all affected CPUs which are expected to gain the MD_CLEAR
functionality in microcode. Aside from that, the IO-Port mechanism is a
legacy interface which is only used on older systems which are either
not affected or do not receive microcode updates anymore.


@ -0,0 +1,117 @@
.. SPDX-License-Identifier: GPL-2.0
TSX Async Abort (TAA) mitigation
================================
.. _tsx_async_abort:
Overview
--------
TSX Async Abort (TAA) is a side channel attack on internal buffers in some
Intel processors, similar to Microarchitectural Data Sampling (MDS). In this
case certain loads may speculatively pass invalid data to dependent operations
when an asynchronous abort condition is pending in a Transactional
Synchronization Extensions (TSX) transaction. This includes loads with no
fault or assist condition. Such loads may speculatively expose stale data from
the same uarch data structures as in MDS, with the same scope of exposure, i.e.
same-thread and cross-thread. This issue affects all current processors that
support TSX.
Mitigation strategy
-------------------
a) TSX disable - one of the mitigations is to disable TSX. A new MSR
IA32_TSX_CTRL will be available in future and current processors after
a microcode update, and it can be used to disable TSX. In addition, it
controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
vulnerability. More details on this approach can be found in
:ref:`Documentation/hw-vuln/mds.rst <mds>`.
Kernel internal mitigation modes
--------------------------------
============= ============================================================
off           Mitigation is disabled. Either the CPU is not affected or
              tsx_async_abort=off is supplied on the kernel command line.
tsx disabled  Mitigation is enabled. TSX feature is disabled by default at
              bootup on processors that support TSX control.
verw          Mitigation is enabled. CPU is affected and MD_CLEAR is
              advertised in CPUID.
ucode needed  Mitigation is enabled. CPU is affected and MD_CLEAR is not
              advertised in CPUID. That is mainly for virtualization
              scenarios where the host has the updated microcode but the
              hypervisor does not expose MD_CLEAR in CPUID. It's a best
              effort approach without guarantee.
============= ============================================================
If the CPU is affected and the "tsx_async_abort" kernel command line parameter is
not provided then the kernel selects an appropriate mitigation depending on the
status of RTM and MD_CLEAR CPUID bits.
The tables below indicate the impact of the tsx=on|off|auto cmdline options on the
state of the TAA mitigation, VERW behavior and the TSX feature for various
combinations of MSR_IA32_ARCH_CAPABILITIES bits.
1. "tsx=off"
========= ========= ============ ============ ============== =================== ======================
MSR_IA32_ARCH_CAPABILITIES bits    Result with cmdline tsx=off
---------------------------------- -------------------------------------------------------------------------
TAA_NO    MDS_NO    TSX_CTRL_MSR TSX state    VERW can clear TAA mitigation      TAA mitigation
                                 after bootup CPU buffers    tsx_async_abort=off tsx_async_abort=full
========= ========= ============ ============ ============== =================== ======================
0         0         0            HW default   Yes            Same as MDS         Same as MDS
0         0         1            Invalid case Invalid case   Invalid case        Invalid case
0         1         0            HW default   No             Need ucode update   Need ucode update
0         1         1            Disabled     Yes            TSX disabled        TSX disabled
1         X         1            Disabled     X              None needed         None needed
========= ========= ============ ============ ============== =================== ======================
2. "tsx=on"
========= ========= ============ ============ ============== =================== ======================
MSR_IA32_ARCH_CAPABILITIES bits    Result with cmdline tsx=on
---------------------------------- -------------------------------------------------------------------------
TAA_NO    MDS_NO    TSX_CTRL_MSR TSX state    VERW can clear TAA mitigation      TAA mitigation
                                 after bootup CPU buffers    tsx_async_abort=off tsx_async_abort=full
========= ========= ============ ============ ============== =================== ======================
0         0         0            HW default   Yes            Same as MDS         Same as MDS
0         0         1            Invalid case Invalid case   Invalid case        Invalid case
0         1         0            HW default   No             Need ucode update   Need ucode update
0         1         1            Enabled      Yes            None                Same as MDS
1         X         1            Enabled      X              None needed         None needed
========= ========= ============ ============ ============== =================== ======================
3. "tsx=auto"
========= ========= ============ ============ ============== =================== ======================
MSR_IA32_ARCH_CAPABILITIES bits    Result with cmdline tsx=auto
---------------------------------- -------------------------------------------------------------------------
TAA_NO    MDS_NO    TSX_CTRL_MSR TSX state    VERW can clear TAA mitigation      TAA mitigation
                                 after bootup CPU buffers    tsx_async_abort=off tsx_async_abort=full
========= ========= ============ ============ ============== =================== ======================
0         0         0            HW default   Yes            Same as MDS         Same as MDS
0         0         1            Invalid case Invalid case   Invalid case        Invalid case
0         1         0            HW default   No             Need ucode update   Need ucode update
0         1         1            Disabled     Yes            TSX disabled        TSX disabled
1         X         1            Enabled      X              None needed         None needed
========= ========= ============ ============ ============== =================== ======================
In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
indicates whether MSR_IA32_TSX_CTRL is supported.
There are two control bits in IA32_TSX_CTRL MSR:
Bit 0: When set it disables the Restricted Transactional Memory (RTM)
sub-feature of TSX (will force all transactions to abort on the
XBEGIN instruction).
Bit 1: When set it disables the enumeration of the RTM and HLE features
(i.e. it will make CPUID(EAX=7).EBX{bit4} and
CPUID(EAX=7).EBX{bit11} read as 0).
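A hedged sketch of using those two bits to turn TSX off; the MSR index and bit macros mirror the description above and should be treated as illustrative::

  #define MSR_IA32_TSX_CTRL       0x00000122
  #define TSX_CTRL_RTM_DISABLE    (1ULL << 0)     /* force XBEGIN to abort */
  #define TSX_CTRL_CPUID_CLEAR    (1ULL << 1)     /* hide RTM/HLE in CPUID */

  static void tsx_disable(void)
  {
          u64 tsx;

          rdmsrl(MSR_IA32_TSX_CTRL, tsx);
          tsx |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;
          wrmsrl(MSR_IA32_TSX_CTRL, tsx);
  }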


@ -11081,6 +11081,13 @@ F: arch/arm/mach-s3c24xx/mach-bast.c
F: arch/arm/mach-s3c24xx/bast-ide.c
F: arch/arm/mach-s3c24xx/bast-irq.c
SIPHASH PRF ROUTINES
M: Jason A. Donenfeld <Jason@zx2c4.com>
S: Maintained
F: lib/siphash.c
F: lib/test_siphash.c
F: include/linux/siphash.h
TI DAVINCI MACHINE SUPPORT
M: Sekhar Nori <nsekhar@ti.com>
M: Kevin Hilman <khilman@kernel.org>
@ -11482,6 +11489,7 @@ F: arch/alpha/kernel/srm_env.c
STABLE BRANCH
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
M: Sasha Levin <sashal@kernel.org>
L: stable@vger.kernel.org
S: Supported
F: Documentation/stable_kernel_rules.txt
@ -12478,13 +12486,6 @@ W: http://www.linux-usb.org/usbnet
S: Maintained
F: drivers/net/usb/dm9601.c
USB DIAMOND RIO500 DRIVER
M: Cesar Miquel <miquel@df.uba.ar>
L: rio500-users@lists.sourceforge.net
W: http://rio500.sourceforge.net
S: Maintained
F: drivers/usb/misc/rio500*
USB EHCI DRIVER
M: Alan Stern <stern@rowland.harvard.edu>
L: linux-usb@vger.kernel.org


@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
SUBLEVEL = 118
SUBLEVEL = 212
EXTRAVERSION =
NAME = Roaring Lionus
@ -309,11 +309,6 @@ HOSTCXX = g++
HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89
HOSTCXXFLAGS = -O2
ifeq ($(shell $(HOSTCC) -v 2>&1 | grep -c "clang version"), 1)
HOSTCFLAGS += -Wno-unused-value -Wno-unused-parameter \
-Wno-missing-field-initializers -fno-delete-null-pointer-checks
endif
# Decide whether to build built-in, modular, or both.
# Normally, just do built-in.
@ -398,7 +393,7 @@ LINUXINCLUDE += $(filter-out $(LINUXINCLUDE),$(USERINCLUDE))
KBUILD_AFLAGS := -D__ASSEMBLY__
KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common \
-fno-strict-aliasing -fno-common -fshort-wchar \
-Werror-implicit-function-declaration \
-Wno-format-security \
-Werror \
@ -410,6 +405,7 @@ KBUILD_AFLAGS_MODULE := -DMODULE
KBUILD_CFLAGS_MODULE := -DMODULE
KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
GCC_PLUGINS_CFLAGS :=
CLANG_FLAGS :=
# Read KERNELRELEASE from include/config/kernel.release (if it exists)
KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
@ -422,7 +418,8 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_KASAN CFLAGS_UBSAN
export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE
export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN
export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE
export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
@ -533,6 +530,27 @@ ifneq ($(filter install,$(MAKECMDGOALS)),)
endif
endif
ifeq ($(cc-name),clang)
ifneq ($(CROSS_COMPILE),)
CLANG_TRIPLE ?= $(CROSS_COMPILE)
CLANG_FLAGS += --target=$(notdir $(CLANG_TRIPLE:%-=%))
ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_FLAGS)), y)
$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?")
endif
GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit))
CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)
GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..)
endif
ifneq ($(GCC_TOOLCHAIN),)
CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN)
endif
CLANG_FLAGS += -no-integrated-as
CLANG_FLAGS += -Werror=unknown-warning-option
KBUILD_CFLAGS += $(CLANG_FLAGS)
KBUILD_AFLAGS += $(CLANG_FLAGS)
endif
ifeq ($(mixed-targets),1)
# ===========================================================================
# We're called with mixed targets (*config and build targets).
@ -674,6 +692,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias)
ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
@ -729,8 +748,7 @@ export DISABLE_CFI
endif
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += $(call cc-option,-Oz,-Os)
KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,)
KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,)
else
ifdef CONFIG_PROFILE_ALL_BRANCHES
KBUILD_CFLAGS += -O2 $(call cc-disable-warning,maybe-uninitialized,)
@ -790,21 +808,9 @@ endif
KBUILD_CFLAGS += $(stackp-flag)
ifeq ($(cc-name),clang)
ifneq ($(CROSS_COMPILE),)
CLANG_TRIPLE ?= $(CROSS_COMPILE)
CLANG_TARGET := --target=$(notdir $(CLANG_TRIPLE:%-=%))
GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..)
endif
ifneq ($(GCC_TOOLCHAIN),)
CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN)
endif
KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, duplicate-decl-specifier)
# Quiet clang warning: comparison of unsigned expression < 0 is always false
KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
@ -813,16 +819,14 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
# See modpost pattern 2
KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
else
# These warnings generated too much noise in a regular build.
# Use make W=1 to enable them (see scripts/Makefile.build)
# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
endif
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
else
@ -894,6 +898,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,)
# disable pointer signed / unsigned warnings in gcc 4.0
KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
# disable stringop warnings in gcc 8+
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
@ -924,6 +931,18 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=date-time)
# enforce correct pointer usage
KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types)
# Require designated initializers for all marked structures
KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
# change __FILE__ to the relative path from the srctree
KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
# ensure -fcf-protection is disabled when using retpoline as it is
# incompatible with -mindirect-branch=thunk-extern
ifdef CONFIG_RETPOLINE
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
endif
# use the deterministic mode of AR if available
KBUILD_ARFLAGS := $(call ar-option,D)
@ -1628,9 +1647,6 @@ else # KBUILD_EXTMOD
# We are always building modules
KBUILD_MODULES := 1
PHONY += crmodverdir
crmodverdir:
$(cmd_crmodverdir)
PHONY += $(objtree)/Module.symvers
$(objtree)/Module.symvers:
@ -1642,7 +1658,7 @@ $(objtree)/Module.symvers:
module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD))
PHONY += $(module-dirs) modules
$(module-dirs): crmodverdir $(objtree)/Module.symvers
$(module-dirs): prepare $(objtree)/Module.symvers
$(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@)
modules: $(module-dirs)
@ -1683,7 +1699,8 @@ help:
# Dummies...
PHONY += prepare scripts
prepare: ;
prepare:
$(cmd_crmodverdir)
scripts: ;
endif # KBUILD_EXTMOD
@ -1809,17 +1826,14 @@ endif
# Modules
/: prepare scripts FORCE
$(cmd_crmodverdir)
$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
$(build)=$(build-dir)
# Make sure the latest headers are built for Documentation
Documentation/ samples/: headers_install
%/: prepare scripts FORCE
$(cmd_crmodverdir)
$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
$(build)=$(build-dir)
%.ko: prepare scripts FORCE
$(cmd_crmodverdir)
$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
$(build)=$(build-dir) $(@:.ko=.o)
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost


@ -5,6 +5,9 @@
config KEXEC_CORE
bool
config HOTPLUG_SMT
bool
config OPROFILE
tristate "OProfile system profiling"
depends on PROFILING
@ -513,6 +516,7 @@ config LTO_CLANG
bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)"
depends on ARCH_SUPPORTS_LTO_CLANG
depends on !FTRACE_MCOUNT_RECORD || HAVE_C_RECORDMCOUNT
depends on !KASAN
select LTO
select THIN_ARCHIVES
select LD_DEAD_CODE_DATA_ELIMINATION


@ -55,15 +55,15 @@
#elif defined(CONFIG_ALPHA_DP264) || \
defined(CONFIG_ALPHA_LYNX) || \
defined(CONFIG_ALPHA_SHARK) || \
defined(CONFIG_ALPHA_EIGER)
defined(CONFIG_ALPHA_SHARK)
# define NR_IRQS 64
#elif defined(CONFIG_ALPHA_TITAN)
#define NR_IRQS 80
#elif defined(CONFIG_ALPHA_RAWHIDE) || \
defined(CONFIG_ALPHA_TAKARA)
defined(CONFIG_ALPHA_TAKARA) || \
defined(CONFIG_ALPHA_EIGER)
# define NR_IRQS 128
#elif defined(CONFIG_ALPHA_WILDFIRE)


@ -72,9 +72,15 @@
})
#define user_termios_to_kernel_termios(k, u) \
copy_from_user(k, u, sizeof(struct termios))
copy_from_user(k, u, sizeof(struct termios2))
#define kernel_termios_to_user_termios(u, k) \
copy_to_user(u, k, sizeof(struct termios2))
#define user_termios_to_kernel_termios_1(k, u) \
copy_from_user(k, u, sizeof(struct termios))
#define kernel_termios_to_user_termios_1(u, k) \
copy_to_user(u, k, sizeof(struct termios))
#endif /* _ALPHA_TERMIOS_H */


@ -31,6 +31,11 @@
#define TCXONC _IO('t', 30)
#define TCFLSH _IO('t', 31)
#define TCGETS2 _IOR('T', 42, struct termios2)
#define TCSETS2 _IOW('T', 43, struct termios2)
#define TCSETSW2 _IOW('T', 44, struct termios2)
#define TCSETSF2 _IOW('T', 45, struct termios2)
#define TIOCSWINSZ _IOW('t', 103, struct winsize)
#define TIOCGWINSZ _IOR('t', 104, struct winsize)
#define TIOCSTART _IO('t', 110) /* start output, like ^Q */


@ -25,6 +25,19 @@ struct termios {
speed_t c_ospeed; /* output speed */
};
/* Alpha has identical termios and termios2 */
struct termios2 {
tcflag_t c_iflag; /* input mode flags */
tcflag_t c_oflag; /* output mode flags */
tcflag_t c_cflag; /* control mode flags */
tcflag_t c_lflag; /* local mode flags */
cc_t c_cc[NCCS]; /* control characters */
cc_t c_line; /* line discipline (== c_cc[19]) */
speed_t c_ispeed; /* input speed */
speed_t c_ospeed; /* output speed */
};
/* Alpha has matching termios and ktermios */
struct ktermios {
@ -147,6 +160,7 @@ struct ktermios {
#define B3000000 00034
#define B3500000 00035
#define B4000000 00036
#define BOTHER 00037
#define CSIZE 00001400
#define CS5 00000000
@ -164,6 +178,9 @@ struct ktermios {
#define CMSPAR 010000000000 /* mark or space (stick) parity */
#define CRTSCTS 020000000000 /* flow control */
#define CIBAUD 07600000
#define IBSHIFT 16
/* c_lflag bits */
#define ISIG 0x00000080
#define ICANON 0x00000100


@ -526,24 +526,19 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path,
SYSCALL_DEFINE1(osf_utsname, char __user *, name)
{
int error;
char tmp[5 * 32];
down_read(&uts_sem);
error = -EFAULT;
if (copy_to_user(name + 0, utsname()->sysname, 32))
goto out;
if (copy_to_user(name + 32, utsname()->nodename, 32))
goto out;
if (copy_to_user(name + 64, utsname()->release, 32))
goto out;
if (copy_to_user(name + 96, utsname()->version, 32))
goto out;
if (copy_to_user(name + 128, utsname()->machine, 32))
goto out;
memcpy(tmp + 0 * 32, utsname()->sysname, 32);
memcpy(tmp + 1 * 32, utsname()->nodename, 32);
memcpy(tmp + 2 * 32, utsname()->release, 32);
memcpy(tmp + 3 * 32, utsname()->version, 32);
memcpy(tmp + 4 * 32, utsname()->machine, 32);
up_read(&uts_sem);
error = 0;
out:
up_read(&uts_sem);
return error;
if (copy_to_user(name, tmp, sizeof(tmp)))
return -EFAULT;
return 0;
}
SYSCALL_DEFINE0(getpagesize)
@ -561,24 +556,22 @@ SYSCALL_DEFINE0(getdtablesize)
*/
SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen)
{
unsigned len;
int i;
int len, err = 0;
char *kname;
char tmp[32];
if (!access_ok(VERIFY_WRITE, name, namelen))
return -EFAULT;
len = namelen;
if (len > 32)
len = 32;
if (namelen < 0 || namelen > 32)
namelen = 32;
down_read(&uts_sem);
for (i = 0; i < len; ++i) {
__put_user(utsname()->domainname[i], name + i);
if (utsname()->domainname[i] == '\0')
break;
}
kname = utsname()->domainname;
len = strnlen(kname, namelen);
len = min(len + 1, namelen);
memcpy(tmp, kname, len);
up_read(&uts_sem);
if (copy_to_user(name, tmp, len))
return -EFAULT;
return 0;
}
@ -741,13 +734,14 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
};
unsigned long offset;
const char *res;
long len, err = -EINVAL;
long len;
char tmp[__NEW_UTS_LEN + 1];
offset = command-1;
if (offset >= ARRAY_SIZE(sysinfo_table)) {
/* Digital UNIX has a few unpublished interfaces here */
printk("sysinfo(%d)", command);
goto out;
return -EINVAL;
}
down_read(&uts_sem);
@ -755,13 +749,11 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
len = strlen(res)+1;
if ((unsigned long)len > (unsigned long)count)
len = count;
if (copy_to_user(buf, res, len))
err = -EFAULT;
else
err = 0;
memcpy(tmp, res, len);
up_read(&uts_sem);
out:
return err;
if (copy_to_user(buf, tmp, len))
return -EFAULT;
return 0;
}
SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,


@ -77,7 +77,7 @@ __load_new_mm_context(struct mm_struct *next_mm)
/* Macro for exception fixup code to access integer registers. */
#define dpf_reg(r) \
(((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \
(r) <= 18 ? (r)+8 : (r)-10])
(r) <= 18 ? (r)+10 : (r)-10])
asmlinkage void
do_page_fault(unsigned long address, unsigned long mmcsr,


@ -23,7 +23,7 @@ config ARC
select GENERIC_SMP_IDLE_THREAD
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
select HAVE_FUTEX_CMPXCHG
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select HAVE_KRETPROBES


@ -1,5 +1,4 @@
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_CROSS_MEMORY_ATTACH is not set
@ -98,6 +97,7 @@ CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set


@ -1,5 +1,4 @@
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_CROSS_MEMORY_ATTACH is not set
@ -98,6 +97,7 @@ CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set


@ -1,5 +1,4 @@
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_CROSS_MEMORY_ATTACH is not set
@ -99,6 +98,7 @@ CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set


@ -76,6 +76,7 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_ROOT_NFS=y
CONFIG_DEBUG_INFO=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set


@ -71,5 +71,6 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set


@ -69,5 +69,6 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set


@ -80,6 +80,7 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_TMPFS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_FTRACE=y


@ -88,6 +88,7 @@ CONFIG_NTFS_FS=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set


@ -87,6 +87,7 @@ CONFIG_NTFS_FS=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set


@ -84,7 +84,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \
"1: llock %[orig], [%[ctr]] \n" \
" " #asm_op " %[val], %[orig], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" \n" \
" bnz 1b \n" \
: [val] "=&r" (val), \
[orig] "=&r" (orig) \
: [ctr] "r" (&v->counter), \


@ -340,7 +340,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
static inline __attribute__ ((const)) int __ffs(unsigned long word)
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
{
if (!word)
return word;
@ -400,9 +400,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x)
/*
* __ffs: Similar to ffs, but zero based (0-31)
*/
static inline __attribute__ ((const)) int __ffs(unsigned long x)
static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
{
int n;
unsigned long n;
asm volatile(
" ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */


@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address);
#define BUG() do { \
pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
dump_stack(); \
barrier_before_unreachable(); \
__builtin_trap(); \
} while (0)
#define HAVE_ARCH_BUG


@ -49,6 +49,17 @@
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
/*
* Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses
* ARCv2 64-bit atomics (LLOCKD/SCONDD). This guarantess runtime 64-bit
* alignment for any atomic64_t embedded in buffer.
* Default ARCH_SLAB_MINALIGN is __alignof__(long long) which has a relaxed
* value of 4 (and not 8) in ARC ABI.
*/
#if defined(CONFIG_ARC_HAS_LL64) && defined(CONFIG_ARC_HAS_LLSC)
#define ARCH_SLAB_MINALIGN 8
#endif
extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_cache_bcr(void);


@ -92,8 +92,11 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
#endif /* CONFIG_ARC_HAS_LLSC */
#define cmpxchg(ptr, o, n) ((typeof(*(ptr)))__cmpxchg((ptr), \
(unsigned long)(o), (unsigned long)(n)))
#define cmpxchg(ptr, o, n) ({ \
(typeof(*(ptr)))__cmpxchg((ptr), \
(unsigned long)(o), \
(unsigned long)(n)); \
})
/*
* atomic_cmpxchg is same as cmpxchg
@ -198,8 +201,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
return __xchg_bad_pointer();
}
#define xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
sizeof(*(ptr))))
#define xchg(ptr, with) ({ \
(typeof(*(ptr)))__xchg((unsigned long)(with), \
(ptr), \
sizeof(*(ptr))); \
})
#endif /* CONFIG_ARC_PLAT_EZNPS */


@ -17,8 +17,11 @@
#ifndef __ASM_ARC_UDELAY_H
#define __ASM_ARC_UDELAY_H
#include <asm-generic/types.h>
#include <asm/param.h> /* HZ */
extern unsigned long loops_per_jiffy;
static inline void __delay(unsigned long loops)
{
__asm__ __volatile__(


@ -12,6 +12,7 @@
#include <linux/types.h>
#include <asm/byteorder.h>
#include <asm/page.h>
#include <asm/unaligned.h>
#ifdef CONFIG_ISA_ARCV2
#include <asm/barrier.h>
@ -94,6 +95,42 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
return w;
}
/*
* {read,write}s{b,w,l}() repeatedly access the same IO address in
* native endianness in 8-, 16-, 32-bit chunks {into,from} memory,
* @count times
*/
#define __raw_readsx(t,f) \
static inline void __raw_reads##f(const volatile void __iomem *addr, \
void *ptr, unsigned int count) \
{ \
bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \
u##t *buf = ptr; \
\
if (!count) \
return; \
\
/* Some ARC CPU's don't support unaligned accesses */ \
if (is_aligned) { \
do { \
u##t x = __raw_read##f(addr); \
*buf++ = x; \
} while (--count); \
} else { \
do { \
u##t x = __raw_read##f(addr); \
put_unaligned(x, buf++); \
} while (--count); \
} \
}
#define __raw_readsb __raw_readsb
__raw_readsx(8, b)
#define __raw_readsw __raw_readsw
__raw_readsx(16, w)
#define __raw_readsl __raw_readsl
__raw_readsx(32, l)
#define __raw_writeb __raw_writeb
static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
{
@ -126,6 +163,35 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
}
#define __raw_writesx(t,f) \
static inline void __raw_writes##f(volatile void __iomem *addr, \
const void *ptr, unsigned int count) \
{ \
bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \
const u##t *buf = ptr; \
\
if (!count) \
return; \
\
/* Some ARC CPU's don't support unaligned accesses */ \
if (is_aligned) { \
do { \
__raw_write##f(*buf++, addr); \
} while (--count); \
} else { \
do { \
__raw_write##f(get_unaligned(buf++), addr); \
} while (--count); \
} \
}
#define __raw_writesb __raw_writesb
__raw_writesx(8, b)
#define __raw_writesw __raw_writesw
__raw_writesx(16, w)
#define __raw_writesl __raw_writesl
__raw_writesx(32, l)
/*
* MMIO can also get buffered/optimized in micro-arch, so barriers needed
* Based on ARM model for the typical use case
@ -141,10 +207,16 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
#define readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); })
#define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); })
#define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); })
#define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); })
#define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); })
#define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); })
#define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); })
#define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); })
#define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); })
/*
* Relaxed API for drivers which can handle barrier ordering themselves


@ -34,9 +34,7 @@ struct machine_desc {
const char *name;
const char **dt_compat;
void (*init_early)(void);
#ifdef CONFIG_SMP
void (*init_per_cpu)(unsigned int);
#endif
void (*init_machine)(void);
void (*init_late)(void);


@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = {
/* counts condition */
[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
/* All jump instructions that are taken */
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak",
[PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */
#ifdef CONFIG_ISA_ARCV2
[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",


@ -209,7 +209,7 @@ __arc_copy_from_user(void *to, const void __user *from, unsigned long n)
*/
"=&r" (tmp), "+r" (to), "+r" (from)
:
: "lp_count", "lp_start", "lp_end", "memory");
: "lp_count", "memory");
return n;
}
@ -438,7 +438,7 @@ __arc_copy_to_user(void __user *to, const void *from, unsigned long n)
*/
"=&r" (tmp), "+r" (to), "+r" (from)
:
: "lp_count", "lp_start", "lp_end", "memory");
: "lp_count", "memory");
return n;
}
@ -658,7 +658,7 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
" .previous \n"
: "+r"(d_char), "+r"(res)
: "i"(0)
: "lp_count", "lp_start", "lp_end", "memory");
: "lp_count", "memory");
return res;
}
@ -691,7 +691,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
" .previous \n"
: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
: "g"(-EFAULT), "r"(count)
: "lp_count", "lp_start", "lp_end", "memory");
: "lp_count", "memory");
return res;
}


@ -17,6 +17,7 @@
#include <asm/entry.h>
#include <asm/arcregs.h>
#include <asm/cache.h>
#include <asm/irqflags.h>
.macro CPU_EARLY_SETUP
@ -47,6 +48,15 @@
sr r5, [ARC_REG_DC_CTRL]
1:
#ifdef CONFIG_ISA_ARCV2
; Unaligned access is disabled at reset, so re-enable early as
; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access
; by default
lr r5, [status32]
bset r5, r5, STATUS_AD_BIT
kflag r5
#endif
.endm
.section .init.text, "ax",@progbits
@ -93,10 +103,11 @@ ENTRY(stext)
#ifdef CONFIG_ARC_UBOOT_SUPPORT
; Uboot - kernel ABI
; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2
; r1 = magic number (board identity, unused as of now
; r1 = magic number (always zero as of now)
; r2 = pointer to uboot provided cmdline or external DTB in mem
; These are handled later in setup_arch()
; These are handled later in handle_uboot_args()
st r0, [@uboot_tag]
st r1, [@uboot_magic]
st r2, [@uboot_arg]
#endif


@ -31,10 +31,10 @@ void __init init_IRQ(void)
/* a SMP H/w block could do IPI IRQ request here */
if (plat_smp_ops.init_per_cpu)
plat_smp_ops.init_per_cpu(smp_processor_id());
#endif
if (machine_desc->init_per_cpu)
machine_desc->init_per_cpu(smp_processor_id());
#endif
}
/*


@ -488,8 +488,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
/* loop thru all available h/w condition indexes */
for (j = 0; j < cc_bcr.c; j++) {
write_aux_reg(ARC_REG_CC_INDEX, j);
cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0));
cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1));
/* See if it has been mapped to a perf event_id */
for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {


@ -44,7 +44,8 @@ SYSCALL_DEFINE0(arc_gettls)
SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
{
struct pt_regs *regs = current_pt_regs();
int uval = -EFAULT;
u32 uval;
int ret;
/*
* This is only for old cores lacking LLOCK/SCOND, which by defintion
@ -57,23 +58,47 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
/* Z indicates to userspace if operation succeded */
regs->status32 &= ~STATUS_Z_MASK;
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
ret = access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr));
if (!ret)
goto fail;
again:
preempt_disable();
if (__get_user(uval, uaddr))
goto done;
ret = __get_user(uval, uaddr);
if (ret)
goto fault;
if (uval == expected) {
if (!__put_user(new, uaddr))
regs->status32 |= STATUS_Z_MASK;
}
if (uval != expected)
goto out;
done:
ret = __put_user(new, uaddr);
if (ret)
goto fault;
regs->status32 |= STATUS_Z_MASK;
out:
preempt_enable();
return uval;
fault:
preempt_enable();
return uval;
if (unlikely(ret != -EFAULT))
goto fail;
down_read(&current->mm->mmap_sem);
ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr,
FAULT_FLAG_WRITE, NULL);
up_read(&current->mm->mmap_sem);
if (likely(!ret))
goto again;
fail:
force_sig(SIGSEGV, current);
return ret;
}
void arch_cpu_idle(void)
@ -188,6 +213,26 @@ int copy_thread(unsigned long clone_flags,
task_thread_info(current)->thr_ptr;
}
/*
* setup usermode thread pointer #1:
* when child is picked by scheduler, __switch_to() uses @c_callee to
* populate usermode callee regs: this works (despite being in a kernel
* function) since special return path for child @ret_from_fork()
* ensures those regs are not clobbered all the way to RTIE to usermode
*/
c_callee->r25 = task_thread_info(p)->thr_ptr;
#ifdef CONFIG_ARC_CURR_IN_REG
/*
* setup usermode thread pointer #2:
* however for this special use of r25 in kernel, __switch_to() sets
* r25 for kernel needs and only in the final return path is usermode
* r25 setup, from pt_regs->user_r25. So set that up as well
*/
c_regs->user_r25 = c_callee->r25;
#endif
return 0;
}


@ -32,6 +32,7 @@ unsigned int intr_to_DE_cnt;
/* Part of U-boot ABI: see head.S */
int __initdata uboot_tag;
int __initdata uboot_magic;
char __initdata *uboot_arg;
const struct machine_desc *machine_desc;
@ -381,43 +382,87 @@ void setup_processor(void)
arc_chk_core_config();
}
static inline int is_kernel(unsigned long addr)
static inline bool uboot_arg_invalid(unsigned long addr)
{
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
return 1;
return 0;
/*
* Check that it is a untranslated address (although MMU is not enabled
* yet, it being a high address ensures this is not by fluke)
*/
if (addr < PAGE_OFFSET)
return true;
/* Check that address doesn't clobber resident kernel image */
return addr >= (unsigned long)_stext && addr <= (unsigned long)_end;
}
#define IGNORE_ARGS "Ignore U-boot args: "
/* uboot_tag values for U-boot - kernel ABI revision 0; see head.S */
#define UBOOT_TAG_NONE 0
#define UBOOT_TAG_CMDLINE 1
#define UBOOT_TAG_DTB 2
/* We always pass 0 as magic from U-boot */
#define UBOOT_MAGIC_VALUE 0
void __init handle_uboot_args(void)
{
bool use_embedded_dtb = true;
bool append_cmdline = false;
#ifdef CONFIG_ARC_UBOOT_SUPPORT
/* check that we know this tag */
if (uboot_tag != UBOOT_TAG_NONE &&
uboot_tag != UBOOT_TAG_CMDLINE &&
uboot_tag != UBOOT_TAG_DTB) {
pr_warn(IGNORE_ARGS "invalid uboot tag: '%08x'\n", uboot_tag);
goto ignore_uboot_args;
}
if (uboot_magic != UBOOT_MAGIC_VALUE) {
pr_warn(IGNORE_ARGS "non zero uboot magic\n");
goto ignore_uboot_args;
}
if (uboot_tag != UBOOT_TAG_NONE &&
uboot_arg_invalid((unsigned long)uboot_arg)) {
pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg);
goto ignore_uboot_args;
}
/* see if U-boot passed an external Device Tree blob */
if (uboot_tag == UBOOT_TAG_DTB) {
machine_desc = setup_machine_fdt((void *)uboot_arg);
/* external Device Tree blob is invalid - use embedded one */
use_embedded_dtb = !machine_desc;
}
if (uboot_tag == UBOOT_TAG_CMDLINE)
append_cmdline = true;
ignore_uboot_args:
#endif
if (use_embedded_dtb) {
machine_desc = setup_machine_fdt(__dtb_start);
if (!machine_desc)
panic("Embedded DT invalid\n");
}
/*
* NOTE: @boot_command_line is populated by setup_machine_fdt() so this
* append processing can only happen after.
*/
if (append_cmdline) {
/* Ensure a whitespace between the 2 cmdlines */
strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
strlcat(boot_command_line, uboot_arg, COMMAND_LINE_SIZE);
}
}
void __init setup_arch(char **cmdline_p)
{
#ifdef CONFIG_ARC_UBOOT_SUPPORT
/* make sure that uboot passed pointer to cmdline/dtb is valid */
if (uboot_tag && is_kernel((unsigned long)uboot_arg))
panic("Invalid uboot arg\n");
/* See if u-boot passed an external Device Tree blob */
machine_desc = setup_machine_fdt(uboot_arg); /* uboot_tag == 2 */
if (!machine_desc)
#endif
{
/* No, so try the embedded one */
machine_desc = setup_machine_fdt(__dtb_start);
if (!machine_desc)
panic("Embedded DT invalid\n");
/*
* If we are here, it is established that @uboot_arg didn't
* point to DT blob. Instead if u-boot says it is cmdline,
* append to embedded DT cmdline.
* setup_machine_fdt() would have populated @boot_command_line
*/
if (uboot_tag == 1) {
/* Ensure a whitespace between the 2 cmdlines */
strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
strlcat(boot_command_line, uboot_arg,
COMMAND_LINE_SIZE);
}
}
handle_uboot_args();
/* Save unparsed command line copy for /proc/cmdline */
*cmdline_p = boot_command_line;


@ -155,3 +155,12 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs)
insterror_is_error(address, regs);
}
/*
* abort() call generated by older gcc for __builtin_trap()
*/
void abort(void)
{
__asm__ __volatile__("trap_s 5\n");
}
EXPORT_SYMBOL(abort);


@ -185,11 +185,6 @@ static void *__init unw_hdr_alloc_early(unsigned long sz)
MAX_DMA_ADDRESS);
}
static void *unw_hdr_alloc(unsigned long sz)
{
return kmalloc(sz, GFP_KERNEL);
}
static void init_unwind_table(struct unwind_table *table, const char *name,
const void *core_start, unsigned long core_size,
const void *init_start, unsigned long init_size,
@ -370,6 +365,10 @@ ret_err:
}
#ifdef CONFIG_MODULES
static void *unw_hdr_alloc(unsigned long sz)
{
return kmalloc(sz, GFP_KERNEL);
}
static struct unwind_table *last_table;


@ -25,15 +25,11 @@
#endif
#ifdef CONFIG_ARC_HAS_LL64
# define PREFETCH_READ(RX) prefetch [RX, 56]
# define PREFETCH_WRITE(RX) prefetchw [RX, 64]
# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
# define ZOLSHFT 5
# define ZOLAND 0x1F
#else
# define PREFETCH_READ(RX) prefetch [RX, 28]
# define PREFETCH_WRITE(RX) prefetchw [RX, 32]
# define LOADX(DST,RX) ld.ab DST, [RX, 4]
# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
# define ZOLSHFT 4
@ -41,8 +37,6 @@
#endif
ENTRY_CFI(memcpy)
prefetch [r1] ; Prefetch the read location
prefetchw [r0] ; Prefetch the write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
@ -72,8 +66,6 @@ ENTRY_CFI(memcpy)
lpnz @.Lcopy32_64bytes
;; LOOP START
LOADX (r6, r1)
PREFETCH_READ (r1)
PREFETCH_WRITE (r3)
LOADX (r8, r1)
LOADX (r10, r1)
LOADX (r4, r1)
@ -117,9 +109,7 @@ ENTRY_CFI(memcpy)
lpnz @.Lcopy8bytes_1
;; LOOP START
ld.ab r6, [r1, 4]
prefetch [r1, 28] ;Prefetch the next read location
ld.ab r8, [r1,4]
prefetchw [r3, 32] ;Prefetch the next write location
SHIFT_1 (r7, r6, 24)
or r7, r7, r5
@ -162,9 +152,7 @@ ENTRY_CFI(memcpy)
lpnz @.Lcopy8bytes_2
;; LOOP START
ld.ab r6, [r1, 4]
prefetch [r1, 28] ;Prefetch the next read location
ld.ab r8, [r1,4]
prefetchw [r3, 32] ;Prefetch the next write location
SHIFT_1 (r7, r6, 16)
or r7, r7, r5
@ -204,9 +192,7 @@ ENTRY_CFI(memcpy)
lpnz @.Lcopy8bytes_3
;; LOOP START
ld.ab r6, [r1, 4]
prefetch [r1, 28] ;Prefetch the next read location
ld.ab r8, [r1,4]
prefetchw [r3, 32] ;Prefetch the next write location
SHIFT_1 (r7, r6, 8)
or r7, r7, r5


@ -7,11 +7,39 @@
*/
#include <linux/linkage.h>
#include <asm/cache.h>
#undef PREALLOC_NOT_AVAIL
/*
* The memset implementation below is optimized to use prefetchw and prealloc
* instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6)
* If you want to implement optimized memset for other possible L1 data cache
* line lengths (32B and 128B) you should rewrite code carefully checking
* we don't call any prefetchw/prealloc instruction for L1 cache lines which
* don't belongs to memset area.
*/
#if L1_CACHE_SHIFT == 6
.macro PREALLOC_INSTR reg, off
prealloc [\reg, \off]
.endm
.macro PREFETCHW_INSTR reg, off
prefetchw [\reg, \off]
.endm
#else
.macro PREALLOC_INSTR
.endm
.macro PREFETCHW_INSTR
.endm
#endif
ENTRY_CFI(memset)
prefetchw [r0] ; Prefetch the write location
PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
@ -48,11 +76,8 @@ ENTRY_CFI(memset)
lpnz @.Lset64bytes
;; LOOP START
#ifdef PREALLOC_NOT_AVAIL
prefetchw [r3, 64] ;Prefetch the next write location
#else
prealloc [r3, 64]
#endif
PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
@ -85,7 +110,6 @@ ENTRY_CFI(memset)
lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
lpnz .Lset32bytes
;; LOOP START
prefetchw [r3, 32] ;Prefetch the next write location
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]


@ -840,7 +840,7 @@ void flush_cache_mm(struct mm_struct *mm)
void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
unsigned long pfn)
{
unsigned int paddr = pfn << PAGE_SHIFT;
phys_addr_t paddr = pfn << PAGE_SHIFT;
u_vaddr &= PAGE_MASK;
@ -860,8 +860,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
unsigned long u_vaddr)
{
/* TBD: do we really need to clear the kernel mapping */
__flush_dcache_page(page_address(page), u_vaddr);
__flush_dcache_page(page_address(page), page_address(page));
__flush_dcache_page((phys_addr_t)page_address(page), u_vaddr);
__flush_dcache_page((phys_addr_t)page_address(page),
(phys_addr_t)page_address(page));
}


@ -890,9 +890,11 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
struct pt_regs *regs)
{
struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
unsigned int pd0[mmu->ways];
unsigned long flags;
int set;
int set, n_ways = mmu->ways;
n_ways = min(n_ways, 4);
BUG_ON(mmu->ways > 4);
local_irq_save(flags);
@ -900,9 +902,10 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
for (set = 0; set < mmu->sets; set++) {
int is_valid, way;
unsigned int pd0[4];
/* read out all the ways of current set */
for (way = 0, is_valid = 0; way < mmu->ways; way++) {
for (way = 0, is_valid = 0; way < n_ways; way++) {
write_aux_reg(ARC_REG_TLBINDEX,
SET_WAY_TO_IDX(mmu, set, way));
write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
@ -916,14 +919,14 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
continue;
/* Scan the set for duplicate ways: needs a nested loop */
for (way = 0; way < mmu->ways - 1; way++) {
for (way = 0; way < n_ways - 1; way++) {
int n;
if (!pd0[way])
continue;
for (n = way + 1; n < mmu->ways; n++) {
for (n = way + 1; n < n_ways; n++) {
if (pd0[way] != pd0[n])
continue;


@ -21,6 +21,7 @@
#error "Incorrect ctop.h include"
#endif
#include <linux/types.h>
#include <soc/nps/common.h>
/* core auxiliary registers */


@ -1457,6 +1457,7 @@ config NR_CPUS
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
select GENERIC_IRQ_MIGRATION
help
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.


@ -987,14 +987,21 @@ choice
Say Y here if you want kernel low-level debugging support
on SOCFPGA(Cyclone 5 and Arria 5) based platforms.
config DEBUG_SOCFPGA_UART1
config DEBUG_SOCFPGA_ARRIA10_UART1
depends on ARCH_SOCFPGA
bool "Use SOCFPGA UART1 for low-level debug"
bool "Use SOCFPGA Arria10 UART1 for low-level debug"
select DEBUG_UART_8250
help
Say Y here if you want kernel low-level debugging support
on SOCFPGA(Arria 10) based platforms.
config DEBUG_SOCFPGA_CYCLONE5_UART1
depends on ARCH_SOCFPGA
bool "Use SOCFPGA Cyclone 5 UART1 for low-level debug"
select DEBUG_UART_8250
help
Say Y here if you want kernel low-level debugging support
on SOCFPGA(Cyclone 5 and Arria 5) based platforms.
config DEBUG_SUN9I_UART0
bool "Kernel low-level debugging messages via sun9i UART0"
@ -1340,21 +1347,21 @@ config DEBUG_OMAP2PLUS_UART
depends on ARCH_OMAP2PLUS
config DEBUG_IMX_UART_PORT
int "i.MX Debug UART Port Selection" if DEBUG_IMX1_UART || \
DEBUG_IMX25_UART || \
DEBUG_IMX21_IMX27_UART || \
DEBUG_IMX31_UART || \
DEBUG_IMX35_UART || \
DEBUG_IMX50_UART || \
DEBUG_IMX51_UART || \
DEBUG_IMX53_UART || \
DEBUG_IMX6Q_UART || \
DEBUG_IMX6SL_UART || \
DEBUG_IMX6SX_UART || \
DEBUG_IMX6UL_UART || \
DEBUG_IMX7D_UART
int "i.MX Debug UART Port Selection"
depends on DEBUG_IMX1_UART || \
DEBUG_IMX25_UART || \
DEBUG_IMX21_IMX27_UART || \
DEBUG_IMX31_UART || \
DEBUG_IMX35_UART || \
DEBUG_IMX50_UART || \
DEBUG_IMX51_UART || \
DEBUG_IMX53_UART || \
DEBUG_IMX6Q_UART || \
DEBUG_IMX6SL_UART || \
DEBUG_IMX6SX_UART || \
DEBUG_IMX6UL_UART || \
DEBUG_IMX7D_UART
default 1
depends on ARCH_MXC
help
Choose UART port on which kernel low-level debug messages
should be output.
@ -1534,7 +1541,8 @@ config DEBUG_UART_PHYS
default 0xfe800000 if ARCH_IOP32X
default 0xff690000 if DEBUG_RK32_UART2
default 0xffc02000 if DEBUG_SOCFPGA_UART0
default 0xffc02100 if DEBUG_SOCFPGA_UART1
default 0xffc02100 if DEBUG_SOCFPGA_ARRIA10_UART1
default 0xffc03000 if DEBUG_SOCFPGA_CYCLONE5_UART1
default 0xffd82340 if ARCH_IOP13XX
default 0xffe40000 if DEBUG_RCAR_GEN1_SCIF0
default 0xffe42000 if DEBUG_RCAR_GEN1_SCIF2
@ -1624,7 +1632,8 @@ config DEBUG_UART_VIRT
default 0xfeb30c00 if DEBUG_KEYSTONE_UART0
default 0xfeb31000 if DEBUG_KEYSTONE_UART1
default 0xfec02000 if DEBUG_SOCFPGA_UART0
default 0xfec02100 if DEBUG_SOCFPGA_UART1
default 0xfec02100 if DEBUG_SOCFPGA_ARRIA10_UART1
default 0xfec03000 if DEBUG_SOCFPGA_CYCLONE5_UART1
default 0xfec12000 if (DEBUG_MVEBU_UART0 || DEBUG_MVEBU_UART0_ALTERNATE) && ARCH_MVEBU
default 0xfec12100 if DEBUG_MVEBU_UART1_ALTERNATE
default 0xfec10000 if DEBUG_SIRFATLAS7_UART0
@ -1672,9 +1681,9 @@ config DEBUG_UART_8250_WORD
depends on DEBUG_LL_UART_8250 || DEBUG_UART_8250
depends on DEBUG_UART_8250_SHIFT >= 2
default y if DEBUG_PICOXCELL_UART || \
DEBUG_SOCFPGA_UART0 || DEBUG_SOCFPGA_UART1 || \
DEBUG_KEYSTONE_UART0 || DEBUG_KEYSTONE_UART1 || \
DEBUG_ALPINE_UART0 || \
DEBUG_SOCFPGA_UART0 || DEBUG_SOCFPGA_ARRIA10_UART1 || \
DEBUG_SOCFPGA_CYCLONE5_UART1 || DEBUG_KEYSTONE_UART0 || \
DEBUG_KEYSTONE_UART1 || DEBUG_ALPINE_UART0 || \
DEBUG_DAVINCI_DMx_UART0 || DEBUG_DAVINCI_DA8XX_UART1 || \
DEBUG_DAVINCI_DA8XX_UART2 || \
DEBUG_BCM_KONA_UART || DEBUG_RK32_UART2


@ -104,7 +104,7 @@ tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
tune-y := $(tune-y)
ifeq ($(CONFIG_AEABI),y)
CFLAGS_ABI :=-mabi=aapcs-linux -mno-thumb-interwork -mfpu=vfp
CFLAGS_ABI :=-mabi=aapcs-linux -mfpu=vfp
else
CFLAGS_ABI :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,)
endif


@ -112,7 +112,7 @@ CFLAGS_fdt_ro.o := $(nossp_flags)
CFLAGS_fdt_rw.o := $(nossp_flags)
CFLAGS_fdt_wip.o := $(nossp_flags)
ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj)
asflags-y := -DZIMAGE
# Supply kernel BSS size to the decompressor via a linker symbol.


@ -17,14 +17,13 @@
@ there.
.inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000
#else
mov r0, r0
AR_CLASS( mov r0, r0 )
M_CLASS( nop.w )
#endif
.endm
.macro __EFI_HEADER
#ifdef CONFIG_EFI_STUB
b __efi_start
.set start_offset, __efi_start - start
.org start + 0x3c
@


@ -130,19 +130,22 @@ start:
.rept 7
__nop
.endr
ARM( mov r0, r0 )
ARM( b 1f )
THUMB( badr r12, 1f )
THUMB( bx r12 )
#ifndef CONFIG_THUMB2_KERNEL
mov r0, r0
#else
AR_CLASS( sub pc, pc, #3 ) @ A/R: switch to Thumb2 mode
M_CLASS( nop.w ) @ M: already in Thumb2 mode
.thumb
#endif
W(b) 1f
.word _magic_sig @ Magic numbers to help the loader
.word _magic_start @ absolute load/run zImage address
.word _magic_end @ zImage end address
.word 0x04030201 @ endianness flag
THUMB( .thumb )
1: __EFI_HEADER
__EFI_HEADER
1:
ARM_BE8( setend be ) @ go BE8 if compiled for BE8
AR_CLASS( mrs r9, cpsr )
#ifdef CONFIG_ARM_VIRT_EXT
@ -1382,7 +1385,21 @@ ENTRY(efi_stub_entry)
@ Preserve return value of efi_entry() in r4
mov r4, r0
bl cache_clean_flush
@ our cache maintenance code relies on CP15 barrier instructions
@ but since we arrived here with the MMU and caches configured
@ by UEFI, we must check that the CP15BEN bit is set in SCTLR.
@ Note that this bit is RAO/WI on v6 and earlier, so the ISB in
@ the enable path will be executed on v7+ only.
mrc p15, 0, r1, c1, c0, 0 @ read SCTLR
tst r1, #(1 << 5) @ CP15BEN bit set?
bne 0f
orr r1, r1, #(1 << 5) @ CP15 barrier instructions
mcr p15, 0, r1, c1, c0, 0 @ write SCTLR
ARM( .inst 0xf57ff06f @ v7+ isb )
THUMB( isb )
0: bl cache_clean_flush
bl cache_off
@ Set parameters for booting zImage according to boot protocol


@ -1,10 +1,14 @@
#ifndef _ARM_LIBFDT_ENV_H
#define _ARM_LIBFDT_ENV_H
#include <linux/limits.h>
#include <linux/types.h>
#include <linux/string.h>
#include <asm/byteorder.h>
#define INT32_MAX S32_MAX
#define UINT32_MAX U32_MAX
typedef __be16 fdt16_t;
typedef __be32 fdt32_t;
typedef __be64 fdt64_t;


@ -701,6 +701,7 @@
pinctrl-0 = <&cpsw_default>;
pinctrl-1 = <&cpsw_sleep>;
status = "okay";
slaves = <1>;
};
&davinci_mdio {
@ -708,15 +709,14 @@
pinctrl-0 = <&davinci_mdio_default>;
pinctrl-1 = <&davinci_mdio_sleep>;
status = "okay";
ethphy0: ethernet-phy@0 {
reg = <0>;
};
};
&cpsw_emac0 {
phy_id = <&davinci_mdio>, <0>;
phy-mode = "rgmii-txid";
};
&cpsw_emac1 {
phy_id = <&davinci_mdio>, <1>;
phy-handle = <&ethphy0>;
phy-mode = "rgmii-txid";
};


@ -74,6 +74,11 @@
};
};
/* Table Table 5-79 of the TRM shows 480ab000 is reserved */
&usb_otg_hs {
status = "disabled";
};
&iva {
status = "disabled";
};


@ -1117,6 +1117,8 @@
ti,hwmods = "dss_dispc";
clocks = <&disp_clk>;
clock-names = "fck";
max-memory-bandwidth = <230000000>;
};
rfbi: rfbi@4832a800 {


@ -79,7 +79,7 @@
};
lcd0: display {
compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
compatible = "osddisplays,osd070t1718-19ts", "panel-dpi";
label = "lcd";
panel-timing {


@ -533,6 +533,8 @@
touchscreen-size-x = <480>;
touchscreen-size-y = <272>;
wakeup-source;
};
tlv320aic3106: tlv320aic3106@1b {


@ -41,7 +41,7 @@
};
lcd0: display {
compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
compatible = "osddisplays,osd070t1718-19ts", "panel-dpi";
label = "lcd";
panel-timing {


@ -334,7 +334,7 @@
clock-names = "uartclk", "apb_pclk";
};
ssp: ssp@1000d000 {
ssp: spi@1000d000 {
compatible = "arm,pl022", "arm,primecell";
reg = <0x1000d000 0x1000>;
clocks = <&sspclk>, <&pclk>;

Some files were not shown because too many files have changed in this diff.