From f9555119f2b17e63e7d0a2c208c7a6c3eb1a85fc Mon Sep 17 00:00:00 2001
From: =?utf8?q?Fabian=20Gr=C3=BCnbichler?=
Date: Tue, 21 Feb 2017 09:28:57 +0100
Subject: [PATCH] drop patches applied upstream

---
 ...le-on-IO-only-when-there-are-too-man.patch | 118 --------
 0002-Revert-mm-oom-rework-oom-detection.patch | 255 ------------------
 ...ix-emulation-of-MOV-SS-null-selector.patch | 107 --------
 Makefile                                      |   3 -
 4 files changed, 483 deletions(-)
 delete mode 100644 0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
 delete mode 100644 0002-Revert-mm-oom-rework-oom-detection.patch
 delete mode 100644 CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch

diff --git a/0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch b/0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
deleted file mode 100644
index b4ff5a8..0000000
--- a/0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From 3168fc7faf603da9d523c9dffbec6fee5b1a8a04 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?=
-Date: Wed, 4 Jan 2017 11:29:00 +0100
-Subject: [PATCH 1/2] Revert "mm: throttle on IO only when there are too many
- dirty and writeback pages"
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This reverts commit 57e9ef475661f46769cad6c0ed9a13f0cec1dbd8.
-
-Signed-off-by: Fabian Grünbichler
----
- mm/backing-dev.c | 20 +++++++++++++++++---
- mm/page_alloc.c  | 41 ++++-------------------------------------
- 2 files changed, 21 insertions(+), 40 deletions(-)
-
-diff --git a/mm/backing-dev.c b/mm/backing-dev.c
-index a1aef87..9ef80bf 100644
---- a/mm/backing-dev.c
-+++ b/mm/backing-dev.c
-@@ -976,8 +976,9 @@ EXPORT_SYMBOL(congestion_wait);
-  * jiffies for either a BDI to exit congestion of the given @sync queue
-  * or a write to complete.
-  *
-- * In the absence of zone congestion, cond_resched() is called to yield
-- * the processor if necessary but otherwise does not sleep.
-+ * In the absence of zone congestion, a short sleep or a cond_resched is
-+ * performed to yield the processor and to allow other subsystems to make
-+ * a forward progress.
-  *
-  * The return value is 0 if the sleep is for the full timeout. Otherwise,
-  * it is the number of jiffies that were still remaining when the function
-@@ -997,7 +998,20 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
- 	 */
- 	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
- 	    !test_bit(ZONE_CONGESTED, &zone->flags)) {
--		cond_resched();
-+
-+		/*
-+		 * Memory allocation/reclaim might be called from a WQ
-+		 * context and the current implementation of the WQ
-+		 * concurrency control doesn't recognize that a particular
-+		 * WQ is congested if the worker thread is looping without
-+		 * ever sleeping. Therefore we have to do a short sleep
-+		 * here rather than calling cond_resched().
-+		 */
-+		if (current->flags & PF_WQ_WORKER)
-+			schedule_timeout_uninterruptible(1);
-+		else
-+			cond_resched();
-+
- 		/* In case we scheduled, work out time remaining */
- 		ret = timeout - (jiffies - start);
- 		if (ret < 0)
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index aadbd7e..f13b503 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -3038,9 +3038,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
- 	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
- 					ac->nodemask) {
- 		unsigned long available;
--		unsigned long reclaimable;
- 
--		available = reclaimable = zone_reclaimable_pages(zone);
-+		available = zone_reclaimable_pages(zone);
- 		available -= DIV_ROUND_UP(no_progress_loops * available,
- 					  MAX_RECLAIM_RETRIES);
- 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
-@@ -3050,41 +3049,9 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
- 		 * available?
- 		 */
- 		if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
--				ac->classzone_idx, alloc_flags, available)) {
--			/*
--			 * If we didn't make any progress and have a lot of
--			 * dirty + writeback pages then we should wait for
--			 * an IO to complete to slow down the reclaim and
--			 * prevent from pre mature OOM
--			 */
--			if (!did_some_progress) {
--				unsigned long writeback;
--				unsigned long dirty;
--
--				writeback = zone_page_state_snapshot(zone,
--								     NR_WRITEBACK);
--				dirty = zone_page_state_snapshot(zone, NR_FILE_DIRTY);
--
--				if (2*(writeback + dirty) > reclaimable) {
--					congestion_wait(BLK_RW_ASYNC, HZ/10);
--					return true;
--				}
--			}
--
--			/*
--			 * Memory allocation/reclaim might be called from a WQ
--			 * context and the current implementation of the WQ
--			 * concurrency control doesn't recognize that
--			 * a particular WQ is congested if the worker thread is
--			 * looping without ever sleeping. Therefore we have to
--			 * do a short sleep here rather than calling
--			 * cond_resched().
--			 */
--			if (current->flags & PF_WQ_WORKER)
--				schedule_timeout_uninterruptible(1);
--			else
--				cond_resched();
--
-+			ac->high_zoneidx, alloc_flags, available)) {
-+			/* Wait for some write requests to complete then retry */
-+			wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
- 			return true;
- 		}
- 	}
--- 
-2.1.4
-
diff --git a/0002-Revert-mm-oom-rework-oom-detection.patch b/0002-Revert-mm-oom-rework-oom-detection.patch
deleted file mode 100644
index 5a1ec76..0000000
--- a/0002-Revert-mm-oom-rework-oom-detection.patch
+++ /dev/null
@@ -1,255 +0,0 @@
-From 6e2588df3dc3d1704eae939ed9c9425000f48069 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?=
-Date: Wed, 4 Jan 2017 11:29:26 +0100
-Subject: [PATCH 2/2] Revert "mm, oom: rework oom detection"
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This reverts commit c630ec12d831521b0566481eb56d7257b051911e.
-
-Signed-off-by: Fabian Grünbichler
----
- include/linux/swap.h |   1 -
- mm/page_alloc.c      | 100 +++++----------------------------------------
- mm/vmscan.c          |  25 ++++++++++---
- 3 files changed, 29 insertions(+), 97 deletions(-)
-
-diff --git a/include/linux/swap.h b/include/linux/swap.h
-index 1498c5a..d8ca2ea 100644
---- a/include/linux/swap.h
-+++ b/include/linux/swap.h
-@@ -318,7 +318,6 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
- 					 struct vm_area_struct *vma);
- 
- /* linux/mm/vmscan.c */
--extern unsigned long zone_reclaimable_pages(struct zone *zone);
- extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
- 					gfp_t gfp_mask, nodemask_t *mask);
- extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
-diff --git a/mm/page_alloc.c b/mm/page_alloc.c
-index f13b503..56319cf 100644
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -2988,77 +2988,6 @@ static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
- 	return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
- }
- 
--/*
-- * Maximum number of reclaim retries without any progress before OOM killer
-- * is consider as the only way to move forward.
-- */
--#define MAX_RECLAIM_RETRIES 16
--
--/*
-- * Checks whether it makes sense to retry the reclaim to make a forward progress
-- * for the given allocation request.
-- * The reclaim feedback represented by did_some_progress (any progress during
-- * the last reclaim round), pages_reclaimed (cumulative number of reclaimed
-- * pages) and no_progress_loops (number of reclaim rounds without any progress
-- * in a row) is considered as well as the reclaimable pages on the applicable
-- * zone list (with a backoff mechanism which is a function of no_progress_loops).
-- *
-- * Returns true if a retry is viable or false to enter the oom path.
-- */
--static inline bool
--should_reclaim_retry(gfp_t gfp_mask, unsigned order,
--		     struct alloc_context *ac, int alloc_flags,
--		     bool did_some_progress, unsigned long pages_reclaimed,
--		     int no_progress_loops)
--{
--	struct zone *zone;
--	struct zoneref *z;
--
--	/*
--	 * Make sure we converge to OOM if we cannot make any progress
--	 * several times in the row.
--	 */
--	if (no_progress_loops > MAX_RECLAIM_RETRIES)
--		return false;
--
--	if (order > PAGE_ALLOC_COSTLY_ORDER) {
--		if (pages_reclaimed >= (1<<order))
--			return false;
--
--		if (did_some_progress)
--			return true;
--	}
--
--	/*
--	 * Keep reclaiming pages while there is a chance this will lead somewhere.
--	 * If none of the target zones can satisfy our allocation request even
--	 * if all reclaimable pages are considered then we are screwed and have
--	 * to go OOM.
--	 */
--	for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
--					ac->nodemask) {
--		unsigned long available;
--
--		available = zone_reclaimable_pages(zone);
--		available -= DIV_ROUND_UP(no_progress_loops * available,
--					  MAX_RECLAIM_RETRIES);
--		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
--
--		/*
--		 * Would the allocation succeed if we reclaimed the whole
--		 * available?
--		 */
--		if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
--				ac->high_zoneidx, alloc_flags, available)) {
--			/* Wait for some write requests to complete then retry */
--			wait_iff_congested(zone, BLK_RW_ASYNC, HZ/50);
--			return true;
--		}
--	}
--
--	return false;
--}
--
- static inline struct page *
- __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
- 						struct alloc_context *ac)
-@@ -3071,7 +3000,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
- 	enum migrate_mode migration_mode = MIGRATE_ASYNC;
- 	bool deferred_compaction = false;
- 	int contended_compaction = COMPACT_CONTENDED_NONE;
--	int no_progress_loops = 0;
- 
- 	/*
- 	 * In the slowpath, we sanity check order to avoid ever trying to
-@@ -3223,24 +3151,14 @@ retry:
- 	if (gfp_mask & __GFP_NORETRY)
- 		goto noretry;
- 
--	/*
--	 * Do not retry costly high order allocations unless they are
--	 * __GFP_REPEAT
--	 */
--	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
--		goto noretry;
--
--	if (did_some_progress) {
--		no_progress_loops = 0;
--		pages_reclaimed += did_some_progress;
--	} else {
--		no_progress_loops++;
--	}
--
--	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
--				 did_some_progress > 0, pages_reclaimed,
--				 no_progress_loops))
-+	/* Keep reclaiming pages as long as there is reasonable progress */
-+	pages_reclaimed += did_some_progress;
-+	if ((did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER) ||
-+	    ((gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order))) {
-+		/* Wait for some write requests to complete then retry */
-+		wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50);
- 		goto retry;
-+	}
- 
- 	/* Reclaim has failed us, start killing things */
- 	page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
-@@ -3248,10 +3166,8 @@ retry:
- 		goto got_pg;
- 
- 	/* Retry as long as the OOM killer is making progress */
--	if (did_some_progress) {
--		no_progress_loops = 0;
-+	if (did_some_progress)
- 		goto retry;
--	}
- 
- noretry:
- 	/*
-diff --git a/mm/vmscan.c b/mm/vmscan.c
-index 56f902d..3597160 100644
---- a/mm/vmscan.c
-+++ b/mm/vmscan.c
-@@ -192,7 +192,7 @@ static bool sane_reclaim(struct scan_control *sc)
- }
- #endif
- 
--unsigned long zone_reclaimable_pages(struct zone *zone)
-+static unsigned long zone_reclaimable_pages(struct zone *zone)
- {
- 	unsigned long nr;
- 
-@@ -2492,8 +2492,10 @@ static inline bool compaction_ready(struct zone *zone, int order)
-  *
-  * If a zone is deemed to be full of pinned pages then just give it a light
-  * scan then give up on it.
-+ *
-+ * Returns true if a zone was reclaimable.
-  */
--static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
-+static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
- {
- 	struct zoneref *z;
- 	struct zone *zone;
-@@ -2501,6 +2503,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
- 	unsigned long nr_soft_scanned;
- 	gfp_t orig_mask;
- 	enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
-+	bool reclaimable = false;
- 
- 	/*
- 	 * If the number of buffer_heads in the machine exceeds the maximum
-@@ -2565,10 +2568,17 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
- 						&nr_soft_scanned);
- 			sc->nr_reclaimed += nr_soft_reclaimed;
- 			sc->nr_scanned += nr_soft_scanned;
-+			if (nr_soft_reclaimed)
-+				reclaimable = true;
- 			/* need some check for avoid more shrink_zone() */
- 		}
- 
--		shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
-+		if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
-+			reclaimable = true;
-+
-+		if (global_reclaim(sc) &&
-+		    !reclaimable && zone_reclaimable(zone))
-+			reclaimable = true;
- 	}
- 
- 	/*
-@@ -2576,6 +2586,8 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
- 	 * promoted it to __GFP_HIGHMEM.
- 	 */
- 	sc->gfp_mask = orig_mask;
-+
-+	return reclaimable;
- }
- 
- /*
-@@ -2600,6 +2612,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
- 	int initial_priority = sc->priority;
- 	unsigned long total_scanned = 0;
- 	unsigned long writeback_threshold;
-+	bool zones_reclaimable;
- retry:
- 	delayacct_freepages_start();
- 
-@@ -2610,7 +2623,7 @@ retry:
- 		vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
- 				sc->priority);
- 		sc->nr_scanned = 0;
--		shrink_zones(zonelist, sc);
-+		zones_reclaimable = shrink_zones(zonelist, sc);
- 
- 		total_scanned += sc->nr_scanned;
- 		if (sc->nr_reclaimed >= sc->nr_to_reclaim)
-@@ -2657,6 +2670,10 @@ retry:
- 		goto retry;
- 	}
- 
-+	/* Any of the zones still reclaimable? Don't OOM. */
-+	if (zones_reclaimable)
-+		return 1;
-+
- 	return 0;
- }
- 
--- 
-2.1.4
-
diff --git a/CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch b/CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch
deleted file mode 100644
index 3a984ed..0000000
--- a/CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch
+++ /dev/null
@@ -1,107 +0,0 @@
-From 33ab91103b3415e12457e3104f0e4517ce12d0f3 Mon Sep 17 00:00:00 2001
-From: Paolo Bonzini
-Date: Thu, 12 Jan 2017 15:02:32 +0100
-Subject: KVM: x86: fix emulation of "MOV SS, null selector"
-
-This is CVE-2017-2583.  On Intel this causes a failed vmentry because
-SS's type is neither 3 nor 7 (even though the manual says this check is
-only done for usable SS, and the dmesg splat says that SS is unusable!).
-On AMD it's worse: svm.c is confused and sets CPL to 0 in the vmcb.
-
-The fix fabricates a data segment descriptor when SS is set to a null
-selector, so that CPL and SS.DPL are set correctly in the VMCS/vmcb.
-Furthermore, only allow setting SS to a NULL selector if SS.RPL < 3;
-this in turn ensures CPL < 3 because RPL must be equal to CPL.
-
-Thanks to Andy Lutomirski and Willy Tarreau for help in analyzing
-the bug and deciphering the manuals.
-
-Reported-by: Xiaohan Zhang
-Fixes: 79d5b4c3cd809c770d4bf9812635647016c56011
-Cc: stable@vger.kernel.org
-Signed-off-by: Paolo Bonzini
----
- arch/x86/kvm/emulate.c | 48 ++++++++++++++++++++++++++++++++++++++----------
- 1 file changed, 38 insertions(+), 10 deletions(-)
-
-diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
-index f36d0fa..cedbba0 100644
---- a/arch/x86/kvm/emulate.c
-+++ b/arch/x86/kvm/emulate.c
-@@ -1585,7 +1585,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- 				    &ctxt->exception);
- }
- 
--/* Does not support long mode */
- static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- 				     u16 selector, int seg, u8 cpl,
- 				     enum x86_transfer_type transfer,
-@@ -1622,20 +1621,34 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- 
- 	rpl = selector & 3;
- 
--	/* NULL selector is not valid for TR, CS and SS (except for long mode) */
--	if ((seg == VCPU_SREG_CS
--	     || (seg == VCPU_SREG_SS
--		 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
--	     || seg == VCPU_SREG_TR)
--	    && null_selector)
--		goto exception;
--
- 	/* TR should be in GDT only */
- 	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
- 		goto exception;
- 
--	if (null_selector) /* for NULL selector skip all following checks */
-+	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
-+	if (null_selector) {
-+		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
-+			goto exception;
-+
-+		if (seg == VCPU_SREG_SS) {
-+			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
-+				goto exception;
-+
-+			/*
-+			 * ctxt->ops->set_segment expects the CPL to be in
-+			 * SS.DPL, so fake an expand-up 32-bit data segment.
-+			 */
-+			seg_desc.type = 3;
-+			seg_desc.p = 1;
-+			seg_desc.s = 1;
-+			seg_desc.dpl = cpl;
-+			seg_desc.d = 1;
-+			seg_desc.g = 1;
-+		}
-+
-+		/* Skip all following checks */
- 		goto load;
-+	}
- 
- 	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
- 	if (ret != X86EMUL_CONTINUE)
-@@ -1751,6 +1764,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- 				   u16 selector, int seg)
- {
- 	u8 cpl = ctxt->ops->cpl(ctxt);
-+
-+	/*
-+	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
-+	 * they can load it at CPL<3 (Intel's manual says only LSS can,
-+	 * but it's wrong).
-+	 *
-+	 * However, the Intel manual says that putting IST=1/DPL=3 in
-+	 * an interrupt gate will result in SS=3 (the AMD manual instead
-+	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
-+	 * and only forbid it here.
-+	 */
-+	if (seg == VCPU_SREG_SS && selector == 3 &&
-+	    ctxt->mode == X86EMUL_MODE_PROT64)
-+		return emulate_exception(ctxt, GP_VECTOR, 0, true);
-+
- 	return __load_segment_descriptor(ctxt, selector, seg, cpl,
- 					 X86_TRANSFER_NONE, NULL);
- }
---
-cgit v0.12
-
diff --git a/Makefile b/Makefile
index 12e43d8..7fa9bc2 100644
--- a/Makefile
+++ b/Makefile
@@ -265,9 +265,6 @@ ${KERNEL_SRC}/README ${KERNEL_CFG_ORG}: ${KERNELSRCTAR}
 	# IPoIB performance regression fix
 	cd ${KERNEL_SRC}; patch -p1 < ../IB-ipoib-move-back-the-IB-LL-address-into-the-hard-header.patch
 	cd ${KERNEL_SRC}; patch -p1 < ../cgroup-cpuset-add-cpuset.remap_cpus.patch
-	cd ${KERNEL_SRC}; patch -p1 < ../0001-Revert-mm-throttle-on-IO-only-when-there-are-too-man.patch
-	cd ${KERNEL_SRC}; patch -p1 < ../0002-Revert-mm-oom-rework-oom-detection.patch
-	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-2583-KVM-x86-fix-emulation-of-MOV-SS-null-selector.patch
 	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-2596-kvm-page-reference-leakage-in-handle_vmon.patch
 	cd ${KERNEL_SRC}; patch -p1 < ../CVE-2017-6074-dccp-fix-freeing-skb-too-early-for-IPV6_RECVPKTINFO.patch
 	sed -i ${KERNEL_SRC}/Makefile -e 's/^EXTRAVERSION.*$$/EXTRAVERSION=${EXTRAVERSION}/'
-- 
2.39.2