From: Fabian Grünbichler Date: Wed, 21 Mar 2018 10:27:28 +0000 (+0100) Subject: rebase patches X-Git-Url: https://git.proxmox.com/?p=pve-kernel.git;a=commitdiff_plain;h=ecef40a21813ee7aee692e53b38e08fc975fbeb2 rebase patches Signed-off-by: Fabian Grünbichler --- diff --git a/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch b/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch index 969d5cb..c7fcb74 100644 --- a/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch +++ b/patches/kernel/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch @@ -74,10 +74,10 @@ index 27ca3fbc47aa..5e3caff3fb49 100644 Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index db82bef43b99..ed94ba0d0922 100644 +index 242040c87ce2..3926d5bf4d06 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c -@@ -3695,6 +3695,106 @@ static int __init pci_apply_final_quirks(void) +@@ -3702,6 +3702,106 @@ static int __init pci_apply_final_quirks(void) fs_initcall_sync(pci_apply_final_quirks); @@ -184,7 +184,7 @@ index db82bef43b99..ed94ba0d0922 100644 /* * Following are device-specific reset methods which can be used to * reset a single function if other methods (e.g. FLR, PM D0->D3) are -@@ -4527,6 +4627,7 @@ static const struct pci_dev_acs_enabled { +@@ -4534,6 +4634,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, pci_quirk_cavium_acs }, /* APM X-Gene */ { PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs }, diff --git a/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch b/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch index aa5f518..adf9ef7 100644 --- a/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch +++ b/patches/kernel/0004-kvm-disable-default-dynamic-halt-polling-growth.patch @@ -12,7 +12,7 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 210bf820385a..5b7e582f3742 100644 +index e536977e7b6d..4c63296eb5a8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -77,7 +77,7 @@ module_param(halt_poll_ns, uint, 0644); diff --git a/patches/kernel/0005-ocfs2-make-metadata-estimation-accurate-and-clear.patch b/patches/kernel/0005-ocfs2-make-metadata-estimation-accurate-and-clear.patch new file mode 100644 index 0000000..60e1f42 --- /dev/null +++ b/patches/kernel/0005-ocfs2-make-metadata-estimation-accurate-and-clear.patch @@ -0,0 +1,61 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Changwei Ge +Date: Wed, 31 Jan 2018 16:15:02 -0800 +Subject: [PATCH] ocfs2: make metadata estimation accurate and clear +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Current code assume that ::w_unwritten_list always has only one item on. +This is not right and hard to get understood. So improve how to count +unwritten item. + +Link: http://lkml.kernel.org/r/1515479070-32653-1-git-send-email-ge.changwei@h3c.com +Signed-off-by: Changwei Ge +Reported-by: John Lightsey +Tested-by: John Lightsey +Cc: Mark Fasheh +Cc: Joseph Qi +Cc: Junxiao Bi +Cc: Joel Becker +Cc: Changwei Ge +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +(cherry picked from commit 63de8bd9328bf2a778fc277503da163ae3defa3c) +Signed-off-by: Fabian Grünbichler +--- + fs/ocfs2/aops.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c +index d1516327b787..256986aca8df 100644 +--- a/fs/ocfs2/aops.c ++++ b/fs/ocfs2/aops.c +@@ -797,6 +797,7 @@ struct ocfs2_write_ctxt { + struct ocfs2_cached_dealloc_ctxt w_dealloc; + + struct list_head w_unwritten_list; ++ unsigned int w_unwritten_count; + }; + + void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) +@@ -1386,6 +1387,7 @@ static int ocfs2_unwritten_check(struct inode *inode, + desc->c_clear_unwritten = 0; + list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list); + list_add_tail(&new->ue_node, &wc->w_unwritten_list); ++ wc->w_unwritten_count++; + new = NULL; + unlock: + spin_unlock(&oi->ip_lock); +@@ -2256,7 +2258,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, + ue->ue_phys = desc->c_phys; + + list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list); +- dwc->dw_zero_count++; ++ dwc->dw_zero_count += wc->w_unwritten_count; + } + + ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc); +-- +2.14.2 + diff --git a/patches/kernel/0005-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch b/patches/kernel/0005-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch deleted file mode 100644 index d1a4cd6..0000000 --- a/patches/kernel/0005-sctp-fix-dst-refcnt-leak-in-sctp_v4_get_dst.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Tommi Rantala -Date: Mon, 5 Feb 2018 21:48:14 +0200 -Subject: [PATCH] sctp: fix dst refcnt leak in sctp_v4_get_dst -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Fix dst reference count leak in sctp_v4_get_dst() introduced in commit -410f03831 ("sctp: add routing output fallback"): - -When walking the address_list, successive ip_route_output_key() calls -may return the same rt->dst with the reference incremented on each call. - -The code would not decrement the dst refcount when the dst pointer was -identical from the previous iteration, causing the dst refcnt leak. - -Testcase: - ip netns add TEST - ip netns exec TEST ip link set lo up - ip link add dummy0 type dummy - ip link add dummy1 type dummy - ip link add dummy2 type dummy - ip link set dev dummy0 netns TEST - ip link set dev dummy1 netns TEST - ip link set dev dummy2 netns TEST - ip netns exec TEST ip addr add 192.168.1.1/24 dev dummy0 - ip netns exec TEST ip link set dummy0 up - ip netns exec TEST ip addr add 192.168.1.2/24 dev dummy1 - ip netns exec TEST ip link set dummy1 up - ip netns exec TEST ip addr add 192.168.1.3/24 dev dummy2 - ip netns exec TEST ip link set dummy2 up - ip netns exec TEST sctp_test -H 192.168.1.2 -P 20002 -h 192.168.1.1 -p 20000 -s -B 192.168.1.3 - ip netns del TEST - -In 4.4 and 4.9 kernels this results to: - [ 354.179591] unregister_netdevice: waiting for lo to become free. Usage count = 1 - [ 364.419674] unregister_netdevice: waiting for lo to become free. Usage count = 1 - [ 374.663664] unregister_netdevice: waiting for lo to become free. Usage count = 1 - [ 384.903717] unregister_netdevice: waiting for lo to become free. Usage count = 1 - [ 395.143724] unregister_netdevice: waiting for lo to become free. Usage count = 1 - [ 405.383645] unregister_netdevice: waiting for lo to become free. Usage count = 1 - ... - -Fixes: 410f03831 ("sctp: add routing output fallback") -Fixes: 0ca50d12f ("sctp: fix src address selection if using secondary addresses") -Signed-off-by: Tommi Rantala -Acked-by: Marcelo Ricardo Leitner -Acked-by: Neil Horman -Signed-off-by: David S. Miller -Signed-off-by: Fabian Grünbichler ---- - net/sctp/protocol.c | 10 ++++------ - 1 file changed, 4 insertions(+), 6 deletions(-) - -diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c -index 6a38c2503649..91813e686c67 100644 ---- a/net/sctp/protocol.c -+++ b/net/sctp/protocol.c -@@ -514,22 +514,20 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, - if (IS_ERR(rt)) - continue; - -- if (!dst) -- dst = &rt->dst; -- - /* Ensure the src address belongs to the output - * interface. - */ - odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr, - false); - if (!odev || odev->ifindex != fl4->flowi4_oif) { -- if (&rt->dst != dst) -+ if (!dst) -+ dst = &rt->dst; -+ else - dst_release(&rt->dst); - continue; - } - -- if (dst != &rt->dst) -- dst_release(dst); -+ dst_release(dst); - dst = &rt->dst; - break; - } --- -2.14.2 - diff --git a/patches/kernel/0006-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch b/patches/kernel/0006-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch new file mode 100644 index 0000000..b2bba0b --- /dev/null +++ b/patches/kernel/0006-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch @@ -0,0 +1,370 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Changwei Ge +Date: Wed, 31 Jan 2018 16:15:06 -0800 +Subject: [PATCH] ocfs2: try to reuse extent block in dealloc without + meta_alloc +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +A crash issue was reported by John Lightsey with a call trace as follows: + + ocfs2_split_extent+0x1ad3/0x1b40 [ocfs2] + ocfs2_change_extent_flag+0x33a/0x470 [ocfs2] + ocfs2_mark_extent_written+0x172/0x220 [ocfs2] + ocfs2_dio_end_io+0x62d/0x910 [ocfs2] + dio_complete+0x19a/0x1a0 + do_blockdev_direct_IO+0x19dd/0x1eb0 + __blockdev_direct_IO+0x43/0x50 + ocfs2_direct_IO+0x8f/0xa0 [ocfs2] + generic_file_direct_write+0xb2/0x170 + __generic_file_write_iter+0xc3/0x1b0 + ocfs2_file_write_iter+0x4bb/0xca0 [ocfs2] + __vfs_write+0xae/0xf0 + vfs_write+0xb8/0x1b0 + SyS_write+0x4f/0xb0 + system_call_fastpath+0x16/0x75 + +The BUG code told that extent tree wants to grow but no metadata was +reserved ahead of time. From my investigation into this issue, the root +cause it that although enough metadata is not reserved, there should be +enough for following use. Rightmost extent is merged into its left one +due to a certain times of marking extent written. Because during +marking extent written, we got many physically continuous extents. At +last, an empty extent showed up and the rightmost path is removed from +extent tree. + +Add a new mechanism to reuse extent block cached in dealloc which were +just unlinked from extent tree to solve this crash issue. + +Criteria is that during marking extents *written*, if extent rotation +and merging results in unlinking extent with growing extent tree later +without any metadata reserved ahead of time, try to reuse those extents +in dealloc in which deleted extents are cached. + +Also, this patch addresses the issue John reported that ::dw_zero_count +is not calculated properly. + +After applying this patch, the issue John reported was gone. Thanks for +the reproducer provided by John. And this patch has passed +ocfs2-test(29 cases) suite running by New H3C Group. + +[ge.changwei@h3c.com: fix static checker warnning] + Link: http://lkml.kernel.org/r/63ADC13FD55D6546B7DECE290D39E373F29196AE@H3CMLB12-EX.srv.huawei-3com.com +[akpm@linux-foundation.org: brelse(NULL) is legal] +Link: http://lkml.kernel.org/r/1515479070-32653-2-git-send-email-ge.changwei@h3c.com +Signed-off-by: Changwei Ge +Reported-by: John Lightsey +Tested-by: John Lightsey +Cc: Joel Becker +Cc: Joseph Qi +Cc: Junxiao Bi +Cc: Dan Carpenter +Cc: Mark Fasheh +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +(cherry picked from commit 71a36944042b7d9dd71f6a5d1c5ea1c2353b5d42) +Signed-off-by: Fabian Grünbichler +--- + fs/ocfs2/alloc.h | 1 + + fs/ocfs2/alloc.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- + fs/ocfs2/aops.c | 6 ++ + 3 files changed, 203 insertions(+), 10 deletions(-) + +diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h +index 27b75cf32cfa..250bcacdf9e9 100644 +--- a/fs/ocfs2/alloc.h ++++ b/fs/ocfs2/alloc.h +@@ -61,6 +61,7 @@ struct ocfs2_extent_tree { + ocfs2_journal_access_func et_root_journal_access; + void *et_object; + unsigned int et_max_leaf_clusters; ++ struct ocfs2_cached_dealloc_ctxt *et_dealloc; + }; + + /* +diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c +index ab5105f9767e..2f2c76193f54 100644 +--- a/fs/ocfs2/alloc.c ++++ b/fs/ocfs2/alloc.c +@@ -165,6 +165,13 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et, + struct ocfs2_extent_rec *rec); + static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et); + static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); ++ ++static int ocfs2_reuse_blk_from_dealloc(handle_t *handle, ++ struct ocfs2_extent_tree *et, ++ struct buffer_head **new_eb_bh, ++ int blk_wanted, int *blk_given); ++static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et); ++ + static const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { + .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, + .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, +@@ -448,6 +455,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, + if (!obj) + obj = (void *)bh->b_data; + et->et_object = obj; ++ et->et_dealloc = NULL; + + et->et_ops->eo_fill_root_el(et); + if (!et->et_ops->eo_fill_max_leaf_clusters) +@@ -1158,7 +1166,7 @@ static int ocfs2_add_branch(handle_t *handle, + struct buffer_head **last_eb_bh, + struct ocfs2_alloc_context *meta_ac) + { +- int status, new_blocks, i; ++ int status, new_blocks, i, block_given = 0; + u64 next_blkno, new_last_eb_blk; + struct buffer_head *bh; + struct buffer_head **new_eb_bhs = NULL; +@@ -1213,11 +1221,31 @@ static int ocfs2_add_branch(handle_t *handle, + goto bail; + } + +- status = ocfs2_create_new_meta_bhs(handle, et, new_blocks, +- meta_ac, new_eb_bhs); +- if (status < 0) { +- mlog_errno(status); +- goto bail; ++ /* Firstyly, try to reuse dealloc since we have already estimated how ++ * many extent blocks we may use. ++ */ ++ if (!ocfs2_is_dealloc_empty(et)) { ++ status = ocfs2_reuse_blk_from_dealloc(handle, et, ++ new_eb_bhs, new_blocks, ++ &block_given); ++ if (status < 0) { ++ mlog_errno(status); ++ goto bail; ++ } ++ } ++ ++ BUG_ON(block_given > new_blocks); ++ ++ if (block_given < new_blocks) { ++ BUG_ON(!meta_ac); ++ status = ocfs2_create_new_meta_bhs(handle, et, ++ new_blocks - block_given, ++ meta_ac, ++ &new_eb_bhs[block_given]); ++ if (status < 0) { ++ mlog_errno(status); ++ goto bail; ++ } + } + + /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be +@@ -1340,15 +1368,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle, + struct ocfs2_alloc_context *meta_ac, + struct buffer_head **ret_new_eb_bh) + { +- int status, i; ++ int status, i, block_given = 0; + u32 new_clusters; + struct buffer_head *new_eb_bh = NULL; + struct ocfs2_extent_block *eb; + struct ocfs2_extent_list *root_el; + struct ocfs2_extent_list *eb_el; + +- status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, +- &new_eb_bh); ++ if (!ocfs2_is_dealloc_empty(et)) { ++ status = ocfs2_reuse_blk_from_dealloc(handle, et, ++ &new_eb_bh, 1, ++ &block_given); ++ } else if (meta_ac) { ++ status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, ++ &new_eb_bh); ++ ++ } else { ++ BUG(); ++ } ++ + if (status < 0) { + mlog_errno(status); + goto bail; +@@ -1511,7 +1549,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et, + int depth = le16_to_cpu(el->l_tree_depth); + struct buffer_head *bh = NULL; + +- BUG_ON(meta_ac == NULL); ++ BUG_ON(meta_ac == NULL && ocfs2_is_dealloc_empty(et)); + + shift = ocfs2_find_branch_target(et, &bh); + if (shift < 0) { +@@ -6585,6 +6623,154 @@ ocfs2_find_per_slot_free_list(int type, + return fl; + } + ++static struct ocfs2_per_slot_free_list * ++ocfs2_find_preferred_free_list(int type, ++ int preferred_slot, ++ int *real_slot, ++ struct ocfs2_cached_dealloc_ctxt *ctxt) ++{ ++ struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; ++ ++ while (fl) { ++ if (fl->f_inode_type == type && fl->f_slot == preferred_slot) { ++ *real_slot = fl->f_slot; ++ return fl; ++ } ++ ++ fl = fl->f_next_suballocator; ++ } ++ ++ /* If we can't find any free list matching preferred slot, just use ++ * the first one. ++ */ ++ fl = ctxt->c_first_suballocator; ++ *real_slot = fl->f_slot; ++ ++ return fl; ++} ++ ++/* Return Value 1 indicates empty */ ++static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et) ++{ ++ struct ocfs2_per_slot_free_list *fl = NULL; ++ ++ if (!et->et_dealloc) ++ return 1; ++ ++ fl = et->et_dealloc->c_first_suballocator; ++ if (!fl) ++ return 1; ++ ++ if (!fl->f_first) ++ return 1; ++ ++ return 0; ++} ++ ++/* If extent was deleted from tree due to extent rotation and merging, and ++ * no metadata is reserved ahead of time. Try to reuse some extents ++ * just deleted. This is only used to reuse extent blocks. ++ * It is supposed to find enough extent blocks in dealloc if our estimation ++ * on metadata is accurate. ++ */ ++static int ocfs2_reuse_blk_from_dealloc(handle_t *handle, ++ struct ocfs2_extent_tree *et, ++ struct buffer_head **new_eb_bh, ++ int blk_wanted, int *blk_given) ++{ ++ int i, status = 0, real_slot; ++ struct ocfs2_cached_dealloc_ctxt *dealloc; ++ struct ocfs2_per_slot_free_list *fl; ++ struct ocfs2_cached_block_free *bf; ++ struct ocfs2_extent_block *eb; ++ struct ocfs2_super *osb = ++ OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); ++ ++ *blk_given = 0; ++ ++ /* If extent tree doesn't have a dealloc, this is not faulty. Just ++ * tell upper caller dealloc can't provide any block and it should ++ * ask for alloc to claim more space. ++ */ ++ dealloc = et->et_dealloc; ++ if (!dealloc) ++ goto bail; ++ ++ for (i = 0; i < blk_wanted; i++) { ++ /* Prefer to use local slot */ ++ fl = ocfs2_find_preferred_free_list(EXTENT_ALLOC_SYSTEM_INODE, ++ osb->slot_num, &real_slot, ++ dealloc); ++ /* If no more block can be reused, we should claim more ++ * from alloc. Just return here normally. ++ */ ++ if (!fl) { ++ status = 0; ++ break; ++ } ++ ++ bf = fl->f_first; ++ fl->f_first = bf->free_next; ++ ++ new_eb_bh[i] = sb_getblk(osb->sb, bf->free_blk); ++ if (new_eb_bh[i] == NULL) { ++ status = -ENOMEM; ++ mlog_errno(status); ++ goto bail; ++ } ++ ++ mlog(0, "Reusing block(%llu) from " ++ "dealloc(local slot:%d, real slot:%d)\n", ++ bf->free_blk, osb->slot_num, real_slot); ++ ++ ocfs2_set_new_buffer_uptodate(et->et_ci, new_eb_bh[i]); ++ ++ status = ocfs2_journal_access_eb(handle, et->et_ci, ++ new_eb_bh[i], ++ OCFS2_JOURNAL_ACCESS_CREATE); ++ if (status < 0) { ++ mlog_errno(status); ++ goto bail; ++ } ++ ++ memset(new_eb_bh[i]->b_data, 0, osb->sb->s_blocksize); ++ eb = (struct ocfs2_extent_block *) new_eb_bh[i]->b_data; ++ ++ /* We can't guarantee that buffer head is still cached, so ++ * polutlate the extent block again. ++ */ ++ strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); ++ eb->h_blkno = cpu_to_le64(bf->free_blk); ++ eb->h_fs_generation = cpu_to_le32(osb->fs_generation); ++ eb->h_suballoc_slot = cpu_to_le16(real_slot); ++ eb->h_suballoc_loc = cpu_to_le64(bf->free_bg); ++ eb->h_suballoc_bit = cpu_to_le16(bf->free_bit); ++ eb->h_list.l_count = ++ cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); ++ ++ /* We'll also be dirtied by the caller, so ++ * this isn't absolutely necessary. ++ */ ++ ocfs2_journal_dirty(handle, new_eb_bh[i]); ++ ++ if (!fl->f_first) { ++ dealloc->c_first_suballocator = fl->f_next_suballocator; ++ kfree(fl); ++ } ++ kfree(bf); ++ } ++ ++ *blk_given = i; ++ ++bail: ++ if (unlikely(status < 0)) { ++ for (i = 0; i < blk_wanted; i++) ++ brelse(new_eb_bh[i]); ++ } ++ ++ return status; ++} ++ + int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, + int type, int slot, u64 suballoc, + u64 blkno, unsigned int bit) +diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c +index 256986aca8df..e8e205bf2e41 100644 +--- a/fs/ocfs2/aops.c ++++ b/fs/ocfs2/aops.c +@@ -2332,6 +2332,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode, + + ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); + ++ /* Attach dealloc with extent tree in case that we may reuse extents ++ * which are already unlinked from current extent tree due to extent ++ * rotation and merging. ++ */ ++ et.et_dealloc = &dealloc; ++ + ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2, + &data_ac, &meta_ac); + if (ret) { +-- +2.14.2 + diff --git a/patches/kernel/0006-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch b/patches/kernel/0006-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch deleted file mode 100644 index ea351e7..0000000 --- a/patches/kernel/0006-sctp-fix-dst-refcnt-leak-in-sctp_v6_get_dst.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Alexey Kodanev -Date: Mon, 5 Feb 2018 15:10:35 +0300 -Subject: [PATCH] sctp: fix dst refcnt leak in sctp_v6_get_dst() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When going through the bind address list in sctp_v6_get_dst() and -the previously found address is better ('matchlen > bmatchlen'), -the code continues to the next iteration without releasing currently -held destination. - -Fix it by releasing 'bdst' before continue to the next iteration, and -instead of introducing one more '!IS_ERR(bdst)' check for dst_release(), -move the already existed one right after ip6_dst_lookup_flow(), i.e. we -shouldn't proceed further if we get an error for the route lookup. - -Fixes: dbc2b5e9a09e ("sctp: fix src address selection if using secondary addresses for ipv6") -Signed-off-by: Alexey Kodanev -Acked-by: Neil Horman -Acked-by: Marcelo Ricardo Leitner -Signed-off-by: David S. Miller -Signed-off-by: Fabian Grünbichler ---- - net/sctp/ipv6.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c -index 5d4c15bf66d2..e35d4f73d2df 100644 ---- a/net/sctp/ipv6.c -+++ b/net/sctp/ipv6.c -@@ -326,8 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, - final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); - bdst = ip6_dst_lookup_flow(sk, fl6, final_p); - -- if (!IS_ERR(bdst) && -- ipv6_chk_addr(dev_net(bdst->dev), -+ if (IS_ERR(bdst)) -+ continue; -+ -+ if (ipv6_chk_addr(dev_net(bdst->dev), - &laddr->a.v6.sin6_addr, bdst->dev, 1)) { - if (!IS_ERR_OR_NULL(dst)) - dst_release(dst); -@@ -336,8 +338,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, - } - - bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); -- if (matchlen > bmatchlen) -+ if (matchlen > bmatchlen) { -+ dst_release(bdst); - continue; -+ } - - if (!IS_ERR_OR_NULL(dst)) - dst_release(dst); --- -2.14.2 - diff --git a/patches/kernel/0007-ocfs2-make-metadata-estimation-accurate-and-clear.patch b/patches/kernel/0007-ocfs2-make-metadata-estimation-accurate-and-clear.patch deleted file mode 100644 index 60e1f42..0000000 --- a/patches/kernel/0007-ocfs2-make-metadata-estimation-accurate-and-clear.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Changwei Ge -Date: Wed, 31 Jan 2018 16:15:02 -0800 -Subject: [PATCH] ocfs2: make metadata estimation accurate and clear -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Current code assume that ::w_unwritten_list always has only one item on. -This is not right and hard to get understood. So improve how to count -unwritten item. - -Link: http://lkml.kernel.org/r/1515479070-32653-1-git-send-email-ge.changwei@h3c.com -Signed-off-by: Changwei Ge -Reported-by: John Lightsey -Tested-by: John Lightsey -Cc: Mark Fasheh -Cc: Joseph Qi -Cc: Junxiao Bi -Cc: Joel Becker -Cc: Changwei Ge -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -(cherry picked from commit 63de8bd9328bf2a778fc277503da163ae3defa3c) -Signed-off-by: Fabian Grünbichler ---- - fs/ocfs2/aops.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c -index d1516327b787..256986aca8df 100644 ---- a/fs/ocfs2/aops.c -+++ b/fs/ocfs2/aops.c -@@ -797,6 +797,7 @@ struct ocfs2_write_ctxt { - struct ocfs2_cached_dealloc_ctxt w_dealloc; - - struct list_head w_unwritten_list; -+ unsigned int w_unwritten_count; - }; - - void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) -@@ -1386,6 +1387,7 @@ static int ocfs2_unwritten_check(struct inode *inode, - desc->c_clear_unwritten = 0; - list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list); - list_add_tail(&new->ue_node, &wc->w_unwritten_list); -+ wc->w_unwritten_count++; - new = NULL; - unlock: - spin_unlock(&oi->ip_lock); -@@ -2256,7 +2258,7 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, - ue->ue_phys = desc->c_phys; - - list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list); -- dwc->dw_zero_count++; -+ dwc->dw_zero_count += wc->w_unwritten_count; - } - - ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc); --- -2.14.2 - diff --git a/patches/kernel/0008-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch b/patches/kernel/0008-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch deleted file mode 100644 index b2bba0b..0000000 --- a/patches/kernel/0008-ocfs2-try-to-reuse-extent-block-in-dealloc-without-m.patch +++ /dev/null @@ -1,370 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Changwei Ge -Date: Wed, 31 Jan 2018 16:15:06 -0800 -Subject: [PATCH] ocfs2: try to reuse extent block in dealloc without - meta_alloc -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -A crash issue was reported by John Lightsey with a call trace as follows: - - ocfs2_split_extent+0x1ad3/0x1b40 [ocfs2] - ocfs2_change_extent_flag+0x33a/0x470 [ocfs2] - ocfs2_mark_extent_written+0x172/0x220 [ocfs2] - ocfs2_dio_end_io+0x62d/0x910 [ocfs2] - dio_complete+0x19a/0x1a0 - do_blockdev_direct_IO+0x19dd/0x1eb0 - __blockdev_direct_IO+0x43/0x50 - ocfs2_direct_IO+0x8f/0xa0 [ocfs2] - generic_file_direct_write+0xb2/0x170 - __generic_file_write_iter+0xc3/0x1b0 - ocfs2_file_write_iter+0x4bb/0xca0 [ocfs2] - __vfs_write+0xae/0xf0 - vfs_write+0xb8/0x1b0 - SyS_write+0x4f/0xb0 - system_call_fastpath+0x16/0x75 - -The BUG code told that extent tree wants to grow but no metadata was -reserved ahead of time. From my investigation into this issue, the root -cause it that although enough metadata is not reserved, there should be -enough for following use. Rightmost extent is merged into its left one -due to a certain times of marking extent written. Because during -marking extent written, we got many physically continuous extents. At -last, an empty extent showed up and the rightmost path is removed from -extent tree. - -Add a new mechanism to reuse extent block cached in dealloc which were -just unlinked from extent tree to solve this crash issue. - -Criteria is that during marking extents *written*, if extent rotation -and merging results in unlinking extent with growing extent tree later -without any metadata reserved ahead of time, try to reuse those extents -in dealloc in which deleted extents are cached. - -Also, this patch addresses the issue John reported that ::dw_zero_count -is not calculated properly. - -After applying this patch, the issue John reported was gone. Thanks for -the reproducer provided by John. And this patch has passed -ocfs2-test(29 cases) suite running by New H3C Group. - -[ge.changwei@h3c.com: fix static checker warnning] - Link: http://lkml.kernel.org/r/63ADC13FD55D6546B7DECE290D39E373F29196AE@H3CMLB12-EX.srv.huawei-3com.com -[akpm@linux-foundation.org: brelse(NULL) is legal] -Link: http://lkml.kernel.org/r/1515479070-32653-2-git-send-email-ge.changwei@h3c.com -Signed-off-by: Changwei Ge -Reported-by: John Lightsey -Tested-by: John Lightsey -Cc: Joel Becker -Cc: Joseph Qi -Cc: Junxiao Bi -Cc: Dan Carpenter -Cc: Mark Fasheh -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -(cherry picked from commit 71a36944042b7d9dd71f6a5d1c5ea1c2353b5d42) -Signed-off-by: Fabian Grünbichler ---- - fs/ocfs2/alloc.h | 1 + - fs/ocfs2/alloc.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- - fs/ocfs2/aops.c | 6 ++ - 3 files changed, 203 insertions(+), 10 deletions(-) - -diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h -index 27b75cf32cfa..250bcacdf9e9 100644 ---- a/fs/ocfs2/alloc.h -+++ b/fs/ocfs2/alloc.h -@@ -61,6 +61,7 @@ struct ocfs2_extent_tree { - ocfs2_journal_access_func et_root_journal_access; - void *et_object; - unsigned int et_max_leaf_clusters; -+ struct ocfs2_cached_dealloc_ctxt *et_dealloc; - }; - - /* -diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c -index ab5105f9767e..2f2c76193f54 100644 ---- a/fs/ocfs2/alloc.c -+++ b/fs/ocfs2/alloc.c -@@ -165,6 +165,13 @@ static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et, - struct ocfs2_extent_rec *rec); - static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et); - static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); -+ -+static int ocfs2_reuse_blk_from_dealloc(handle_t *handle, -+ struct ocfs2_extent_tree *et, -+ struct buffer_head **new_eb_bh, -+ int blk_wanted, int *blk_given); -+static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et); -+ - static const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { - .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, - .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, -@@ -448,6 +455,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, - if (!obj) - obj = (void *)bh->b_data; - et->et_object = obj; -+ et->et_dealloc = NULL; - - et->et_ops->eo_fill_root_el(et); - if (!et->et_ops->eo_fill_max_leaf_clusters) -@@ -1158,7 +1166,7 @@ static int ocfs2_add_branch(handle_t *handle, - struct buffer_head **last_eb_bh, - struct ocfs2_alloc_context *meta_ac) - { -- int status, new_blocks, i; -+ int status, new_blocks, i, block_given = 0; - u64 next_blkno, new_last_eb_blk; - struct buffer_head *bh; - struct buffer_head **new_eb_bhs = NULL; -@@ -1213,11 +1221,31 @@ static int ocfs2_add_branch(handle_t *handle, - goto bail; - } - -- status = ocfs2_create_new_meta_bhs(handle, et, new_blocks, -- meta_ac, new_eb_bhs); -- if (status < 0) { -- mlog_errno(status); -- goto bail; -+ /* Firstyly, try to reuse dealloc since we have already estimated how -+ * many extent blocks we may use. -+ */ -+ if (!ocfs2_is_dealloc_empty(et)) { -+ status = ocfs2_reuse_blk_from_dealloc(handle, et, -+ new_eb_bhs, new_blocks, -+ &block_given); -+ if (status < 0) { -+ mlog_errno(status); -+ goto bail; -+ } -+ } -+ -+ BUG_ON(block_given > new_blocks); -+ -+ if (block_given < new_blocks) { -+ BUG_ON(!meta_ac); -+ status = ocfs2_create_new_meta_bhs(handle, et, -+ new_blocks - block_given, -+ meta_ac, -+ &new_eb_bhs[block_given]); -+ if (status < 0) { -+ mlog_errno(status); -+ goto bail; -+ } - } - - /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be -@@ -1340,15 +1368,25 @@ static int ocfs2_shift_tree_depth(handle_t *handle, - struct ocfs2_alloc_context *meta_ac, - struct buffer_head **ret_new_eb_bh) - { -- int status, i; -+ int status, i, block_given = 0; - u32 new_clusters; - struct buffer_head *new_eb_bh = NULL; - struct ocfs2_extent_block *eb; - struct ocfs2_extent_list *root_el; - struct ocfs2_extent_list *eb_el; - -- status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, -- &new_eb_bh); -+ if (!ocfs2_is_dealloc_empty(et)) { -+ status = ocfs2_reuse_blk_from_dealloc(handle, et, -+ &new_eb_bh, 1, -+ &block_given); -+ } else if (meta_ac) { -+ status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac, -+ &new_eb_bh); -+ -+ } else { -+ BUG(); -+ } -+ - if (status < 0) { - mlog_errno(status); - goto bail; -@@ -1511,7 +1549,7 @@ static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et, - int depth = le16_to_cpu(el->l_tree_depth); - struct buffer_head *bh = NULL; - -- BUG_ON(meta_ac == NULL); -+ BUG_ON(meta_ac == NULL && ocfs2_is_dealloc_empty(et)); - - shift = ocfs2_find_branch_target(et, &bh); - if (shift < 0) { -@@ -6585,6 +6623,154 @@ ocfs2_find_per_slot_free_list(int type, - return fl; - } - -+static struct ocfs2_per_slot_free_list * -+ocfs2_find_preferred_free_list(int type, -+ int preferred_slot, -+ int *real_slot, -+ struct ocfs2_cached_dealloc_ctxt *ctxt) -+{ -+ struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; -+ -+ while (fl) { -+ if (fl->f_inode_type == type && fl->f_slot == preferred_slot) { -+ *real_slot = fl->f_slot; -+ return fl; -+ } -+ -+ fl = fl->f_next_suballocator; -+ } -+ -+ /* If we can't find any free list matching preferred slot, just use -+ * the first one. -+ */ -+ fl = ctxt->c_first_suballocator; -+ *real_slot = fl->f_slot; -+ -+ return fl; -+} -+ -+/* Return Value 1 indicates empty */ -+static int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et) -+{ -+ struct ocfs2_per_slot_free_list *fl = NULL; -+ -+ if (!et->et_dealloc) -+ return 1; -+ -+ fl = et->et_dealloc->c_first_suballocator; -+ if (!fl) -+ return 1; -+ -+ if (!fl->f_first) -+ return 1; -+ -+ return 0; -+} -+ -+/* If extent was deleted from tree due to extent rotation and merging, and -+ * no metadata is reserved ahead of time. Try to reuse some extents -+ * just deleted. This is only used to reuse extent blocks. -+ * It is supposed to find enough extent blocks in dealloc if our estimation -+ * on metadata is accurate. -+ */ -+static int ocfs2_reuse_blk_from_dealloc(handle_t *handle, -+ struct ocfs2_extent_tree *et, -+ struct buffer_head **new_eb_bh, -+ int blk_wanted, int *blk_given) -+{ -+ int i, status = 0, real_slot; -+ struct ocfs2_cached_dealloc_ctxt *dealloc; -+ struct ocfs2_per_slot_free_list *fl; -+ struct ocfs2_cached_block_free *bf; -+ struct ocfs2_extent_block *eb; -+ struct ocfs2_super *osb = -+ OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); -+ -+ *blk_given = 0; -+ -+ /* If extent tree doesn't have a dealloc, this is not faulty. Just -+ * tell upper caller dealloc can't provide any block and it should -+ * ask for alloc to claim more space. -+ */ -+ dealloc = et->et_dealloc; -+ if (!dealloc) -+ goto bail; -+ -+ for (i = 0; i < blk_wanted; i++) { -+ /* Prefer to use local slot */ -+ fl = ocfs2_find_preferred_free_list(EXTENT_ALLOC_SYSTEM_INODE, -+ osb->slot_num, &real_slot, -+ dealloc); -+ /* If no more block can be reused, we should claim more -+ * from alloc. Just return here normally. -+ */ -+ if (!fl) { -+ status = 0; -+ break; -+ } -+ -+ bf = fl->f_first; -+ fl->f_first = bf->free_next; -+ -+ new_eb_bh[i] = sb_getblk(osb->sb, bf->free_blk); -+ if (new_eb_bh[i] == NULL) { -+ status = -ENOMEM; -+ mlog_errno(status); -+ goto bail; -+ } -+ -+ mlog(0, "Reusing block(%llu) from " -+ "dealloc(local slot:%d, real slot:%d)\n", -+ bf->free_blk, osb->slot_num, real_slot); -+ -+ ocfs2_set_new_buffer_uptodate(et->et_ci, new_eb_bh[i]); -+ -+ status = ocfs2_journal_access_eb(handle, et->et_ci, -+ new_eb_bh[i], -+ OCFS2_JOURNAL_ACCESS_CREATE); -+ if (status < 0) { -+ mlog_errno(status); -+ goto bail; -+ } -+ -+ memset(new_eb_bh[i]->b_data, 0, osb->sb->s_blocksize); -+ eb = (struct ocfs2_extent_block *) new_eb_bh[i]->b_data; -+ -+ /* We can't guarantee that buffer head is still cached, so -+ * polutlate the extent block again. -+ */ -+ strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); -+ eb->h_blkno = cpu_to_le64(bf->free_blk); -+ eb->h_fs_generation = cpu_to_le32(osb->fs_generation); -+ eb->h_suballoc_slot = cpu_to_le16(real_slot); -+ eb->h_suballoc_loc = cpu_to_le64(bf->free_bg); -+ eb->h_suballoc_bit = cpu_to_le16(bf->free_bit); -+ eb->h_list.l_count = -+ cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); -+ -+ /* We'll also be dirtied by the caller, so -+ * this isn't absolutely necessary. -+ */ -+ ocfs2_journal_dirty(handle, new_eb_bh[i]); -+ -+ if (!fl->f_first) { -+ dealloc->c_first_suballocator = fl->f_next_suballocator; -+ kfree(fl); -+ } -+ kfree(bf); -+ } -+ -+ *blk_given = i; -+ -+bail: -+ if (unlikely(status < 0)) { -+ for (i = 0; i < blk_wanted; i++) -+ brelse(new_eb_bh[i]); -+ } -+ -+ return status; -+} -+ - int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, - int type, int slot, u64 suballoc, - u64 blkno, unsigned int bit) -diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c -index 256986aca8df..e8e205bf2e41 100644 ---- a/fs/ocfs2/aops.c -+++ b/fs/ocfs2/aops.c -@@ -2332,6 +2332,12 @@ static int ocfs2_dio_end_io_write(struct inode *inode, - - ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); - -+ /* Attach dealloc with extent tree in case that we may reuse extents -+ * which are already unlinked from current extent tree due to extent -+ * rotation and merging. -+ */ -+ et.et_dealloc = &dealloc; -+ - ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2, - &data_ac, &meta_ac); - if (ret) { --- -2.14.2 -