--- /dev/null
+dm-dust
+=======
+
+This target emulates the behavior of bad sectors at arbitrary
+locations, and the ability to enable the emulation of the failures
+at an arbitrary time.
+
+This target behaves similarly to a linear target. At a given time,
+the user can send a message to the target to start failing read
+requests on specific blocks (to emulate the behavior of a hard disk
+drive with bad sectors).
+
+When the failure behavior is enabled (i.e.: when the output of
+"dmsetup status" displays "fail_read_on_bad_block"), reads of blocks
+in the "bad block list" will fail with EIO ("Input/output error").
+
+Writes of blocks in the "bad block list will result in the following:
+
+1. Remove the block from the "bad block list".
+2. Successfully complete the write.
+
+This emulates the "remapped sector" behavior of a drive with bad
+sectors.
+
+Normally, a drive that is encountering bad sectors will most likely
+encounter more bad sectors, at an unknown time or location.
+With dm-dust, the user can use the "addbadblock" and "removebadblock"
+messages to add arbitrary bad blocks at new locations, and the
+"enable" and "disable" messages to modulate the state of whether the
+configured "bad blocks" will be treated as bad, or bypassed.
+This allows the pre-writing of test data and metadata prior to
+simulating a "failure" event where bad sectors start to appear.
+
+Table parameters:
+-----------------
+<device_path> <offset> <blksz>
+
+Mandatory parameters:
+ <device_path>: path to the block device.
+ <offset>: offset to data area from start of device_path
+ <blksz>: block size in bytes
+ (minimum 512, maximum 1073741824, must be a power of 2)
+
+Usage instructions:
+-------------------
+
+First, find the size (in 512-byte sectors) of the device to be used:
+
+$ sudo blockdev --getsz /dev/vdb1
+33552384
+
+Create the dm-dust device:
+(For a device with a block size of 512 bytes)
+$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 512'
+
+(For a device with a block size of 4096 bytes)
+$ sudo dmsetup create dust1 --table '0 33552384 dust /dev/vdb1 0 4096'
+
+Check the status of the read behavior ("bypass" indicates that all I/O
+will be passed through to the underlying device):
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 bypass
+
+$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=128 iflag=direct
+128+0 records in
+128+0 records out
+
+$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+128+0 records in
+128+0 records out
+
+Adding and removing bad blocks:
+-------------------------------
+
+At any time (i.e.: whether the device has the "bad block" emulation
+enabled or disabled), bad blocks may be added or removed from the
+device via the "addbadblock" and "removebadblock" messages:
+
+$ sudo dmsetup message dust1 0 addbadblock 60
+kernel: device-mapper: dust: badblock added at block 60
+
+$ sudo dmsetup message dust1 0 addbadblock 67
+kernel: device-mapper: dust: badblock added at block 67
+
+$ sudo dmsetup message dust1 0 addbadblock 72
+kernel: device-mapper: dust: badblock added at block 72
+
+These bad blocks will be stored in the "bad block list".
+While the device is in "bypass" mode, reads and writes will succeed:
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 bypass
+
+Enabling block read failures:
+-----------------------------
+
+To enable the "fail read on bad block" behavior, send the "enable" message:
+
+$ sudo dmsetup message dust1 0 enable
+kernel: device-mapper: dust: enabling read failures on bad sectors
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block
+
+With the device in "fail read on bad block" mode, attempting to read a
+block will encounter an "Input/output error":
+
+$ sudo dd if=/dev/mapper/dust1 of=/dev/null bs=512 count=1 skip=67 iflag=direct
+dd: error reading '/dev/mapper/dust1': Input/output error
+0+0 records in
+0+0 records out
+0 bytes copied, 0.00040651 s, 0.0 kB/s
+
+...and writing to the bad blocks will remove the blocks from the list,
+therefore emulating the "remap" behavior of hard disk drives:
+
+$ sudo dd if=/dev/zero of=/dev/mapper/dust1 bs=512 count=128 oflag=direct
+128+0 records in
+128+0 records out
+
+kernel: device-mapper: dust: block 60 removed from badblocklist by write
+kernel: device-mapper: dust: block 67 removed from badblocklist by write
+kernel: device-mapper: dust: block 72 removed from badblocklist by write
+kernel: device-mapper: dust: block 87 removed from badblocklist by write
+
+Bad block add/remove error handling:
+------------------------------------
+
+Attempting to add a bad block that already exists in the list will
+result in an "Invalid argument" error, as well as a helpful message:
+
+$ sudo dmsetup message dust1 0 addbadblock 88
+device-mapper: message ioctl on dust1 failed: Invalid argument
+kernel: device-mapper: dust: block 88 already in badblocklist
+
+Attempting to remove a bad block that doesn't exist in the list will
+result in an "Invalid argument" error, as well as a helpful message:
+
+$ sudo dmsetup message dust1 0 removebadblock 87
+device-mapper: message ioctl on dust1 failed: Invalid argument
+kernel: device-mapper: dust: block 87 not found in badblocklist
+
+Counting the number of bad blocks in the bad block list:
+--------------------------------------------------------
+
+To count the number of bad blocks configured in the device, run the
+following message command:
+
+$ sudo dmsetup message dust1 0 countbadblocks
+
+A message will print with the number of bad blocks currently
+configured on the device:
+
+kernel: device-mapper: dust: countbadblocks: 895 badblock(s) found
+
+Querying for specific bad blocks:
+---------------------------------
+
+To find out if a specific block is in the bad block list, run the
+following message command:
+
+$ sudo dmsetup message dust1 0 queryblock 72
+
+The following message will print if the block is in the list:
+device-mapper: dust: queryblock: block 72 found in badblocklist
+
+The following message will print if the block is in the list:
+device-mapper: dust: queryblock: block 72 not found in badblocklist
+
+The "queryblock" message command will work in both the "enabled"
+and "disabled" modes, allowing the verification of whether a block
+will be treated as "bad" without having to issue I/O to the device,
+or having to "enable" the bad block emulation.
+
+Clearing the bad block list:
+----------------------------
+
+To clear the bad block list (without needing to individually run
+a "removebadblock" message command for every block), run the
+following message command:
+
+$ sudo dmsetup message dust1 0 clearbadblocks
+
+After clearing the bad block list, the following message will appear:
+
+kernel: device-mapper: dust: clearbadblocks: badblocks cleared
+
+If there were no bad blocks to clear, the following message will
+appear:
+
+kernel: device-mapper: dust: clearbadblocks: no badblocks found
+
+Message commands list:
+----------------------
+
+Below is a list of the messages that can be sent to a dust device:
+
+Operations on blocks (requires a <blknum> argument):
+
+addbadblock <blknum>
+queryblock <blknum>
+removebadblock <blknum>
+
+...where <blknum> is a block number within range of the device
+ (corresponding to the block size of the device.)
+
+Single argument message commands:
+
+countbadblocks
+clearbadblocks
+disable
+enable
+quiet
+
+Device removal:
+---------------
+
+When finished, remove the device via the "dmsetup remove" command:
+
+$ sudo dmsetup remove dust1
+
+Quiet mode:
+-----------
+
+On test runs with many bad blocks, it may be desirable to avoid
+excessive logging (from bad blocks added, removed, or "remapped").
+This can be done by enabling "quiet mode" via the following message:
+
+$ sudo dmsetup message dust1 0 quiet
+
+This will suppress log messages from add / remove / removed by write
+operations. Log messages from "countbadblocks" or "queryblock"
+message commands will still print in quiet mode.
+
+The status of quiet mode can be seen by running "dmsetup status":
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block quiet
+
+To disable quiet mode, send the "quiet" message again:
+
+$ sudo dmsetup message dust1 0 quiet
+
+$ sudo dmsetup status dust1
+0 33552384 dust 252:17 fail_read_on_bad_block verbose
+
+(The presence of "verbose" indicates normal logging.)
+
+"Why not...?"
+-------------
+
+scsi_debug has a "medium error" mode that can fail reads on one
+specified sector (sector 0x1234, hardcoded in the source code), but
+it uses RAM for the persistent storage, which drastically decreases
+the potential device size.
+
+dm-flakey fails all I/O from all block locations at a specified time
+frequency, and not a given point in time.
+
+When a bad sector occurs on a hard disk drive, reads to that sector
+are failed by the device, usually resulting in an error code of EIO
+("I/O error") or ENODATA ("No data available"). However, a write to
+the sector may succeed, and result in the sector becoming readable
+after the device controller no longer experiences errors reading the
+sector (or after a reallocation of the sector). However, there may
+be bad sectors that occur on the device in the future, in a different,
+unpredictable location.
+
+This target seeks to provide a device that can exhibit the behavior
+of a bad sector at a known sector location, at a known time, based
+on a large storage device (at least tens of gigabytes, not occupying
+system memory).
mode, the dm-integrity target can be used to detect silent data
corruption on the disk or in the I/O path.
+There's an alternate mode of operation where dm-integrity uses bitmap
+instead of a journal. If a bit in the bitmap is 1, the corresponding
+region's data and integrity tags are not synchronized - if the machine
+crashes, the unsynchronized regions will be recalculated. The bitmap mode
+is faster than the journal mode, because we don't have to write the data
+twice, but it is also less reliable, because if data corruption happens
+when the machine crashes, it may not be detected.
When loading the target for the first time, the kernel driver will format
the device. But it will only format the device if the superblock contains
either both data and tag or none of them are written. The
journaled mode degrades write throughput twice because the
data have to be written twice.
+ B - bitmap mode - data and metadata are written without any
+ synchronization, the driver maintains a bitmap of dirty
+ regions where data and metadata don't match. This mode can
+ only be used with internal hash.
R - recovery mode - in this mode, journal is not replayed,
checksums are not checked and writes to the device are not
allowed. This mode is useful for data recovery if the
a power of two. If the device is already formatted, the value from
the superblock is used.
+meta_device:device
+ Don't interleave the data and metadata on on device. Use a
+ separate device for metadata.
+
buffer_sectors:number
The number of sectors in one buffer. The value is rounded down to
a power of two.
Supported values are 512, 1024, 2048 and 4096 bytes. If not
specified the default block size is 512 bytes.
+sectors_per_bit:number
+ In the bitmap mode, this parameter specifies the number of
+ 512-byte sectors that corresponds to one bitmap bit.
+
+bitmap_flush_interval:number
+ The bitmap flush interval in milliseconds. The metadata buffers
+ are synchronized when this interval expires.
+
+
The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
be changed when reloading the target (load an inactive table and swap the
tables with suspend and resume). The other arguments should not be changed
provides (i.e. the size of the device minus the size of all
metadata and padding). The user of this target should not send
bios that access data beyond the "provided data sectors" limit.
- * flags - a flag is set if journal_mac is used
+ * flags
+ SB_FLAG_HAVE_JOURNAL_MAC - a flag is set if journal_mac is used
+ SB_FLAG_RECALCULATING - recalculating is in progress
+ SB_FLAG_DIRTY_BITMAP - journal area contains the bitmap of dirty
+ blocks
+ * log2(sectors per block)
+ * a position where recalculating finished
* journal
The journal is divided into sections, each section contains:
* metadata area (4kiB), it contains journal entries
+++ /dev/null
-Device tree binding vendor prefix registry. Keep list in alphabetical order.
-
-This isn't an exhaustive list, but you should add new prefixes to it before
-using them to avoid name-space collisions.
-
-abilis Abilis Systems
-abracon Abracon Corporation
-actions Actions Semiconductor Co., Ltd.
-active-semi Active-Semi International Inc
-ad Avionic Design GmbH
-adafruit Adafruit Industries, LLC
-adapteva Adapteva, Inc.
-adaptrum Adaptrum, Inc.
-adh AD Holdings Plc.
-adi Analog Devices, Inc.
-advantech Advantech Corporation
-aeroflexgaisler Aeroflex Gaisler AB
-al Annapurna Labs
-allo Allo.com
-allwinner Allwinner Technology Co., Ltd.
-alphascale AlphaScale Integrated Circuits Systems, Inc.
-altr Altera Corp.
-amarula Amarula Solutions
-amazon Amazon.com, Inc.
-amcc Applied Micro Circuits Corporation (APM, formally AMCC)
-amd Advanced Micro Devices (AMD), Inc.
-amediatech Shenzhen Amediatech Technology Co., Ltd
-amlogic Amlogic, Inc.
-ampire Ampire Co., Ltd.
-ams AMS AG
-amstaos AMS-Taos Inc.
-analogix Analogix Semiconductor, Inc.
-andestech Andes Technology Corporation
-apm Applied Micro Circuits Corporation (APM)
-aptina Aptina Imaging
-arasan Arasan Chip Systems
-archermind ArcherMind Technology (Nanjing) Co., Ltd.
-arctic Arctic Sand
-arcx arcx Inc. / Archronix Inc.
-aries Aries Embedded GmbH
-arm ARM Ltd.
-armadeus ARMadeus Systems SARL
-arrow Arrow Electronics
-artesyn Artesyn Embedded Technologies Inc.
-asahi-kasei Asahi Kasei Corp.
-aspeed ASPEED Technology Inc.
-asus AsusTek Computer Inc.
-atlas Atlas Scientific LLC
-atmel Atmel Corporation
-auo AU Optronics Corporation
-auvidea Auvidea GmbH
-avago Avago Technologies
-avia avia semiconductor
-avic Shanghai AVIC Optoelectronics Co., Ltd.
-avnet Avnet, Inc.
-axentia Axentia Technologies AB
-axis Axis Communications AB
-azoteq Azoteq (Pty) Ltd
-azw Shenzhen AZW Technology Co., Ltd.
-bananapi BIPAI KEJI LIMITED
-bhf Beckhoff Automation GmbH & Co. KG
-bitmain Bitmain Technologies
-boe BOE Technology Group Co., Ltd.
-bosch Bosch Sensortec GmbH
-boundary Boundary Devices Inc.
-brcm Broadcom Corporation
-buffalo Buffalo, Inc.
-bticino Bticino International
-calxeda Calxeda
-capella Capella Microsystems, Inc
-cascoda Cascoda, Ltd.
-catalyst Catalyst Semiconductor, Inc.
-cavium Cavium, Inc.
-cdns Cadence Design Systems Inc.
-cdtech CDTech(H.K.) Electronics Limited
-ceva Ceva, Inc.
-chipidea Chipidea, Inc
-chipone ChipOne
-chipspark ChipSPARK
-chrp Common Hardware Reference Platform
-chunghwa Chunghwa Picture Tubes Ltd.
-ciaa Computadora Industrial Abierta Argentina
-cirrus Cirrus Logic, Inc.
-cloudengines Cloud Engines, Inc.
-cnm Chips&Media, Inc.
-cnxt Conexant Systems, Inc.
-compulab CompuLab Ltd.
-cortina Cortina Systems, Inc.
-cosmic Cosmic Circuits
-crane Crane Connectivity Solutions
-creative Creative Technology Ltd
-crystalfontz Crystalfontz America, Inc.
-csky Hangzhou C-SKY Microsystems Co., Ltd
-cubietech Cubietech, Ltd.
-cypress Cypress Semiconductor Corporation
-cznic CZ.NIC, z.s.p.o.
-dallas Maxim Integrated Products (formerly Dallas Semiconductor)
-dataimage DataImage, Inc.
-davicom DAVICOM Semiconductor, Inc.
-delta Delta Electronics, Inc.
-denx Denx Software Engineering
-devantech Devantech, Ltd.
-dh DH electronics GmbH
-digi Digi International Inc.
-digilent Diglent, Inc.
-dioo Dioo Microcircuit Co., Ltd
-dlc DLC Display Co., Ltd.
-dlg Dialog Semiconductor
-dlink D-Link Corporation
-dmo Data Modul AG
-domintech Domintech Co., Ltd.
-dongwoon Dongwoon Anatech
-dptechnics DPTechnics
-dragino Dragino Technology Co., Limited
-ea Embedded Artists AB
-ebs-systart EBS-SYSTART GmbH
-ebv EBV Elektronik
-eckelmann Eckelmann AG
-edt Emerging Display Technologies
-eeti eGalax_eMPIA Technology Inc
-elan Elan Microelectronic Corp.
-elgin Elgin S/A.
-embest Shenzhen Embest Technology Co., Ltd.
-emlid Emlid, Ltd.
-emmicro EM Microelectronic
-emtrion emtrion GmbH
-endless Endless Mobile, Inc.
-energymicro Silicon Laboratories (formerly Energy Micro AS)
-engicam Engicam S.r.l.
-epcos EPCOS AG
-epfl Ecole Polytechnique Fédérale de Lausanne
-epson Seiko Epson Corp.
-est ESTeem Wireless Modems
-ettus NI Ettus Research
-eukrea Eukréa Electromatique
-everest Everest Semiconductor Co. Ltd.
-everspin Everspin Technologies, Inc.
-exar Exar Corporation
-excito Excito
-ezchip EZchip Semiconductor
-facebook Facebook
-fairphone Fairphone B.V.
-faraday Faraday Technology Corporation
-fastrax Fastrax Oy
-fcs Fairchild Semiconductor
-feiyang Shenzhen Fly Young Technology Co.,LTD.
-firefly Firefly
-focaltech FocalTech Systems Co.,Ltd
-friendlyarm Guangzhou FriendlyARM Computer Tech Co., Ltd
-fsl Freescale Semiconductor
-fujitsu Fujitsu Ltd.
-gateworks Gateworks Corporation
-gcw Game Consoles Worldwide
-ge General Electric Company
-geekbuying GeekBuying
-gef GE Fanuc Intelligent Platforms Embedded Systems, Inc.
-GEFanuc GE Fanuc Intelligent Platforms Embedded Systems, Inc.
-geniatech Geniatech, Inc.
-giantec Giantec Semiconductor, Inc.
-giantplus Giantplus Technology Co., Ltd.
-globalscale Globalscale Technologies, Inc.
-globaltop GlobalTop Technology, Inc.
-gmt Global Mixed-mode Technology, Inc.
-goodix Shenzhen Huiding Technology Co., Ltd.
-google Google, Inc.
-grinn Grinn
-grmn Garmin Limited
-gumstix Gumstix, Inc.
-gw Gateworks Corporation
-hannstar HannStar Display Corporation
-haoyu Haoyu Microelectronic Co. Ltd.
-hardkernel Hardkernel Co., Ltd
-hideep HiDeep Inc.
-himax Himax Technologies, Inc.
-hisilicon Hisilicon Limited.
-hit Hitachi Ltd.
-hitex Hitex Development Tools
-holt Holt Integrated Circuits, Inc.
-honeywell Honeywell
-hp Hewlett Packard
-holtek Holtek Semiconductor, Inc.
-hwacom HwaCom Systems Inc.
-i2se I2SE GmbH
-ibm International Business Machines (IBM)
-icplus IC Plus Corp.
-idt Integrated Device Technologies, Inc.
-ifi Ingenieurburo Fur Ic-Technologie (I/F/I)
-ilitek ILI Technology Corporation (ILITEK)
-img Imagination Technologies Ltd.
-infineon Infineon Technologies
-inforce Inforce Computing
-ingenic Ingenic Semiconductor
-innolux Innolux Corporation
-inside-secure INSIDE Secure
-intel Intel Corporation
-intercontrol Inter Control Group
-invensense InvenSense Inc.
-inversepath Inverse Path
-iom Iomega Corporation
-isee ISEE 2007 S.L.
-isil Intersil
-issi Integrated Silicon Solutions Inc.
-itead ITEAD Intelligent Systems Co.Ltd
-iwave iWave Systems Technologies Pvt. Ltd.
-jdi Japan Display Inc.
-jedec JEDEC Solid State Technology Association
-jianda Jiandangjing Technology Co., Ltd.
-karo Ka-Ro electronics GmbH
-keithkoep Keith & Koep GmbH
-keymile Keymile GmbH
-khadas Khadas
-kiebackpeter Kieback & Peter GmbH
-kinetic Kinetic Technologies
-kingdisplay King & Display Technology Co., Ltd.
-kingnovel Kingnovel Technology Co., Ltd.
-kionix Kionix, Inc.
-kobo Rakuten Kobo Inc.
-koe Kaohsiung Opto-Electronics Inc.
-kosagi Sutajio Ko-Usagi PTE Ltd.
-kyo Kyocera Corporation
-lacie LaCie
-laird Laird PLC
-lantiq Lantiq Semiconductor
-lattice Lattice Semiconductor
-lego LEGO Systems A/S
-lemaker Shenzhen LeMaker Technology Co., Ltd.
-lenovo Lenovo Group Ltd.
-lg LG Corporation
-libretech Shenzhen Libre Technology Co., Ltd
-licheepi Lichee Pi
-linaro Linaro Limited
-linksys Belkin International, Inc. (Linksys)
-linux Linux-specific binding
-linx Linx Technologies
-lltc Linear Technology Corporation
-logicpd Logic PD, Inc.
-lsi LSI Corp. (LSI Logic)
-lwn Liebherr-Werk Nenzing GmbH
-macnica Macnica Americas
-marvell Marvell Technology Group Ltd.
-maxbotix MaxBotix Inc.
-maxim Maxim Integrated Products
-mbvl Mobiveil Inc.
-mcube mCube
-meas Measurement Specialties
-mediatek MediaTek Inc.
-megachips MegaChips
-mele Shenzhen MeLE Digital Technology Ltd.
-melexis Melexis N.V.
-melfas MELFAS Inc.
-mellanox Mellanox Technologies
-memsic MEMSIC Inc.
-menlo Menlo Systems GmbH
-merrii Merrii Technology Co., Ltd.
-micrel Micrel Inc.
-microchip Microchip Technology Inc.
-microcrystal Micro Crystal AG
-micron Micron Technology Inc.
-mikroe MikroElektronika d.o.o.
-minix MINIX Technology Ltd.
-miramems MiraMEMS Sensing Technology Co., Ltd.
-mitsubishi Mitsubishi Electric Corporation
-mosaixtech Mosaix Technologies, Inc.
-motorola Motorola, Inc.
-moxa Moxa Inc.
-mpl MPL AG
-mqmaker mqmaker Inc.
-mscc Microsemi Corporation
-msi Micro-Star International Co. Ltd.
-mti Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
-multi-inno Multi-Inno Technology Co.,Ltd
-mundoreader Mundo Reader S.L.
-murata Murata Manufacturing Co., Ltd.
-mxicy Macronix International Co., Ltd.
-myir MYIR Tech Limited
-national National Semiconductor
-nec NEC LCD Technologies, Ltd.
-neonode Neonode Inc.
-netgear NETGEAR
-netlogic Broadcom Corporation (formerly NetLogic Microsystems)
-netron-dy Netron DY
-netxeon Shenzhen Netxeon Technology CO., LTD
-nexbox Nexbox
-nextthing Next Thing Co.
-newhaven Newhaven Display International
-ni National Instruments
-nintendo Nintendo
-nlt NLT Technologies, Ltd.
-nokia Nokia
-nordic Nordic Semiconductor
-novtech NovTech, Inc.
-nutsboard NutsBoard
-nuvoton Nuvoton Technology Corporation
-nvd New Vision Display
-nvidia NVIDIA
-nxp NXP Semiconductors
-oceanic Oceanic Systems (UK) Ltd.
-okaya Okaya Electric America, Inc.
-oki Oki Electric Industry Co., Ltd.
-olimex OLIMEX Ltd.
-olpc One Laptop Per Child
-onion Onion Corporation
-onnn ON Semiconductor Corp.
-ontat On Tat Industrial Company
-opalkelly Opal Kelly Incorporated
-opencores OpenCores.org
-openrisc OpenRISC.io
-option Option NV
-oranth Shenzhen Oranth Technology Co., Ltd.
-ORCL Oracle Corporation
-orisetech Orise Technology
-ortustech Ortus Technology Co., Ltd.
-osddisplays OSD Displays
-ovti OmniVision Technologies
-oxsemi Oxford Semiconductor, Ltd.
-panasonic Panasonic Corporation
-parade Parade Technologies Inc.
-pda Precision Design Associates, Inc.
-pericom Pericom Technology Inc.
-pervasive Pervasive Displays, Inc.
-phicomm PHICOMM Co., Ltd.
-phytec PHYTEC Messtechnik GmbH
-picochip Picochip Ltd
-pine64 Pine64
-pixcir PIXCIR MICROELECTRONICS Co., Ltd
-plantower Plantower Co., Ltd
-plathome Plat'Home Co., Ltd.
-plda PLDA
-plx Broadcom Corporation (formerly PLX Technology)
-pni PNI Sensor Corporation
-portwell Portwell Inc.
-poslab Poslab Technology Co., Ltd.
-powervr PowerVR (deprecated, use img)
-probox2 PROBOX2 (by W2COMP Co., Ltd.)
-pulsedlight PulsedLight, Inc
-qca Qualcomm Atheros, Inc.
-qcom Qualcomm Technologies, Inc
-qemu QEMU, a generic and open source machine emulator and virtualizer
-qi Qi Hardware
-qiaodian QiaoDian XianShi Corporation
-qnap QNAP Systems, Inc.
-radxa Radxa
-raidsonic RaidSonic Technology GmbH
-ralink Mediatek/Ralink Technology Corp.
-ramtron Ramtron International
-raspberrypi Raspberry Pi Foundation
-raydium Raydium Semiconductor Corp.
-rda Unisoc Communications, Inc.
-realtek Realtek Semiconductor Corp.
-renesas Renesas Electronics Corporation
-richtek Richtek Technology Corporation
-ricoh Ricoh Co. Ltd.
-rikomagic Rikomagic Tech Corp. Ltd
-riscv RISC-V Foundation
-rockchip Fuzhou Rockchip Electronics Co., Ltd
-rocktech ROCKTECH DISPLAYS LIMITED
-rohm ROHM Semiconductor Co., Ltd
-ronbo Ronbo Electronics
-roofull Shenzhen Roofull Technology Co, Ltd
-samsung Samsung Semiconductor
-samtec Samtec/Softing company
-sancloud Sancloud Ltd
-sandisk Sandisk Corporation
-sbs Smart Battery System
-schindler Schindler
-seagate Seagate Technology PLC
-seirobotics Shenzhen SEI Robotics Co., Ltd
-semtech Semtech Corporation
-sensirion Sensirion AG
-sff Small Form Factor Committee
-sgd Solomon Goldentek Display Corporation
-sgx SGX Sensortech
-sharp Sharp Corporation
-shimafuji Shimafuji Electric, Inc.
-si-en Si-En Technology Ltd.
-si-linux Silicon Linux Corporation
-sifive SiFive, Inc.
-sigma Sigma Designs, Inc.
-sii Seiko Instruments, Inc.
-sil Silicon Image
-silabs Silicon Laboratories
-silead Silead Inc.
-silergy Silergy Corp.
-siliconmitus Silicon Mitus, Inc.
-simtek
-sirf SiRF Technology, Inc.
-sis Silicon Integrated Systems Corp.
-sitronix Sitronix Technology Corporation
-skyworks Skyworks Solutions, Inc.
-smsc Standard Microsystems Corporation
-snps Synopsys, Inc.
-socionext Socionext Inc.
-solidrun SolidRun
-solomon Solomon Systech Limited
-sony Sony Corporation
-spansion Spansion Inc.
-sprd Spreadtrum Communications Inc.
-sst Silicon Storage Technology, Inc.
-st STMicroelectronics
-starry Starry Electronic Technology (ShenZhen) Co., LTD
-startek Startek
-ste ST-Ericsson
-stericsson ST-Ericsson
-summit Summit microelectronics
-sunchip Shenzhen Sunchip Technology Co., Ltd
-SUNW Sun Microsystems, Inc
-swir Sierra Wireless
-syna Synaptics Inc.
-synology Synology, Inc.
-tbs TBS Technologies
-tbs-biometrics Touchless Biometric Systems AG
-tcg Trusted Computing Group
-tcl Toby Churchill Ltd.
-technexion TechNexion
-technologic Technologic Systems
-tempo Tempo Semiconductor
-techstar Shenzhen Techstar Electronics Co., Ltd.
-terasic Terasic Inc.
-thine THine Electronics, Inc.
-ti Texas Instruments
-tianma Tianma Micro-electronics Co., Ltd.
-tlm Trusted Logic Mobility
-tmt Tecon Microprocessor Technologies, LLC.
-topeet Topeet
-toradex Toradex AG
-toshiba Toshiba Corporation
-toumaz Toumaz
-tpk TPK U.S.A. LLC
-tplink TP-LINK Technologies Co., Ltd.
-tpo TPO
-tq TQ Systems GmbH
-tronfy Tronfy
-tronsmart Tronsmart
-truly Truly Semiconductors Limited
-tsd Theobroma Systems Design und Consulting GmbH
-tyan Tyan Computer Corporation
-u-blox u-blox
-ucrobotics uCRobotics
-ubnt Ubiquiti Networks
-udoo Udoo
-uniwest United Western Technologies Corp (UniWest)
-upisemi uPI Semiconductor Corp.
-urt United Radiant Technology Corporation
-usi Universal Scientific Industrial Co., Ltd.
-v3 V3 Semiconductor
-vamrs Vamrs Ltd.
-variscite Variscite Ltd.
-via VIA Technologies, Inc.
-virtio Virtual I/O Device Specification, developed by the OASIS consortium
-vishay Vishay Intertechnology, Inc
-vitesse Vitesse Semiconductor Corporation
-vivante Vivante Corporation
-vocore VoCore Studio
-voipac Voipac Technologies s.r.o.
-vot Vision Optical Technology Co., Ltd.
-wd Western Digital Corp.
-wetek WeTek Electronics, limited.
-wexler Wexler
-whwave Shenzhen whwave Electronics, Inc.
-wi2wi Wi2Wi, Inc.
-winbond Winbond Electronics corp.
-winstar Winstar Display Corp.
-wlf Wolfson Microelectronics
-wm Wondermedia Technologies, Inc.
-x-powers X-Powers
-xes Extreme Engineering Solutions (X-ES)
-xillybus Xillybus Ltd.
-xlnx Xilinx
-xunlong Shenzhen Xunlong Software CO.,Limited
-ysoft Y Soft Corporation a.s.
-zarlink Zarlink Semiconductor
-zeitec ZEITEC Semiconductor Co., LTD.
-zidoo Shenzhen Zidoo Technology Co., Ltd.
-zii Zodiac Inflight Innovations
-zte ZTE Corp.
-zyxel ZyXEL Communications Corp.
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/vendor-prefixes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Devicetree Vendor Prefix Registry
+
+maintainers:
+ - Rob Herring <robh@kernel.org>
+
+select: true
+
+properties: {}
+
+patternProperties:
+ # Prefixes which are not vendors, but followed the pattern
+ # DO NOT ADD NEW PROPERTIES TO THIS LIST
+ "^(at25|devbus|dmacap|dsa|exynos|gpio-fan|gpio|gpmc|hdmi|i2c-gpio),.*": true
+ "^(keypad|m25p|max8952|max8997|max8998|mpmc),.*": true
+ "^(pinctrl-single|#pinctrl-single|PowerPC),.*": true
+ "^(pl022|pxa-mmc|rcar_sound|rotary-encoder|s5m8767|sdhci),.*": true
+ "^(simple-audio-card|simple-graph-card|st-plgpio|st-spics|ts),.*": true
+
+ # Keep list in alphabetical order.
+ "^abilis,.*":
+ description: Abilis Systems
+ "^abracon,.*":
+ description: Abracon Corporation
+ "^actions,.*":
+ description: Actions Semiconductor Co., Ltd.
+ "^active-semi,.*":
+ description: Active-Semi International Inc
+ "^ad,.*":
+ description: Avionic Design GmbH
+ "^adafruit,.*":
+ description: Adafruit Industries, LLC
+ "^adapteva,.*":
+ description: Adapteva, Inc.
+ "^adaptrum,.*":
+ description: Adaptrum, Inc.
+ "^adh,.*":
+ description: AD Holdings Plc.
+ "^adi,.*":
+ description: Analog Devices, Inc.
+ "^advantech,.*":
+ description: Advantech Corporation
+ "^aeroflexgaisler,.*":
+ description: Aeroflex Gaisler AB
+ "^al,.*":
+ description: Annapurna Labs
+ "^allo,.*":
+ description: Allo.com
+ "^allwinner,.*":
+ description: Allwinner Technology Co., Ltd.
+ "^alphascale,.*":
+ description: AlphaScale Integrated Circuits Systems, Inc.
+ "^altr,.*":
+ description: Altera Corp.
+ "^amarula,.*":
+ description: Amarula Solutions
+ "^amazon,.*":
+ description: Amazon.com, Inc.
+ "^amcc,.*":
+ description: Applied Micro Circuits Corporation (APM, formally AMCC)
+ "^amd,.*":
+ description: Advanced Micro Devices (AMD), Inc.
+ "^amediatech,.*":
+ description: Shenzhen Amediatech Technology Co., Ltd
+ "^amlogic,.*":
+ description: Amlogic, Inc.
+ "^ampire,.*":
+ description: Ampire Co., Ltd.
+ "^ams,.*":
+ description: AMS AG
+ "^amstaos,.*":
+ description: AMS-Taos Inc.
+ "^analogix,.*":
+ description: Analogix Semiconductor, Inc.
+ "^andestech,.*":
+ description: Andes Technology Corporation
+ "^apm,.*":
+ description: Applied Micro Circuits Corporation (APM)
+ "^aptina,.*":
+ description: Aptina Imaging
+ "^arasan,.*":
+ description: Arasan Chip Systems
+ "^archermind,.*":
+ description: ArcherMind Technology (Nanjing) Co., Ltd.
+ "^arctic,.*":
+ description: Arctic Sand
+ "^arcx,.*":
+ description: arcx Inc. / Archronix Inc.
+ "^aries,.*":
+ description: Aries Embedded GmbH
+ "^arm,.*":
+ description: ARM Ltd.
+ "^armadeus,.*":
+ description: ARMadeus Systems SARL
+ "^arrow,.*":
+ description: Arrow Electronics
+ "^artesyn,.*":
+ description: Artesyn Embedded Technologies Inc.
+ "^asahi-kasei,.*":
+ description: Asahi Kasei Corp.
+ "^aspeed,.*":
+ description: ASPEED Technology Inc.
+ "^asus,.*":
+ description: AsusTek Computer Inc.
+ "^atlas,.*":
+ description: Atlas Scientific LLC
+ "^atmel,.*":
+ description: Atmel Corporation
+ "^auo,.*":
+ description: AU Optronics Corporation
+ "^auvidea,.*":
+ description: Auvidea GmbH
+ "^avago,.*":
+ description: Avago Technologies
+ "^avia,.*":
+ description: avia semiconductor
+ "^avic,.*":
+ description: Shanghai AVIC Optoelectronics Co., Ltd.
+ "^avnet,.*":
+ description: Avnet, Inc.
+ "^axentia,.*":
+ description: Axentia Technologies AB
+ "^axis,.*":
+ description: Axis Communications AB
+ "^azoteq,.*":
+ description: Azoteq (Pty) Ltd
+ "^azw,.*":
+ description: Shenzhen AZW Technology Co., Ltd.
+ "^bananapi,.*":
+ description: BIPAI KEJI LIMITED
+ "^bhf,.*":
+ description: Beckhoff Automation GmbH & Co. KG
+ "^bitmain,.*":
+ description: Bitmain Technologies
+ "^boe,.*":
+ description: BOE Technology Group Co., Ltd.
+ "^bosch,.*":
+ description: Bosch Sensortec GmbH
+ "^boundary,.*":
+ description: Boundary Devices Inc.
+ "^brcm,.*":
+ description: Broadcom Corporation
+ "^buffalo,.*":
+ description: Buffalo, Inc.
+ "^bticino,.*":
+ description: Bticino International
+ "^calxeda,.*":
+ description: Calxeda
+ "^capella,.*":
+ description: Capella Microsystems, Inc
+ "^cascoda,.*":
+ description: Cascoda, Ltd.
+ "^catalyst,.*":
+ description: Catalyst Semiconductor, Inc.
+ "^cavium,.*":
+ description: Cavium, Inc.
+ "^cdns,.*":
+ description: Cadence Design Systems Inc.
+ "^cdtech,.*":
+ description: CDTech(H.K.) Electronics Limited
+ "^ceva,.*":
+ description: Ceva, Inc.
+ "^chipidea,.*":
+ description: Chipidea, Inc
+ "^chipone,.*":
+ description: ChipOne
+ "^chipspark,.*":
+ description: ChipSPARK
+ "^chrp,.*":
+ description: Common Hardware Reference Platform
+ "^chunghwa,.*":
+ description: Chunghwa Picture Tubes Ltd.
+ "^ciaa,.*":
+ description: Computadora Industrial Abierta Argentina
+ "^cirrus,.*":
+ description: Cirrus Logic, Inc.
+ "^cloudengines,.*":
+ description: Cloud Engines, Inc.
+ "^cnm,.*":
+ description: Chips&Media, Inc.
+ "^cnxt,.*":
+ description: Conexant Systems, Inc.
+ "^compulab,.*":
+ description: CompuLab Ltd.
+ "^cortina,.*":
+ description: Cortina Systems, Inc.
+ "^cosmic,.*":
+ description: Cosmic Circuits
+ "^crane,.*":
+ description: Crane Connectivity Solutions
+ "^creative,.*":
+ description: Creative Technology Ltd
+ "^crystalfontz,.*":
+ description: Crystalfontz America, Inc.
+ "^csky,.*":
+ description: Hangzhou C-SKY Microsystems Co., Ltd
+ "^cubietech,.*":
+ description: Cubietech, Ltd.
+ "^cypress,.*":
+ description: Cypress Semiconductor Corporation
+ "^cznic,.*":
+ description: CZ.NIC, z.s.p.o.
+ "^dallas,.*":
+ description: Maxim Integrated Products (formerly Dallas Semiconductor)
+ "^dataimage,.*":
+ description: DataImage, Inc.
+ "^davicom,.*":
+ description: DAVICOM Semiconductor, Inc.
+ "^delta,.*":
+ description: Delta Electronics, Inc.
+ "^denx,.*":
+ description: Denx Software Engineering
+ "^devantech,.*":
+ description: Devantech, Ltd.
+ "^dh,.*":
+ description: DH electronics GmbH
+ "^digi,.*":
+ description: Digi International Inc.
+ "^digilent,.*":
+ description: Diglent, Inc.
+ "^dioo,.*":
+ description: Dioo Microcircuit Co., Ltd
+ "^dlc,.*":
+ description: DLC Display Co., Ltd.
+ "^dlg,.*":
+ description: Dialog Semiconductor
+ "^dlink,.*":
+ description: D-Link Corporation
+ "^dmo,.*":
+ description: Data Modul AG
+ "^domintech,.*":
+ description: Domintech Co., Ltd.
+ "^dongwoon,.*":
+ description: Dongwoon Anatech
+ "^dptechnics,.*":
+ description: DPTechnics
+ "^dragino,.*":
+ description: Dragino Technology Co., Limited
+ "^ea,.*":
+ description: Embedded Artists AB
+ "^ebs-systart,.*":
+ description: EBS-SYSTART GmbH
+ "^ebv,.*":
+ description: EBV Elektronik
+ "^eckelmann,.*":
+ description: Eckelmann AG
+ "^edt,.*":
+ description: Emerging Display Technologies
+ "^eeti,.*":
+ description: eGalax_eMPIA Technology Inc
+ "^elan,.*":
+ description: Elan Microelectronic Corp.
+ "^elgin,.*":
+ description: Elgin S/A.
+ "^embest,.*":
+ description: Shenzhen Embest Technology Co., Ltd.
+ "^emlid,.*":
+ description: Emlid, Ltd.
+ "^emmicro,.*":
+ description: EM Microelectronic
+ "^emtrion,.*":
+ description: emtrion GmbH
+ "^endless,.*":
+ description: Endless Mobile, Inc.
+ "^energymicro,.*":
+ description: Silicon Laboratories (formerly Energy Micro AS)
+ "^engicam,.*":
+ description: Engicam S.r.l.
+ "^epcos,.*":
+ description: EPCOS AG
+ "^epfl,.*":
+ description: Ecole Polytechnique Fédérale de Lausanne
+ "^epson,.*":
+ description: Seiko Epson Corp.
+ "^est,.*":
+ description: ESTeem Wireless Modems
+ "^ettus,.*":
+ description: NI Ettus Research
+ "^eukrea,.*":
+ description: Eukréa Electromatique
+ "^everest,.*":
+ description: Everest Semiconductor Co. Ltd.
+ "^everspin,.*":
+ description: Everspin Technologies, Inc.
+ "^exar,.*":
+ description: Exar Corporation
+ "^excito,.*":
+ description: Excito
+ "^ezchip,.*":
+ description: EZchip Semiconductor
+ "^facebook,.*":
+ description: Facebook
+ "^fairphone,.*":
+ description: Fairphone B.V.
+ "^faraday,.*":
+ description: Faraday Technology Corporation
+ "^fastrax,.*":
+ description: Fastrax Oy
+ "^fcs,.*":
+ description: Fairchild Semiconductor
+ "^feiyang,.*":
+ description: Shenzhen Fly Young Technology Co.,LTD.
+ "^firefly,.*":
+ description: Firefly
+ "^focaltech,.*":
+ description: FocalTech Systems Co.,Ltd
+ "^friendlyarm,.*":
+ description: Guangzhou FriendlyARM Computer Tech Co., Ltd
+ "^fsl,.*":
+ description: Freescale Semiconductor
+ "^fujitsu,.*":
+ description: Fujitsu Ltd.
+ "^gateworks,.*":
+ description: Gateworks Corporation
+ "^gcw,.*":
+ description: Game Consoles Worldwide
+ "^ge,.*":
+ description: General Electric Company
+ "^geekbuying,.*":
+ description: GeekBuying
+ "^gef,.*":
+ description: GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+ "^GEFanuc,.*":
+ description: GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+ "^geniatech,.*":
+ description: Geniatech, Inc.
+ "^giantec,.*":
+ description: Giantec Semiconductor, Inc.
+ "^giantplus,.*":
+ description: Giantplus Technology Co., Ltd.
+ "^globalscale,.*":
+ description: Globalscale Technologies, Inc.
+ "^globaltop,.*":
+ description: GlobalTop Technology, Inc.
+ "^gmt,.*":
+ description: Global Mixed-mode Technology, Inc.
+ "^goodix,.*":
+ description: Shenzhen Huiding Technology Co., Ltd.
+ "^google,.*":
+ description: Google, Inc.
+ "^grinn,.*":
+ description: Grinn
+ "^grmn,.*":
+ description: Garmin Limited
+ "^gumstix,.*":
+ description: Gumstix, Inc.
+ "^gw,.*":
+ description: Gateworks Corporation
+ "^hannstar,.*":
+ description: HannStar Display Corporation
+ "^haoyu,.*":
+ description: Haoyu Microelectronic Co. Ltd.
+ "^hardkernel,.*":
+ description: Hardkernel Co., Ltd
+ "^hideep,.*":
+ description: HiDeep Inc.
+ "^himax,.*":
+ description: Himax Technologies, Inc.
+ "^hisilicon,.*":
+ description: Hisilicon Limited.
+ "^hit,.*":
+ description: Hitachi Ltd.
+ "^hitex,.*":
+ description: Hitex Development Tools
+ "^holt,.*":
+ description: Holt Integrated Circuits, Inc.
+ "^honeywell,.*":
+ description: Honeywell
+ "^hp,.*":
+ description: Hewlett Packard
+ "^holtek,.*":
+ description: Holtek Semiconductor, Inc.
+ "^hwacom,.*":
+ description: HwaCom Systems Inc.
+ "^i2se,.*":
+ description: I2SE GmbH
+ "^ibm,.*":
+ description: International Business Machines (IBM)
+ "^icplus,.*":
+ description: IC Plus Corp.
+ "^idt,.*":
+ description: Integrated Device Technologies, Inc.
+ "^ifi,.*":
+ description: Ingenieurburo Fur Ic-Technologie (I/F/I)
+ "^ilitek,.*":
+ description: ILI Technology Corporation (ILITEK)
+ "^img,.*":
+ description: Imagination Technologies Ltd.
+ "^infineon,.*":
+ description: Infineon Technologies
+ "^inforce,.*":
+ description: Inforce Computing
+ "^ingenic,.*":
+ description: Ingenic Semiconductor
+ "^innolux,.*":
+ description: Innolux Corporation
+ "^inside-secure,.*":
+ description: INSIDE Secure
+ "^intel,.*":
+ description: Intel Corporation
+ "^intercontrol,.*":
+ description: Inter Control Group
+ "^invensense,.*":
+ description: InvenSense Inc.
+ "^inversepath,.*":
+ description: Inverse Path
+ "^iom,.*":
+ description: Iomega Corporation
+ "^isee,.*":
+ description: ISEE 2007 S.L.
+ "^isil,.*":
+ description: Intersil
+ "^issi,.*":
+ description: Integrated Silicon Solutions Inc.
+ "^itead,.*":
+ description: ITEAD Intelligent Systems Co.Ltd
+ "^iwave,.*":
+ description: iWave Systems Technologies Pvt. Ltd.
+ "^jdi,.*":
+ description: Japan Display Inc.
+ "^jedec,.*":
+ description: JEDEC Solid State Technology Association
+ "^jianda,.*":
+ description: Jiandangjing Technology Co., Ltd.
+ "^karo,.*":
+ description: Ka-Ro electronics GmbH
+ "^keithkoep,.*":
+ description: Keith & Koep GmbH
+ "^keymile,.*":
+ description: Keymile GmbH
+ "^khadas,.*":
+ description: Khadas
+ "^kiebackpeter,.*":
+ description: Kieback & Peter GmbH
+ "^kinetic,.*":
+ description: Kinetic Technologies
+ "^kingdisplay,.*":
+ description: King & Display Technology Co., Ltd.
+ "^kingnovel,.*":
+ description: Kingnovel Technology Co., Ltd.
+ "^kionix,.*":
+ description: Kionix, Inc.
+ "^kobo,.*":
+ description: Rakuten Kobo Inc.
+ "^koe,.*":
+ description: Kaohsiung Opto-Electronics Inc.
+ "^kosagi,.*":
+ description: Sutajio Ko-Usagi PTE Ltd.
+ "^kyo,.*":
+ description: Kyocera Corporation
+ "^lacie,.*":
+ description: LaCie
+ "^laird,.*":
+ description: Laird PLC
+ "^lantiq,.*":
+ description: Lantiq Semiconductor
+ "^lattice,.*":
+ description: Lattice Semiconductor
+ "^lego,.*":
+ description: LEGO Systems A/S
+ "^lemaker,.*":
+ description: Shenzhen LeMaker Technology Co., Ltd.
+ "^lenovo,.*":
+ description: Lenovo Group Ltd.
+ "^lg,.*":
+ description: LG Corporation
+ "^libretech,.*":
+ description: Shenzhen Libre Technology Co., Ltd
+ "^licheepi,.*":
+ description: Lichee Pi
+ "^linaro,.*":
+ description: Linaro Limited
+ "^linksys,.*":
+ description: Belkin International, Inc. (Linksys)
+ "^linux,.*":
+ description: Linux-specific binding
+ "^linx,.*":
+ description: Linx Technologies
+ "^lltc,.*":
+ description: Linear Technology Corporation
+ "^logicpd,.*":
+ description: Logic PD, Inc.
+ "^lsi,.*":
+ description: LSI Corp. (LSI Logic)
+ "^lwn,.*":
+ description: Liebherr-Werk Nenzing GmbH
+ "^macnica,.*":
+ description: Macnica Americas
+ "^marvell,.*":
+ description: Marvell Technology Group Ltd.
+ "^maxbotix,.*":
+ description: MaxBotix Inc.
+ "^maxim,.*":
+ description: Maxim Integrated Products
+ "^mbvl,.*":
+ description: Mobiveil Inc.
+ "^mcube,.*":
+ description: mCube
+ "^meas,.*":
+ description: Measurement Specialties
+ "^mediatek,.*":
+ description: MediaTek Inc.
+ "^megachips,.*":
+ description: MegaChips
+ "^mele,.*":
+ description: Shenzhen MeLE Digital Technology Ltd.
+ "^melexis,.*":
+ description: Melexis N.V.
+ "^melfas,.*":
+ description: MELFAS Inc.
+ "^mellanox,.*":
+ description: Mellanox Technologies
+ "^memsic,.*":
+ description: MEMSIC Inc.
+ "^menlo,.*":
+ description: Menlo Systems GmbH
+ "^merrii,.*":
+ description: Merrii Technology Co., Ltd.
+ "^micrel,.*":
+ description: Micrel Inc.
+ "^microchip,.*":
+ description: Microchip Technology Inc.
+ "^microcrystal,.*":
+ description: Micro Crystal AG
+ "^micron,.*":
+ description: Micron Technology Inc.
+ "^mikroe,.*":
+ description: MikroElektronika d.o.o.
+ "^minix,.*":
+ description: MINIX Technology Ltd.
+ "^miramems,.*":
+ description: MiraMEMS Sensing Technology Co., Ltd.
+ "^mitsubishi,.*":
+ description: Mitsubishi Electric Corporation
+ "^mosaixtech,.*":
+ description: Mosaix Technologies, Inc.
+ "^motorola,.*":
+ description: Motorola, Inc.
+ "^moxa,.*":
+ description: Moxa Inc.
+ "^mpl,.*":
+ description: MPL AG
+ "^mqmaker,.*":
+ description: mqmaker Inc.
+ "^mscc,.*":
+ description: Microsemi Corporation
+ "^msi,.*":
+ description: Micro-Star International Co. Ltd.
+ "^mti,.*":
+ description: Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
+ "^multi-inno,.*":
+ description: Multi-Inno Technology Co.,Ltd
+ "^mundoreader,.*":
+ description: Mundo Reader S.L.
+ "^murata,.*":
+ description: Murata Manufacturing Co., Ltd.
+ "^mxicy,.*":
+ description: Macronix International Co., Ltd.
+ "^myir,.*":
+ description: MYIR Tech Limited
+ "^national,.*":
+ description: National Semiconductor
+ "^nec,.*":
+ description: NEC LCD Technologies, Ltd.
+ "^neonode,.*":
+ description: Neonode Inc.
+ "^netgear,.*":
+ description: NETGEAR
+ "^netlogic,.*":
+ description: Broadcom Corporation (formerly NetLogic Microsystems)
+ "^netron-dy,.*":
+ description: Netron DY
+ "^netxeon,.*":
+ description: Shenzhen Netxeon Technology CO., LTD
+ "^nexbox,.*":
+ description: Nexbox
+ "^nextthing,.*":
+ description: Next Thing Co.
+ "^newhaven,.*":
+ description: Newhaven Display International
+ "^ni,.*":
+ description: National Instruments
+ "^nintendo,.*":
+ description: Nintendo
+ "^nlt,.*":
+ description: NLT Technologies, Ltd.
+ "^nokia,.*":
+ description: Nokia
+ "^nordic,.*":
+ description: Nordic Semiconductor
+ "^novtech,.*":
+ description: NovTech, Inc.
+ "^nutsboard,.*":
+ description: NutsBoard
+ "^nuvoton,.*":
+ description: Nuvoton Technology Corporation
+ "^nvd,.*":
+ description: New Vision Display
+ "^nvidia,.*":
+ description: NVIDIA
+ "^nxp,.*":
+ description: NXP Semiconductors
+ "^oceanic,.*":
+ description: Oceanic Systems (UK) Ltd.
+ "^okaya,.*":
+ description: Okaya Electric America, Inc.
+ "^oki,.*":
+ description: Oki Electric Industry Co., Ltd.
+ "^olimex,.*":
+ description: OLIMEX Ltd.
+ "^olpc,.*":
+ description: One Laptop Per Child
+ "^onion,.*":
+ description: Onion Corporation
+ "^onnn,.*":
+ description: ON Semiconductor Corp.
+ "^ontat,.*":
+ description: On Tat Industrial Company
+ "^opalkelly,.*":
+ description: Opal Kelly Incorporated
+ "^opencores,.*":
+ description: OpenCores.org
+ "^openrisc,.*":
+ description: OpenRISC.io
+ "^option,.*":
+ description: Option NV
+ "^oranth,.*":
+ description: Shenzhen Oranth Technology Co., Ltd.
+ "^ORCL,.*":
+ description: Oracle Corporation
+ "^orisetech,.*":
+ description: Orise Technology
+ "^ortustech,.*":
+ description: Ortus Technology Co., Ltd.
+ "^osddisplays,.*":
+ description: OSD Displays
+ "^ovti,.*":
+ description: OmniVision Technologies
+ "^oxsemi,.*":
+ description: Oxford Semiconductor, Ltd.
+ "^panasonic,.*":
+ description: Panasonic Corporation
+ "^parade,.*":
+ description: Parade Technologies Inc.
+ "^pda,.*":
+ description: Precision Design Associates, Inc.
+ "^pericom,.*":
+ description: Pericom Technology Inc.
+ "^pervasive,.*":
+ description: Pervasive Displays, Inc.
+ "^phicomm,.*":
+ description: PHICOMM Co., Ltd.
+ "^phytec,.*":
+ description: PHYTEC Messtechnik GmbH
+ "^picochip,.*":
+ description: Picochip Ltd
+ "^pine64,.*":
+ description: Pine64
+ "^pixcir,.*":
+ description: PIXCIR MICROELECTRONICS Co., Ltd
+ "^plantower,.*":
+ description: Plantower Co., Ltd
+ "^plathome,.*":
+ description: Plat'Home Co., Ltd.
+ "^plda,.*":
+ description: PLDA
+ "^plx,.*":
+ description: Broadcom Corporation (formerly PLX Technology)
+ "^pni,.*":
+ description: PNI Sensor Corporation
+ "^portwell,.*":
+ description: Portwell Inc.
+ "^poslab,.*":
+ description: Poslab Technology Co., Ltd.
+ "^powervr,.*":
+ description: PowerVR (deprecated, use img)
+ "^probox2,.*":
+ description: PROBOX2 (by W2COMP Co., Ltd.)
+ "^pulsedlight,.*":
+ description: PulsedLight, Inc
+ "^qca,.*":
+ description: Qualcomm Atheros, Inc.
+ "^qcom,.*":
+ description: Qualcomm Technologies, Inc
+ "^qemu,.*":
+ description: QEMU, a generic and open source machine emulator and virtualizer
+ "^qi,.*":
+ description: Qi Hardware
+ "^qiaodian,.*":
+ description: QiaoDian XianShi Corporation
+ "^qnap,.*":
+ description: QNAP Systems, Inc.
+ "^radxa,.*":
+ description: Radxa
+ "^raidsonic,.*":
+ description: RaidSonic Technology GmbH
+ "^ralink,.*":
+ description: Mediatek/Ralink Technology Corp.
+ "^ramtron,.*":
+ description: Ramtron International
+ "^raspberrypi,.*":
+ description: Raspberry Pi Foundation
+ "^raydium,.*":
+ description: Raydium Semiconductor Corp.
+ "^rda,.*":
+ description: Unisoc Communications, Inc.
+ "^realtek,.*":
+ description: Realtek Semiconductor Corp.
+ "^renesas,.*":
+ description: Renesas Electronics Corporation
+ "^richtek,.*":
+ description: Richtek Technology Corporation
+ "^ricoh,.*":
+ description: Ricoh Co. Ltd.
+ "^rikomagic,.*":
+ description: Rikomagic Tech Corp. Ltd
+ "^riscv,.*":
+ description: RISC-V Foundation
+ "^rockchip,.*":
+ description: Fuzhou Rockchip Electronics Co., Ltd
+ "^rocktech,.*":
+ description: ROCKTECH DISPLAYS LIMITED
+ "^rohm,.*":
+ description: ROHM Semiconductor Co., Ltd
+ "^ronbo,.*":
+ description: Ronbo Electronics
+ "^roofull,.*":
+ description: Shenzhen Roofull Technology Co, Ltd
+ "^samsung,.*":
+ description: Samsung Semiconductor
+ "^samtec,.*":
+ description: Samtec/Softing company
+ "^sancloud,.*":
+ description: Sancloud Ltd
+ "^sandisk,.*":
+ description: Sandisk Corporation
+ "^sbs,.*":
+ description: Smart Battery System
+ "^schindler,.*":
+ description: Schindler
+ "^seagate,.*":
+ description: Seagate Technology PLC
+ "^seirobotics,.*":
+ description: Shenzhen SEI Robotics Co., Ltd
+ "^semtech,.*":
+ description: Semtech Corporation
+ "^sensirion,.*":
+ description: Sensirion AG
+ "^sff,.*":
+ description: Small Form Factor Committee
+ "^sgd,.*":
+ description: Solomon Goldentek Display Corporation
+ "^sgx,.*":
+ description: SGX Sensortech
+ "^sharp,.*":
+ description: Sharp Corporation
+ "^shimafuji,.*":
+ description: Shimafuji Electric, Inc.
+ "^si-en,.*":
+ description: Si-En Technology Ltd.
+ "^si-linux,.*":
+ description: Silicon Linux Corporation
+ "^sifive,.*":
+ description: SiFive, Inc.
+ "^sigma,.*":
+ description: Sigma Designs, Inc.
+ "^sii,.*":
+ description: Seiko Instruments, Inc.
+ "^sil,.*":
+ description: Silicon Image
+ "^silabs,.*":
+ description: Silicon Laboratories
+ "^silead,.*":
+ description: Silead Inc.
+ "^silergy,.*":
+ description: Silergy Corp.
+ "^siliconmitus,.*":
+ description: Silicon Mitus, Inc.
+ "^simte,.*":
+ description: k
+ "^sirf,.*":
+ description: SiRF Technology, Inc.
+ "^sis,.*":
+ description: Silicon Integrated Systems Corp.
+ "^sitronix,.*":
+ description: Sitronix Technology Corporation
+ "^skyworks,.*":
+ description: Skyworks Solutions, Inc.
+ "^smsc,.*":
+ description: Standard Microsystems Corporation
+ "^snps,.*":
+ description: Synopsys, Inc.
+ "^socionext,.*":
+ description: Socionext Inc.
+ "^solidrun,.*":
+ description: SolidRun
+ "^solomon,.*":
+ description: Solomon Systech Limited
+ "^sony,.*":
+ description: Sony Corporation
+ "^spansion,.*":
+ description: Spansion Inc.
+ "^sprd,.*":
+ description: Spreadtrum Communications Inc.
+ "^sst,.*":
+ description: Silicon Storage Technology, Inc.
+ "^st,.*":
+ description: STMicroelectronics
+ "^starry,.*":
+ description: Starry Electronic Technology (ShenZhen) Co., LTD
+ "^startek,.*":
+ description: Startek
+ "^ste,.*":
+ description: ST-Ericsson
+ "^stericsson,.*":
+ description: ST-Ericsson
+ "^summit,.*":
+ description: Summit microelectronics
+ "^sunchip,.*":
+ description: Shenzhen Sunchip Technology Co., Ltd
+ "^SUNW,.*":
+ description: Sun Microsystems, Inc
+ "^swir,.*":
+ description: Sierra Wireless
+ "^syna,.*":
+ description: Synaptics Inc.
+ "^synology,.*":
+ description: Synology, Inc.
+ "^tbs,.*":
+ description: TBS Technologies
+ "^tbs-biometrics,.*":
+ description: Touchless Biometric Systems AG
+ "^tcg,.*":
+ description: Trusted Computing Group
+ "^tcl,.*":
+ description: Toby Churchill Ltd.
+ "^technexion,.*":
+ description: TechNexion
+ "^technologic,.*":
+ description: Technologic Systems
+ "^tempo,.*":
+ description: Tempo Semiconductor
+ "^techstar,.*":
+ description: Shenzhen Techstar Electronics Co., Ltd.
+ "^terasic,.*":
+ description: Terasic Inc.
+ "^thine,.*":
+ description: THine Electronics, Inc.
+ "^ti,.*":
+ description: Texas Instruments
+ "^tianma,.*":
+ description: Tianma Micro-electronics Co., Ltd.
+ "^tlm,.*":
+ description: Trusted Logic Mobility
+ "^tmt,.*":
+ description: Tecon Microprocessor Technologies, LLC.
+ "^topeet,.*":
+ description: Topeet
+ "^toradex,.*":
+ description: Toradex AG
+ "^toshiba,.*":
+ description: Toshiba Corporation
+ "^toumaz,.*":
+ description: Toumaz
+ "^tpk,.*":
+ description: TPK U.S.A. LLC
+ "^tplink,.*":
+ description: TP-LINK Technologies Co., Ltd.
+ "^tpo,.*":
+ description: TPO
+ "^tq,.*":
+ description: TQ Systems GmbH
+ "^tronfy,.*":
+ description: Tronfy
+ "^tronsmart,.*":
+ description: Tronsmart
+ "^truly,.*":
+ description: Truly Semiconductors Limited
+ "^tsd,.*":
+ description: Theobroma Systems Design und Consulting GmbH
+ "^tyan,.*":
+ description: Tyan Computer Corporation
+ "^u-blox,.*":
+ description: u-blox
+ "^ucrobotics,.*":
+ description: uCRobotics
+ "^ubnt,.*":
+ description: Ubiquiti Networks
+ "^udoo,.*":
+ description: Udoo
+ "^uniwest,.*":
+ description: United Western Technologies Corp (UniWest)
+ "^upisemi,.*":
+ description: uPI Semiconductor Corp.
+ "^urt,.*":
+ description: United Radiant Technology Corporation
+ "^usi,.*":
+ description: Universal Scientific Industrial Co., Ltd.
+ "^v3,.*":
+ description: V3 Semiconductor
+ "^vamrs,.*":
+ description: Vamrs Ltd.
+ "^variscite,.*":
+ description: Variscite Ltd.
+ "^via,.*":
+ description: VIA Technologies, Inc.
+ "^virtio,.*":
+ description: Virtual I/O Device Specification, developed by the OASIS consortium
+ "^vishay,.*":
+ description: Vishay Intertechnology, Inc
+ "^vitesse,.*":
+ description: Vitesse Semiconductor Corporation
+ "^vivante,.*":
+ description: Vivante Corporation
+ "^vocore,.*":
+ description: VoCore Studio
+ "^voipac,.*":
+ description: Voipac Technologies s.r.o.
+ "^vot,.*":
+ description: Vision Optical Technology Co., Ltd.
+ "^wd,.*":
+ description: Western Digital Corp.
+ "^wetek,.*":
+ description: WeTek Electronics, limited.
+ "^wexler,.*":
+ description: Wexler
+ "^whwave,.*":
+ description: Shenzhen whwave Electronics, Inc.
+ "^wi2wi,.*":
+ description: Wi2Wi, Inc.
+ "^winbond,.*":
+ description: Winbond Electronics corp.
+ "^winstar,.*":
+ description: Winstar Display Corp.
+ "^wlf,.*":
+ description: Wolfson Microelectronics
+ "^wm,.*":
+ description: Wondermedia Technologies, Inc.
+ "^x-powers,.*":
+ description: X-Powers
+ "^xes,.*":
+ description: Extreme Engineering Solutions (X-ES)
+ "^xillybus,.*":
+ description: Xillybus Ltd.
+ "^xlnx,.*":
+ description: Xilinx
+ "^xunlong,.*":
+ description: Shenzhen Xunlong Software CO.,Limited
+ "^ysoft,.*":
+ description: Y Soft Corporation a.s.
+ "^zarlink,.*":
+ description: Zarlink Semiconductor
+ "^zeitec,.*":
+ description: ZEITEC Semiconductor Co., LTD.
+ "^zidoo,.*":
+ description: Shenzhen Zidoo Technology Co., Ltd.
+ "^zii,.*":
+ description: Zodiac Inflight Innovations
+ "^zte,.*":
+ description: ZTE Corp.
+ "^zyxel,.*":
+ description: ZyXEL Communications Corp.
+
+ # Normal property name match without a comma
+ # These should catch all node/property names without a prefix
+ "^[a-zA-Z0-9#][a-zA-Z0-9+\\-._@]{0,63}$": true
+ "^[a-zA-Z0-9+\\-._]*@[0-9a-zA-Z,]*$": true
+ "^#.*": true
+
+additionalProperties: false
+
+...
* - ``V4L2_FIELD_ANY``
- 0
- - Applications request this field order when any one of the
- ``V4L2_FIELD_NONE``, ``V4L2_FIELD_TOP``, ``V4L2_FIELD_BOTTOM``, or
- ``V4L2_FIELD_INTERLACED`` formats is acceptable. Drivers choose
- depending on hardware capabilities or e. g. the requested image
- size, and return the actual field order. Drivers must never return
- ``V4L2_FIELD_ANY``. If multiple field orders are possible the
+ - Applications request this field order when any field format
+ is acceptable. Drivers choose depending on hardware capabilities or
+ e.g. the requested image size, and return the actual field order.
+ Drivers must never return ``V4L2_FIELD_ANY``.
+ If multiple field orders are possible the
driver must choose one of the possible field orders during
:ref:`VIDIOC_S_FMT <VIDIOC_G_FMT>` or
:ref:`VIDIOC_TRY_FMT <VIDIOC_G_FMT>`. struct
``V4L2_FIELD_ANY``.
* - ``V4L2_FIELD_NONE``
- 1
- - Images are in progressive format, not interlaced. The driver may
- also indicate this order when it cannot distinguish between
- ``V4L2_FIELD_TOP`` and ``V4L2_FIELD_BOTTOM``.
+ - Images are in progressive (frame-based) format, not interlaced
+ (field-based).
* - ``V4L2_FIELD_TOP``
- 2
- Images consist of the top (aka odd) field only.
s64 tx_total_len,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
- bool upgrade);
+ bool upgrade,
+ bool intr,
+ unsigned int debug_id);
This allocates the infrastructure to make a new RxRPC call and assigns
call and connection numbers. The call will be made on the UDP port that
the server upgrade the service to a better one. The resultant service ID
is returned by rxrpc_kernel_recv_data().
+ intr should be set to true if the call should be interruptible. If this
+ is not set, this function may not return until a channel has been
+ allocated; if it is set, the function may return -ERESTARTSYS.
+
+ debug_id is the call debugging ID to be used for tracing. This can be
+ obtained by atomically incrementing rxrpc_debug_id.
+
If this function is successful, an opaque reference to the RxRPC call is
returned. The caller now holds a reference on this and it must be
properly ended.
This value can be used to determine if the remote client has been
restarted as it shouldn't change otherwise.
+ (*) Set the maxmimum lifespan on a call.
+
+ void rxrpc_kernel_set_max_life(struct socket *sock,
+ struct rxrpc_call *call,
+ unsigned long hard_timeout)
+
+ This sets the maximum lifespan on a call to hard_timeout (which is in
+ jiffies). In the event of the timeout occurring, the call will be
+ aborted and -ETIME or -ETIMEDOUT will be returned.
+
=======================
CONFIGURABLE PARAMETERS
mds_user_clear.
The mitigation is invoked in prepare_exit_to_usermode() which covers
- most of the kernel to user space transitions. There are a few exceptions
- which are not invoking prepare_exit_to_usermode() on return to user
- space. These exceptions use the paranoid exit code.
+ all but one of the kernel to user space transitions. The exception
+ is when we return from a Non Maskable Interrupt (NMI), which is
+ handled directly in do_nmi().
- - Non Maskable Interrupt (NMI):
-
- Access to sensible data like keys, credentials in the NMI context is
- mostly theoretical: The CPU can do prefetching or execute a
- misspeculated code path and thereby fetching data which might end up
- leaking through a buffer.
-
- But for mounting other attacks the kernel stack address of the task is
- already valuable information. So in full mitigation mode, the NMI is
- mitigated on the return from do_nmi() to provide almost complete
- coverage.
-
- - Double fault (#DF):
-
- A double fault is usually fatal, but the ESPFIX workaround, which can
- be triggered from user space through modify_ldt(2) is a recoverable
- double fault. #DF uses the paranoid exit path, so explicit mitigation
- in the double fault handler is required.
-
- - Machine Check Exception (#MC):
-
- Another corner case is a #MC which hits between the CPU buffer clear
- invocation and the actual return to user. As this still is in kernel
- space it takes the paranoid exit path which does not clear the CPU
- buffers. So the #MC handler repopulates the buffers to some
- extent. Machine checks are not reliably controllable and the window is
- extremly small so mitigation would just tick a checkbox that this
- theoretical corner case is covered. To keep the amount of special
- cases small, ignore #MC.
-
- - Debug Exception (#DB):
-
- This takes the paranoid exit path only when the INT1 breakpoint is in
- kernel space. #DB on a user space address takes the regular exit path,
- so no extra mitigation required.
+ (The reason that NMI is special is that prepare_exit_to_usermode() can
+ enable IRQs. In NMI context, NMIs are blocked, and we don't want to
+ enable IRQs with NMIs blocked.)
2. C-State transition
bool
config 64BIT_TIME
- def_bool ARCH_HAS_64BIT_TIME
+ def_bool y
help
This should be selected by all architectures that need to support
new system calls with a 64-bit time_t. This is relevant on all 32-bit
+++ /dev/null
-#ifndef __ALPHA_SEGMENT_H
-#define __ALPHA_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif
#include <asm/hwrpb.h>
#include <asm/io.h>
-#include <asm/segment.h>
#if 0
# define DBG_DEVS(args) printk args
#include <asm/hwrpb.h>
#include <asm/io.h>
-#include <asm/segment.h>
#define SMC_DEBUG 0
#endif
+#include <asm/segment.h>
#include <asm-generic/uaccess.h>
#endif
CONFIG_SPI_SPIDEV=y
CONFIG_GPIO_SYSFS=y
CONFIG_SENSORS_LM75=y
-CONFIG_THERMAL=m
+CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_S3C2410_WATCHDOG=y
CONFIG_FB=y
CONFIG_SENSORS_LM90=m
CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_NTC_THERMISTOR=m
-CONFIG_THERMAL=m
+CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_XILINX_WATCHDOG=m
CONFIG_SA1100_WATCHDOG=m
generic-y += parport.h
generic-y += preempt.h
generic-y += seccomp.h
-generic-y += segment.h
generic-y += serial.h
generic-y += simd.h
generic-y += trace_clock.h
generic-y += msi.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += segment.h
generic-y += serial.h
generic-y += set_memory.h
generic-y += switch_to.h
generic-y += percpu.h
generic-y += pgalloc.h
generic-y += preempt.h
-generic-y += segment.h
generic-y += serial.h
generic-y += shmparam.h
generic-y += tlbflush.h
select HAVE_ARCH_KGDB
select HAVE_ARCH_HASH
select CPU_NO_EFFICIENT_FFS
+ select UACCESS_MEMCPY
config CPU_BIG_ENDIAN
def_bool y
generic-y += tlbflush.h
generic-y += topology.h
generic-y += trace_clock.h
+generic-y += uaccess.h
generic-y += unaligned.h
generic-y += vga.h
generic-y += word-at-a-time.h
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_UACCESS_H
-#define _ASM_UACCESS_H
-
-#include <linux/string.h>
-
-static inline __must_check unsigned long
-raw_copy_from_user(void *to, const void __user * from, unsigned long n)
-{
- if (__builtin_constant_p(n)) {
- switch(n) {
- case 1:
- *(u8 *)to = *(u8 __force *)from;
- return 0;
- case 2:
- *(u16 *)to = *(u16 __force *)from;
- return 0;
- case 4:
- *(u32 *)to = *(u32 __force *)from;
- return 0;
- }
- }
-
- memcpy(to, (const void __force *)from, n);
- return 0;
-}
-
-static inline __must_check unsigned long
-raw_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- if (__builtin_constant_p(n)) {
- switch(n) {
- case 1:
- *(u8 __force *)to = *(u8 *)from;
- return 0;
- case 2:
- *(u16 __force *)to = *(u16 *)from;
- return 0;
- case 4:
- *(u32 __force *)to = *(u32 *)from;
- return 0;
- default:
- break;
- }
- }
-
- memcpy((void __force *)to, from, n);
- return 0;
-}
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#include <asm-generic/uaccess.h>
-
-#endif
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
-generic-y += segment.h
generic-y += serial.h
generic-y += shmparam.h
generic-y += topology.h
* User space memory access functions
*/
#include <linux/mm.h>
-#include <asm/segment.h>
#include <asm/sections.h>
/*
+++ /dev/null
-#ifndef _ASM_IA64_SEGMENT_H
-#define _ASM_IA64_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif /* _ASM_IA64_SEGMENT_H */
# CONFIG_HW_RANDOM is not set
CONFIG_RAW_DRIVER=m
# CONFIG_HWMON is not set
-CONFIG_THERMAL=m
+CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_INDYDOG=m
# CONFIG_VGA_CONSOLE is not set
CONFIG_I2C_TAOS_EVM=m
CONFIG_I2C_STUB=m
# CONFIG_HWMON is not set
-CONFIG_THERMAL=m
+CONFIG_THERMAL=y
CONFIG_MFD_PCF50633=m
CONFIG_PCF50633_ADC=m
CONFIG_PCF50633_GPIO=m
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += sections.h
-generic-y += segment.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += user.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
-generic-y += segment.h
generic-y += serial.h
generic-y += switch_to.h
generic-y += timex.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
-generic-y += segment.h
generic-y += serial.h
generic-y += spinlock.h
generic-y += topology.h
generic-y += qrwlock_types.h
generic-y += qrwlock.h
generic-y += sections.h
-generic-y += segment.h
generic-y += shmparam.h
generic-y += switch_to.h
generic-y += topology.h
#include <linux/elf.h>
#include <asm/thread_info.h>
-#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <linux/device.h>
#include <asm/sections.h>
-#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/types.h>
#include <asm/setup.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
-#include <asm/segment.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/unwinder.h>
#include <linux/blkdev.h> /* for initrd_* */
#include <linux/pagemap.h>
-#include <asm/segment.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <linux/mm.h>
#include <linux/init.h>
-#include <asm/segment.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
generic-y += percpu.h
generic-y += preempt.h
generic-y += seccomp.h
-generic-y += segment.h
generic-y += trace_clock.h
generic-y += user.h
generic-y += vga.h
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_SEGMENT_H
-#define _ASM_SEGMENT_H
-
-#endif
#include <linux/seccomp.h>
#include <linux/compat.h>
#include <trace/syscall.h>
-#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
# Hardware Monitoring support
#CONFIG_SENSORS_LM75=m
# Generic Thermal sysfs driver
-#CONFIG_THERMAL=m
+#CONFIG_THERMAL=y
#CONFIG_THERMAL_HWMON=y
# Multimedia support
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
-generic-y += segment.h
generic-y += serial.h
generic-y += shmparam.h
generic-y += syscalls.h
config GENERIC_BUG_RELATIVE_POINTERS
bool
-config GENERIC_HWEIGHT
- def_bool y
-
config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
enum {
sym_vvar_start,
sym_vvar_page,
- sym_hpet_page,
sym_pvclock_page,
sym_hvclock_page,
};
const int special_pages[] = {
sym_vvar_page,
- sym_hpet_page,
sym_pvclock_page,
sym_hvclock_page,
};
struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
- [sym_hpet_page] = {"hpet_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_hvclock_page] = {"hvclock_page", true},
{"VDSO32_NOTE_MASK", true},
return 0;
}
-const struct attribute_group *amd_iommu_attr_groups[] = {
+static const struct attribute_group *amd_iommu_attr_groups[] = {
&amd_iommu_format_group,
&amd_iommu_cpumask_group,
&amd_iommu_events_group,
return ret;
if (event->attr.precise_ip) {
- if (!(event->attr.freq || event->attr.wakeup_events)) {
+ if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
~intel_pmu_large_pebs_flags(event)))
/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \
- EVENT_CONSTRAINT_RANGE(c, e, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+ EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
/* Check only flags, but allow all event/umask */
#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
#define REG_OUT "a"
#endif
-#define __HAVE_ARCH_SW_HWEIGHT
-
static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
long sym_vvar_start; /* Negative offset to the vvar area */
long sym_vvar_page;
- long sym_hpet_page;
long sym_pvclock_page;
long sym_hvclock_page;
long sym_VDSO32_NOTE_MASK;
/*
* Called from kretprobe_trampoline
*/
-static __used void *trampoline_handler(struct pt_regs *regs)
+__used __visible void *trampoline_handler(struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb;
struct kretprobe_instance *ri = NULL;
#include <asm/alternative.h>
#include <asm/fpu/xstate.h>
#include <asm/trace/mpx.h>
-#include <asm/nospec-branch.h>
#include <asm/mpx.h>
#include <asm/vm86.h>
#include <asm/umip.h>
regs->ip = (unsigned long)general_protection;
regs->sp = (unsigned long)&gpregs->orig_ax;
- /*
- * This situation can be triggered by userspace via
- * modify_ldt(2) and the return does not take the regular
- * user space exit, so a CPU buffer clear is required when
- * MDS mitigation is enabled.
- */
- mds_user_clear_cpu_buffers();
return;
}
#endif
#include "ident_map.c"
+#define DEFINE_POPULATE(fname, type1, type2, init) \
+static inline void fname##_init(struct mm_struct *mm, \
+ type1##_t *arg1, type2##_t *arg2, bool init) \
+{ \
+ if (init) \
+ fname##_safe(mm, arg1, arg2); \
+ else \
+ fname(mm, arg1, arg2); \
+}
+
+DEFINE_POPULATE(p4d_populate, p4d, pud, init)
+DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
+DEFINE_POPULATE(pud_populate, pud, pmd, init)
+DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+#define DEFINE_ENTRY(type1, type2, init) \
+static inline void set_##type1##_init(type1##_t *arg1, \
+ type2##_t arg2, bool init) \
+{ \
+ if (init) \
+ set_##type1##_safe(arg1, arg2); \
+ else \
+ set_##type1(arg1, arg2); \
+}
+
+DEFINE_ENTRY(p4d, p4d, init)
+DEFINE_ENTRY(pud, pud, init)
+DEFINE_ENTRY(pmd, pmd, init)
+DEFINE_ENTRY(pte, pte, init)
+
+
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
*/
static unsigned long __meminit
phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
- pgprot_t prot)
+ pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PAGE_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pte_safe(pte, __pte(0));
+ set_pte_init(pte, __pte(0), init);
continue;
}
pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr,
pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte);
pages++;
- set_pte_safe(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+ set_pte_init(pte, pfn_pte(paddr >> PAGE_SHIFT, prot), init);
paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE;
}
*/
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask, pgprot_t prot)
+ unsigned long page_size_mask, pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PMD_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pmd_safe(pmd, __pmd(0));
+ set_pmd_init(pmd, __pmd(0), init);
continue;
}
spin_lock(&init_mm.page_table_lock);
pte = (pte_t *)pmd_page_vaddr(*pmd);
paddr_last = phys_pte_init(pte, paddr,
- paddr_end, prot);
+ paddr_end, prot,
+ init);
spin_unlock(&init_mm.page_table_lock);
continue;
}
if (page_size_mask & (1<<PG_LEVEL_2M)) {
pages++;
spin_lock(&init_mm.page_table_lock);
- set_pte_safe((pte_t *)pmd,
- pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
- __pgprot(pgprot_val(prot) | _PAGE_PSE)));
+ set_pte_init((pte_t *)pmd,
+ pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
+ __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+ init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
continue;
}
pte = alloc_low_page();
- paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot);
+ paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
spin_lock(&init_mm.page_table_lock);
- pmd_populate_kernel_safe(&init_mm, pmd, pte);
+ pmd_populate_kernel_init(&init_mm, pmd, pte, init);
spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
*/
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, bool init)
{
unsigned long pages = 0, paddr_next;
unsigned long paddr_last = paddr_end;
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & PUD_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_pud_safe(pud, __pud(0));
+ set_pud_init(pud, __pud(0), init);
continue;
}
paddr_last = phys_pmd_init(pmd, paddr,
paddr_end,
page_size_mask,
- prot);
+ prot, init);
continue;
}
/*
if (page_size_mask & (1<<PG_LEVEL_1G)) {
pages++;
spin_lock(&init_mm.page_table_lock);
- set_pte_safe((pte_t *)pud,
- pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
- PAGE_KERNEL_LARGE));
+ set_pte_init((pte_t *)pud,
+ pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
+ PAGE_KERNEL_LARGE),
+ init);
spin_unlock(&init_mm.page_table_lock);
paddr_last = paddr_next;
continue;
pmd = alloc_low_page();
paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
- page_size_mask, prot);
+ page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
- pud_populate_safe(&init_mm, pud, pmd);
+ pud_populate_init(&init_mm, pud, pmd, init);
spin_unlock(&init_mm.page_table_lock);
}
static unsigned long __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
- unsigned long page_size_mask)
+ unsigned long page_size_mask, bool init)
{
unsigned long paddr_next, paddr_last = paddr_end;
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
if (!pgtable_l5_enabled())
- return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
+ return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+ page_size_mask, init);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
p4d_t *p4d;
E820_TYPE_RAM) &&
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
E820_TYPE_RESERVED_KERN))
- set_p4d_safe(p4d, __p4d(0));
+ set_p4d_init(p4d, __p4d(0), init);
continue;
}
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
- paddr_last = phys_pud_init(pud, paddr,
- paddr_end,
- page_size_mask);
+ paddr_last = phys_pud_init(pud, paddr, paddr_end,
+ page_size_mask, init);
continue;
}
pud = alloc_low_page();
paddr_last = phys_pud_init(pud, paddr, paddr_end,
- page_size_mask);
+ page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
- p4d_populate_safe(&init_mm, p4d, pud);
+ p4d_populate_init(&init_mm, p4d, pud, init);
spin_unlock(&init_mm.page_table_lock);
}
return paddr_last;
}
-/*
- * Create page table mapping for the physical memory for specific physical
- * addresses. The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
- */
-unsigned long __meminit
-kernel_physical_mapping_init(unsigned long paddr_start,
- unsigned long paddr_end,
- unsigned long page_size_mask)
+static unsigned long __meminit
+__kernel_physical_mapping_init(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask,
+ bool init)
{
bool pgd_changed = false;
unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
p4d = (p4d_t *)pgd_page_vaddr(*pgd);
paddr_last = phys_p4d_init(p4d, __pa(vaddr),
__pa(vaddr_end),
- page_size_mask);
+ page_size_mask,
+ init);
continue;
}
p4d = alloc_low_page();
paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
- page_size_mask);
+ page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
if (pgtable_l5_enabled())
- pgd_populate_safe(&init_mm, pgd, p4d);
+ pgd_populate_init(&init_mm, pgd, p4d, init);
else
- p4d_populate_safe(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
+ p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
+ (pud_t *) p4d, init);
+
spin_unlock(&init_mm.page_table_lock);
pgd_changed = true;
}
return paddr_last;
}
+
+/*
+ * Create page table mapping for the physical memory for specific physical
+ * addresses. Note that it can only be used to populate non-present entries.
+ * The virtual and physical addresses have to be aligned on PMD level
+ * down. It returns the last physical address mapped.
+ */
+unsigned long __meminit
+kernel_physical_mapping_init(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask)
+{
+ return __kernel_physical_mapping_init(paddr_start, paddr_end,
+ page_size_mask, true);
+}
+
+/*
+ * This function is similar to kernel_physical_mapping_init() above with the
+ * exception that it uses set_{pud,pmd}() instead of the set_{pud,pte}_safe()
+ * when updating the mapping. The caller is responsible to flush the TLBs after
+ * the function returns.
+ */
+unsigned long __meminit
+kernel_physical_mapping_change(unsigned long paddr_start,
+ unsigned long paddr_end,
+ unsigned long page_size_mask)
+{
+ return __kernel_physical_mapping_init(paddr_start, paddr_end,
+ page_size_mask, false);
+}
+
#ifndef CONFIG_NUMA
void __init initmem_init(void)
{
else
split_page_size_mask = 1 << PG_LEVEL_2M;
- kernel_physical_mapping_init(__pa(vaddr & pmask),
- __pa((vaddr_end & pmask) + psize),
- split_page_size_mask);
+ /*
+ * kernel_physical_mapping_change() does not flush the TLBs, so
+ * a TLB flush is required after we exit from the for loop.
+ */
+ kernel_physical_mapping_change(__pa(vaddr & pmask),
+ __pa((vaddr_end & pmask) + psize),
+ split_page_size_mask);
}
ret = 0;
unsigned long kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask);
+unsigned long kernel_physical_mapping_change(unsigned long start,
+ unsigned long end,
+ unsigned long page_size_mask);
void zone_sizes_init(void);
extern int after_bootmem;
+++ /dev/null
-/*
- * include/asm-xtensa/segment.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_SEGMENT_H
-#define _XTENSA_SEGMENT_H
-
-#include <linux/uaccess.h>
-
-#endif /* _XTENSA_SEGEMENT_H */
struct rbd_client *rbdc;
int ret;
- mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&client_mutex);
rbdc = rbd_client_find(ceph_opts);
if (rbdc) {
ceph_destroy_options(ceph_opts);
zero_bvecs(&obj_req->bvec_pos, off, bytes);
break;
default:
- rbd_assert(0);
+ BUG();
}
}
kfree(obj_request->bvec_pos.bvecs);
break;
default:
- rbd_assert(0);
+ BUG();
}
kfree(obj_request->img_extents);
&obj_req->bvec_pos);
break;
default:
- rbd_assert(0);
+ BUG();
}
}
ret = rbd_obj_setup_zeroout(obj_req);
break;
default:
- rbd_assert(0);
+ BUG();
}
if (ret < 0)
return ret;
&obj_req->bvec_pos);
break;
default:
- rbd_assert(0);
+ BUG();
}
} else {
ret = rbd_img_fill_from_bvecs(child_img_req,
num_osd_ops += count_zeroout_ops(obj_req);
break;
default:
- rbd_assert(0);
+ BUG();
}
obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
__rbd_obj_setup_zeroout(obj_req, which);
break;
default:
- rbd_assert(0);
+ BUG();
}
ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
goto err_rq;
}
- rbd_assert(op_type == OBJ_OP_READ ||
- rbd_dev->spec->snap_id == CEPH_NOSNAP);
+ if (op_type != OBJ_OP_READ && rbd_dev->spec->snap_id != CEPH_NOSNAP) {
+ rbd_warn(rbd_dev, "%s on read-only snapshot",
+ obj_op_name(op_type));
+ result = -EIO;
+ goto err;
+ }
/*
* Quit early if the mapped snapshot no longer exists. It's
micro-server but may appear on others in the future.
config EDAC_MPC85XX
- tristate "Freescale MPC83xx / MPC85xx"
- depends on FSL_SOC
+ bool "Freescale MPC83xx / MPC85xx"
+ depends on FSL_SOC && EDAC=y
help
Support for error detection and correction on the Freescale
MPC8349, MPC8560, MPC8540, MPC8548, T4240
struct mem_ctl_info *edac_mc_find(int idx)
{
- struct mem_ctl_info *mci = NULL;
+ struct mem_ctl_info *mci;
struct list_head *item;
mutex_lock(&mem_ctls_mutex);
list_for_each(item, &mc_devices) {
mci = list_entry(item, struct mem_ctl_info, link);
-
- if (mci->mc_idx >= idx) {
- if (mci->mc_idx == idx) {
- goto unlock;
- }
- break;
- }
+ if (mci->mc_idx == idx)
+ goto unlock;
}
+ mci = NULL;
unlock:
mutex_unlock(&mem_ctls_mutex);
return mci;
* The complex conditional is necessary to avoid a cyclic dependency
* between hwmon and thermal_sys modules.
*/
-#if IS_REACHABLE(CONFIG_THERMAL) && defined(CONFIG_THERMAL_OF) && \
- (!defined(CONFIG_THERMAL_HWMON) || \
- !(defined(MODULE) && IS_MODULE(CONFIG_THERMAL)))
+#ifdef CONFIG_THERMAL_OF
static int hwmon_thermal_get_temp(void *data, int *temp)
{
struct hwmon_thermal_data *tdata = data;
If unsure, say N.
+config DM_DUST
+ tristate "Bad sector simulation target"
+ depends on BLK_DEV_DM
+ ---help---
+ A target that simulates bad sector behavior.
+ Useful for testing.
+
+ If unsure, say N.
+
config DM_INIT
bool "DM \"dm-mod.create=\" parameter support"
depends on BLK_DEV_DM=y
obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
obj-$(CONFIG_DM_DELAY) += dm-delay.o
+obj-$(CONFIG_DM_DUST) += dm-dust.o
obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o
obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o
if (r)
return r;
- for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
+ for (b = 0; ; b++) {
r = fn(context, cmd->discard_block_size, to_dblock(b),
dm_bitset_cursor_get_value(&c));
if (r)
break;
+
+ if (b >= (from_dblock(cmd->discard_nr_blocks) - 1))
+ break;
+
+ r = dm_bitset_cursor_next(&c);
+ if (r)
+ break;
}
dm_bitset_cursor_end(&c);
{
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity *bi = blk_get_integrity(cc->dev->bdev->bd_disk);
+ struct mapped_device *md = dm_table_get_md(ti->table);
/* From now we require underlying device with our integrity profile */
if (!bi || strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG")) {
if (crypt_integrity_aead(cc)) {
cc->integrity_tag_size = cc->on_disk_tag_size - cc->integrity_iv_size;
- DMINFO("Integrity AEAD, tag size %u, IV size %u.",
+ DMDEBUG("%s: Integrity AEAD, tag size %u, IV size %u.", dm_device_name(md),
cc->integrity_tag_size, cc->integrity_iv_size);
if (crypto_aead_setauthsize(any_tfm_aead(cc), cc->integrity_tag_size)) {
return -EINVAL;
}
} else if (cc->integrity_iv_size)
- DMINFO("Additional per-sector space %u bytes for IV.",
+ DMDEBUG("%s: Additional per-sector space %u bytes for IV.", dm_device_name(md),
cc->integrity_iv_size);
if ((cc->integrity_tag_size + cc->integrity_iv_size) != bi->tag_size) {
return iv_of_dmreq(cc, dmreq) + cc->iv_size;
}
-static uint64_t *org_sector_of_dmreq(struct crypt_config *cc,
+static __le64 *org_sector_of_dmreq(struct crypt_config *cc,
struct dm_crypt_request *dmreq)
{
u8 *ptr = iv_of_dmreq(cc, dmreq) + cc->iv_size + cc->iv_size;
- return (uint64_t*) ptr;
+ return (__le64 *) ptr;
}
static unsigned int *org_tag_of_dmreq(struct crypt_config *cc,
struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
struct dm_crypt_request *dmreq;
u8 *iv, *org_iv, *tag_iv, *tag;
- uint64_t *sector;
+ __le64 *sector;
int r = 0;
BUG_ON(cc->integrity_iv_size && cc->integrity_iv_size != cc->iv_size);
r = crypto_aead_decrypt(req);
}
- if (r == -EBADMSG)
- DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
+ if (r == -EBADMSG) {
+ char b[BDEVNAME_SIZE];
+ DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", bio_devname(ctx->bio_in, b),
(unsigned long long)le64_to_cpu(*sector));
+ }
if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
r = cc->iv_gen_ops->post(cc, org_iv, dmreq);
struct scatterlist *sg_in, *sg_out;
struct dm_crypt_request *dmreq;
u8 *iv, *org_iv, *tag_iv;
- uint64_t *sector;
+ __le64 *sector;
int r = 0;
/* Reject unexpected unaligned bio. */
error = cc->iv_gen_ops->post(cc, org_iv_of_dmreq(cc, dmreq), dmreq);
if (error == -EBADMSG) {
- DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
+ char b[BDEVNAME_SIZE];
+ DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", bio_devname(ctx->bio_in, b),
(unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
io->error = BLK_STS_PROTECTION;
} else if (error < 0)
* algorithm implementation is used. Help people debug performance
* problems by logging the ->cra_driver_name.
*/
- DMINFO("%s using implementation \"%s\"", ciphermode,
+ DMDEBUG_LIMIT("%s using implementation \"%s\"", ciphermode,
crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name);
return 0;
}
return err;
}
- DMINFO("%s using implementation \"%s\"", ciphermode,
+ DMDEBUG_LIMIT("%s using implementation \"%s\"", ciphermode,
crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name);
return 0;
}
{
struct delay_c *dc = ti->private;
- destroy_workqueue(dc->kdelayd_wq);
+ if (dc->kdelayd_wq)
+ destroy_workqueue(dc->kdelayd_wq);
if (dc->read.dev)
dm_put_device(ti, dc->read.dev);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * This is a test "dust" device, which fails reads on specified
+ * sectors, emulating the behavior of a hard disk drive sending
+ * a "Read Medium Error" sense.
+ *
+ */
+
+#include <linux/device-mapper.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+
+#define DM_MSG_PREFIX "dust"
+
+struct badblock {
+ struct rb_node node;
+ sector_t bb;
+};
+
+struct dust_device {
+ struct dm_dev *dev;
+ struct rb_root badblocklist;
+ unsigned long long badblock_count;
+ spinlock_t dust_lock;
+ unsigned int blksz;
+ unsigned int sect_per_block;
+ sector_t start;
+ bool fail_read_on_bb:1;
+ bool quiet_mode:1;
+};
+
+static struct badblock *dust_rb_search(struct rb_root *root, sector_t blk)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct badblock *bblk = rb_entry(node, struct badblock, node);
+
+ if (bblk->bb > blk)
+ node = node->rb_left;
+ else if (bblk->bb < blk)
+ node = node->rb_right;
+ else
+ return bblk;
+ }
+
+ return NULL;
+}
+
+static bool dust_rb_insert(struct rb_root *root, struct badblock *new)
+{
+ struct badblock *bblk;
+ struct rb_node **link = &root->rb_node, *parent = NULL;
+ sector_t value = new->bb;
+
+ while (*link) {
+ parent = *link;
+ bblk = rb_entry(parent, struct badblock, node);
+
+ if (bblk->bb > value)
+ link = &(*link)->rb_left;
+ else if (bblk->bb < value)
+ link = &(*link)->rb_right;
+ else
+ return false;
+ }
+
+ rb_link_node(&new->node, parent, link);
+ rb_insert_color(&new->node, root);
+
+ return true;
+}
+
+static int dust_remove_block(struct dust_device *dd, unsigned long long block)
+{
+ struct badblock *bblock;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->dust_lock, flags);
+ bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block);
+
+ if (bblock == NULL) {
+ if (!dd->quiet_mode) {
+ DMERR("%s: block %llu not found in badblocklist",
+ __func__, block);
+ }
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+ return -EINVAL;
+ }
+
+ rb_erase(&bblock->node, &dd->badblocklist);
+ dd->badblock_count--;
+ if (!dd->quiet_mode)
+ DMINFO("%s: badblock removed at block %llu", __func__, block);
+ kfree(bblock);
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+
+ return 0;
+}
+
+static int dust_add_block(struct dust_device *dd, unsigned long long block)
+{
+ struct badblock *bblock;
+ unsigned long flags;
+
+ bblock = kmalloc(sizeof(*bblock), GFP_KERNEL);
+ if (bblock == NULL) {
+ if (!dd->quiet_mode)
+ DMERR("%s: badblock allocation failed", __func__);
+ return -ENOMEM;
+ }
+
+ spin_lock_irqsave(&dd->dust_lock, flags);
+ bblock->bb = block * dd->sect_per_block;
+ if (!dust_rb_insert(&dd->badblocklist, bblock)) {
+ if (!dd->quiet_mode) {
+ DMERR("%s: block %llu already in badblocklist",
+ __func__, block);
+ }
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+ kfree(bblock);
+ return -EINVAL;
+ }
+
+ dd->badblock_count++;
+ if (!dd->quiet_mode)
+ DMINFO("%s: badblock added at block %llu", __func__, block);
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+
+ return 0;
+}
+
+static int dust_query_block(struct dust_device *dd, unsigned long long block)
+{
+ struct badblock *bblock;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->dust_lock, flags);
+ bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block);
+ if (bblock != NULL)
+ DMINFO("%s: block %llu found in badblocklist", __func__, block);
+ else
+ DMINFO("%s: block %llu not found in badblocklist", __func__, block);
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+
+ return 0;
+}
+
+static int __dust_map_read(struct dust_device *dd, sector_t thisblock)
+{
+ struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock);
+
+ if (bblk)
+ return DM_MAPIO_KILL;
+
+ return DM_MAPIO_REMAPPED;
+}
+
+static int dust_map_read(struct dust_device *dd, sector_t thisblock,
+ bool fail_read_on_bb)
+{
+ unsigned long flags;
+ int ret = DM_MAPIO_REMAPPED;
+
+ if (fail_read_on_bb) {
+ spin_lock_irqsave(&dd->dust_lock, flags);
+ ret = __dust_map_read(dd, thisblock);
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+ }
+
+ return ret;
+}
+
+static void __dust_map_write(struct dust_device *dd, sector_t thisblock)
+{
+ struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock);
+
+ if (bblk) {
+ rb_erase(&bblk->node, &dd->badblocklist);
+ dd->badblock_count--;
+ kfree(bblk);
+ if (!dd->quiet_mode) {
+ sector_div(thisblock, dd->sect_per_block);
+ DMINFO("block %llu removed from badblocklist by write",
+ (unsigned long long)thisblock);
+ }
+ }
+}
+
+static int dust_map_write(struct dust_device *dd, sector_t thisblock,
+ bool fail_read_on_bb)
+{
+ unsigned long flags;
+
+ if (fail_read_on_bb) {
+ spin_lock_irqsave(&dd->dust_lock, flags);
+ __dust_map_write(dd, thisblock);
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+ }
+
+ return DM_MAPIO_REMAPPED;
+}
+
+static int dust_map(struct dm_target *ti, struct bio *bio)
+{
+ struct dust_device *dd = ti->private;
+ int ret;
+
+ bio_set_dev(bio, dd->dev->bdev);
+ bio->bi_iter.bi_sector = dd->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
+
+ if (bio_data_dir(bio) == READ)
+ ret = dust_map_read(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
+ else
+ ret = dust_map_write(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
+
+ return ret;
+}
+
+static bool __dust_clear_badblocks(struct rb_root *tree,
+ unsigned long long count)
+{
+ struct rb_node *node = NULL, *nnode = NULL;
+
+ nnode = rb_first(tree);
+ if (nnode == NULL) {
+ BUG_ON(count != 0);
+ return false;
+ }
+
+ while (nnode) {
+ node = nnode;
+ nnode = rb_next(node);
+ rb_erase(node, tree);
+ count--;
+ kfree(node);
+ }
+ BUG_ON(count != 0);
+ BUG_ON(tree->rb_node != NULL);
+
+ return true;
+}
+
+static int dust_clear_badblocks(struct dust_device *dd)
+{
+ unsigned long flags;
+ struct rb_root badblocklist;
+ unsigned long long badblock_count;
+
+ spin_lock_irqsave(&dd->dust_lock, flags);
+ badblocklist = dd->badblocklist;
+ badblock_count = dd->badblock_count;
+ dd->badblocklist = RB_ROOT;
+ dd->badblock_count = 0;
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+
+ if (!__dust_clear_badblocks(&badblocklist, badblock_count))
+ DMINFO("%s: no badblocks found", __func__);
+ else
+ DMINFO("%s: badblocks cleared", __func__);
+
+ return 0;
+}
+
+/*
+ * Target parameters:
+ *
+ * <device_path> <offset> <blksz>
+ *
+ * device_path: path to the block device
+ * offset: offset to data area from start of device_path
+ * blksz: block size (minimum 512, maximum 1073741824, must be a power of 2)
+ */
+static int dust_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ struct dust_device *dd;
+ unsigned long long tmp;
+ char dummy;
+ unsigned int blksz;
+ unsigned int sect_per_block;
+ sector_t DUST_MAX_BLKSZ_SECTORS = 2097152;
+ sector_t max_block_sectors = min(ti->len, DUST_MAX_BLKSZ_SECTORS);
+
+ if (argc != 3) {
+ ti->error = "Invalid argument count";
+ return -EINVAL;
+ }
+
+ if (kstrtouint(argv[2], 10, &blksz) || !blksz) {
+ ti->error = "Invalid block size parameter";
+ return -EINVAL;
+ }
+
+ if (blksz < 512) {
+ ti->error = "Block size must be at least 512";
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(blksz)) {
+ ti->error = "Block size must be a power of 2";
+ return -EINVAL;
+ }
+
+ if (to_sector(blksz) > max_block_sectors) {
+ ti->error = "Block size is too large";
+ return -EINVAL;
+ }
+
+ sect_per_block = (blksz >> SECTOR_SHIFT);
+
+ if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) {
+ ti->error = "Invalid device offset sector";
+ return -EINVAL;
+ }
+
+ dd = kzalloc(sizeof(struct dust_device), GFP_KERNEL);
+ if (dd == NULL) {
+ ti->error = "Cannot allocate context";
+ return -ENOMEM;
+ }
+
+ if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dd->dev)) {
+ ti->error = "Device lookup failed";
+ kfree(dd);
+ return -EINVAL;
+ }
+
+ dd->sect_per_block = sect_per_block;
+ dd->blksz = blksz;
+ dd->start = tmp;
+
+ /*
+ * Whether to fail a read on a "bad" block.
+ * Defaults to false; enabled later by message.
+ */
+ dd->fail_read_on_bb = false;
+
+ /*
+ * Initialize bad block list rbtree.
+ */
+ dd->badblocklist = RB_ROOT;
+ dd->badblock_count = 0;
+ spin_lock_init(&dd->dust_lock);
+
+ dd->quiet_mode = false;
+
+ BUG_ON(dm_set_target_max_io_len(ti, dd->sect_per_block) != 0);
+
+ ti->num_discard_bios = 1;
+ ti->num_flush_bios = 1;
+ ti->private = dd;
+
+ return 0;
+}
+
+static void dust_dtr(struct dm_target *ti)
+{
+ struct dust_device *dd = ti->private;
+
+ __dust_clear_badblocks(&dd->badblocklist, dd->badblock_count);
+ dm_put_device(ti, dd->dev);
+ kfree(dd);
+}
+
+static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
+ char *result_buf, unsigned int maxlen)
+{
+ struct dust_device *dd = ti->private;
+ sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT;
+ bool invalid_msg = false;
+ int result = -EINVAL;
+ unsigned long long tmp, block;
+ unsigned long flags;
+ char dummy;
+
+ if (argc == 1) {
+ if (!strcasecmp(argv[0], "addbadblock") ||
+ !strcasecmp(argv[0], "removebadblock") ||
+ !strcasecmp(argv[0], "queryblock")) {
+ DMERR("%s requires an additional argument", argv[0]);
+ } else if (!strcasecmp(argv[0], "disable")) {
+ DMINFO("disabling read failures on bad sectors");
+ dd->fail_read_on_bb = false;
+ result = 0;
+ } else if (!strcasecmp(argv[0], "enable")) {
+ DMINFO("enabling read failures on bad sectors");
+ dd->fail_read_on_bb = true;
+ result = 0;
+ } else if (!strcasecmp(argv[0], "countbadblocks")) {
+ spin_lock_irqsave(&dd->dust_lock, flags);
+ DMINFO("countbadblocks: %llu badblock(s) found",
+ dd->badblock_count);
+ spin_unlock_irqrestore(&dd->dust_lock, flags);
+ result = 0;
+ } else if (!strcasecmp(argv[0], "clearbadblocks")) {
+ result = dust_clear_badblocks(dd);
+ } else if (!strcasecmp(argv[0], "quiet")) {
+ if (!dd->quiet_mode)
+ dd->quiet_mode = true;
+ else
+ dd->quiet_mode = false;
+ result = 0;
+ } else {
+ invalid_msg = true;
+ }
+ } else if (argc == 2) {
+ if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
+ return result;
+
+ block = tmp;
+ sector_div(size, dd->sect_per_block);
+ if (block > size) {
+ DMERR("selected block value out of range");
+ return result;
+ }
+
+ if (!strcasecmp(argv[0], "addbadblock"))
+ result = dust_add_block(dd, block);
+ else if (!strcasecmp(argv[0], "removebadblock"))
+ result = dust_remove_block(dd, block);
+ else if (!strcasecmp(argv[0], "queryblock"))
+ result = dust_query_block(dd, block);
+ else
+ invalid_msg = true;
+
+ } else
+ DMERR("invalid number of arguments '%d'", argc);
+
+ if (invalid_msg)
+ DMERR("unrecognized message '%s' received", argv[0]);
+
+ return result;
+}
+
+static void dust_status(struct dm_target *ti, status_type_t type,
+ unsigned int status_flags, char *result, unsigned int maxlen)
+{
+ struct dust_device *dd = ti->private;
+ unsigned int sz = 0;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("%s %s %s", dd->dev->name,
+ dd->fail_read_on_bb ? "fail_read_on_bad_block" : "bypass",
+ dd->quiet_mode ? "quiet" : "verbose");
+ break;
+
+ case STATUSTYPE_TABLE:
+ DMEMIT("%s %llu %u", dd->dev->name,
+ (unsigned long long)dd->start, dd->blksz);
+ break;
+ }
+}
+
+static int dust_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+{
+ struct dust_device *dd = ti->private;
+ struct dm_dev *dev = dd->dev;
+
+ *bdev = dev->bdev;
+
+ /*
+ * Only pass ioctls through if the device sizes match exactly.
+ */
+ if (dd->start ||
+ ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT)
+ return 1;
+
+ return 0;
+}
+
+static int dust_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn,
+ void *data)
+{
+ struct dust_device *dd = ti->private;
+
+ return fn(ti, dd->dev, dd->start, ti->len, data);
+}
+
+static struct target_type dust_target = {
+ .name = "dust",
+ .version = {1, 0, 0},
+ .module = THIS_MODULE,
+ .ctr = dust_ctr,
+ .dtr = dust_dtr,
+ .iterate_devices = dust_iterate_devices,
+ .map = dust_map,
+ .message = dust_message,
+ .status = dust_status,
+ .prepare_ioctl = dust_prepare_ioctl,
+};
+
+static int __init dm_dust_init(void)
+{
+ int result = dm_register_target(&dust_target);
+
+ if (result < 0)
+ DMERR("dm_register_target failed %d", result);
+
+ return result;
+}
+
+static void __exit dm_dust_exit(void)
+{
+ dm_unregister_target(&dust_target);
+}
+
+module_init(dm_dust_init);
+module_exit(dm_dust_exit);
+
+MODULE_DESCRIPTION(DM_NAME " dust test target");
+MODULE_AUTHOR("Bryan Gurney <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
#define _LINUX_DM_EXCEPTION_STORE
#include <linux/blkdev.h>
+#include <linux/list_bl.h>
#include <linux/device-mapper.h>
/*
* chunk within the device.
*/
struct dm_exception {
- struct list_head hash_list;
+ struct hlist_bl_node hash_list;
chunk_t old_chunk;
chunk_t new_chunk;
while (table_entry) {
DMDEBUG("parsing table \"%s\"", str);
- if (++dev->dmi.target_count >= DM_MAX_TARGETS) {
+ if (++dev->dmi.target_count > DM_MAX_TARGETS) {
DMERR("too many targets %u > %d",
dev->dmi.target_count, DM_MAX_TARGETS);
return -EINVAL;
return -ENOMEM;
list_add_tail(&dev->list, devices);
- if (++ndev >= DM_MAX_DEVICES) {
- DMERR("too many targets %u > %d",
- dev->dmi.target_count, DM_MAX_TARGETS);
+ if (++ndev > DM_MAX_DEVICES) {
+ DMERR("too many devices %lu > %d",
+ ndev, DM_MAX_DEVICES);
return -EINVAL;
}
#include <linux/rbtree.h>
#include <linux/delay.h>
#include <linux/random.h>
+#include <linux/reboot.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/async_tx.h>
#define DEFAULT_INTERLEAVE_SECTORS 32768
#define DEFAULT_JOURNAL_SIZE_FACTOR 7
+#define DEFAULT_SECTORS_PER_BITMAP_BIT 32768
#define DEFAULT_BUFFER_SECTORS 128
#define DEFAULT_JOURNAL_WATERMARK 50
#define DEFAULT_SYNC_MSEC 10000
#define METADATA_WORKQUEUE_MAX_ACTIVE 16
#define RECALC_SECTORS 8192
#define RECALC_WRITE_SUPER 16
+#define BITMAP_BLOCK_SIZE 4096 /* don't change it */
+#define BITMAP_FLUSH_INTERVAL (10 * HZ)
/*
* Warning - DEBUG_PRINT prints security-sensitive data to the log,
#define SB_MAGIC "integrt"
#define SB_VERSION_1 1
#define SB_VERSION_2 2
+#define SB_VERSION_3 3
#define SB_SECTORS 8
#define MAX_SECTORS_PER_BLOCK 8
__u64 provided_data_sectors; /* userspace uses this value */
__u32 flags;
__u8 log2_sectors_per_block;
- __u8 pad[3];
+ __u8 log2_blocks_per_bitmap_bit;
+ __u8 pad[2];
__u64 recalc_sector;
};
#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
#define SB_FLAG_RECALCULATING 0x2
+#define SB_FLAG_DIRTY_BITMAP 0x4
#define JOURNAL_ENTRY_ROUNDUP 8
struct workqueue_struct *metadata_wq;
struct superblock *sb;
unsigned journal_pages;
+ unsigned n_bitmap_blocks;
+
struct page_list *journal;
struct page_list *journal_io;
struct page_list *journal_xor;
+ struct page_list *recalc_bitmap;
+ struct page_list *may_write_bitmap;
+ struct bitmap_block_status *bbs;
+ unsigned bitmap_flush_interval;
+ int synchronous_mode;
+ struct bio_list synchronous_bios;
+ struct delayed_work bitmap_flush_work;
struct crypto_skcipher *journal_crypt;
struct scatterlist **journal_scatterlist;
__s8 log2_metadata_run;
__u8 log2_buffer_sectors;
__u8 sectors_per_block;
+ __u8 log2_blocks_per_bitmap_bit;
unsigned char mode;
int suspending;
bool journal_uptodate;
bool just_formatted;
+ bool recalculate_flag;
struct alg_spec internal_hash_alg;
struct alg_spec journal_crypt_alg;
struct alg_spec journal_mac_alg;
atomic64_t number_of_mismatches;
+
+ struct notifier_block reboot_notifier;
};
struct dm_integrity_range {
sector_t logical_sector;
- unsigned n_sectors;
+ sector_t n_sectors;
bool waiting;
union {
struct rb_node node;
struct journal_completion *comp;
};
+struct bitmap_block_status {
+ struct work_struct work;
+ struct dm_integrity_c *ic;
+ unsigned idx;
+ unsigned long *bitmap;
+ struct bio_list bio_queue;
+ spinlock_t bio_queue_lock;
+
+};
+
static struct kmem_cache *journal_io_cache;
#define JOURNAL_IO_MEMPOOL 32
static void sb_set_version(struct dm_integrity_c *ic)
{
- if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
+ if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
+ ic->sb->version = SB_VERSION_3;
+ else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
ic->sb->version = SB_VERSION_2;
else
ic->sb->version = SB_VERSION_1;
return dm_io(&io_req, 1, &io_loc, NULL);
}
+#define BITMAP_OP_TEST_ALL_SET 0
+#define BITMAP_OP_TEST_ALL_CLEAR 1
+#define BITMAP_OP_SET 2
+#define BITMAP_OP_CLEAR 3
+
+static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap,
+ sector_t sector, sector_t n_sectors, int mode)
+{
+ unsigned long bit, end_bit, this_end_bit, page, end_page;
+ unsigned long *data;
+
+ if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
+ DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)",
+ (unsigned long long)sector,
+ (unsigned long long)n_sectors,
+ ic->sb->log2_sectors_per_block,
+ ic->log2_blocks_per_bitmap_bit,
+ mode);
+ BUG();
+ }
+
+ if (unlikely(!n_sectors))
+ return true;
+
+ bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
+ end_bit = (sector + n_sectors - 1) >>
+ (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
+
+ page = bit / (PAGE_SIZE * 8);
+ bit %= PAGE_SIZE * 8;
+
+ end_page = end_bit / (PAGE_SIZE * 8);
+ end_bit %= PAGE_SIZE * 8;
+
+repeat:
+ if (page < end_page) {
+ this_end_bit = PAGE_SIZE * 8 - 1;
+ } else {
+ this_end_bit = end_bit;
+ }
+
+ data = lowmem_page_address(bitmap[page].page);
+
+ if (mode == BITMAP_OP_TEST_ALL_SET) {
+ while (bit <= this_end_bit) {
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
+ do {
+ if (data[bit / BITS_PER_LONG] != -1)
+ return false;
+ bit += BITS_PER_LONG;
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
+ continue;
+ }
+ if (!test_bit(bit, data))
+ return false;
+ bit++;
+ }
+ } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) {
+ while (bit <= this_end_bit) {
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
+ do {
+ if (data[bit / BITS_PER_LONG] != 0)
+ return false;
+ bit += BITS_PER_LONG;
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
+ continue;
+ }
+ if (test_bit(bit, data))
+ return false;
+ bit++;
+ }
+ } else if (mode == BITMAP_OP_SET) {
+ while (bit <= this_end_bit) {
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
+ do {
+ data[bit / BITS_PER_LONG] = -1;
+ bit += BITS_PER_LONG;
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
+ continue;
+ }
+ __set_bit(bit, data);
+ bit++;
+ }
+ } else if (mode == BITMAP_OP_CLEAR) {
+ if (!bit && this_end_bit == PAGE_SIZE * 8 - 1)
+ clear_page(data);
+ else while (bit <= this_end_bit) {
+ if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
+ do {
+ data[bit / BITS_PER_LONG] = 0;
+ bit += BITS_PER_LONG;
+ } while (this_end_bit >= bit + BITS_PER_LONG - 1);
+ continue;
+ }
+ __clear_bit(bit, data);
+ bit++;
+ }
+ } else {
+ BUG();
+ }
+
+ if (unlikely(page < end_page)) {
+ bit = 0;
+ page++;
+ goto repeat;
+ }
+
+ return true;
+}
+
+static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src)
+{
+ unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
+ unsigned i;
+
+ for (i = 0; i < n_bitmap_pages; i++) {
+ unsigned long *dst_data = lowmem_page_address(dst[i].page);
+ unsigned long *src_data = lowmem_page_address(src[i].page);
+ copy_page(dst_data, src_data);
+ }
+}
+
+static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector)
+{
+ unsigned bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
+ unsigned bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8);
+
+ BUG_ON(bitmap_block >= ic->n_bitmap_blocks);
+ return &ic->bbs[bitmap_block];
+}
+
static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset,
bool e, const char *function)
{
if (unlikely(section >= ic->journal_sections) ||
unlikely(offset >= limit)) {
- printk(KERN_CRIT "%s: invalid access at (%u,%u), limit (%u,%u)\n",
- function, section, offset, ic->journal_sections, limit);
+ DMCRIT("%s: invalid access at (%u,%u), limit (%u,%u)",
+ function, section, offset, ic->journal_sections, limit);
BUG();
}
#endif
complete_journal_op(comp);
}
-static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section,
- unsigned n_sections, struct journal_completion *comp)
+static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags,
+ unsigned sector, unsigned n_sectors, struct journal_completion *comp)
{
struct dm_io_request io_req;
struct dm_io_region io_loc;
- unsigned sector, n_sectors, pl_index, pl_offset;
+ unsigned pl_index, pl_offset;
int r;
if (unlikely(dm_integrity_failed(ic))) {
return;
}
- sector = section * ic->journal_section_sectors;
- n_sectors = n_sections * ic->journal_section_sectors;
-
pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
}
}
+static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section,
+ unsigned n_sections, struct journal_completion *comp)
+{
+ unsigned sector, n_sectors;
+
+ sector = section * ic->journal_section_sectors;
+ n_sectors = n_sections * ic->journal_section_sectors;
+
+ rw_journal_sectors(ic, op, op_flags, sector, n_sectors, comp);
+}
+
static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections)
{
struct journal_completion io_comp;
} while (unlikely(new_range->waiting));
}
+static void add_new_range_and_wait(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
+{
+ if (unlikely(!add_new_range(ic, new_range, true)))
+ wait_and_add_new_range(ic, new_range);
+}
+
static void init_journal_node(struct journal_node *node)
{
RB_CLEAR_NODE(&node->node);
int r = dm_integrity_failed(ic);
if (unlikely(r) && !bio->bi_status)
bio->bi_status = errno_to_blk_status(r);
+ if (unlikely(ic->synchronous_mode) && bio_op(bio) == REQ_OP_WRITE) {
+ unsigned long flags;
+ spin_lock_irqsave(&ic->endio_wait.lock, flags);
+ bio_list_add(&ic->synchronous_bios, bio);
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
+ spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+ return;
+ }
bio_endio(bio);
}
else
wanted_tag_size *= ic->tag_size;
if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
- DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
+ DMERR("Invalid integrity data size %u, expected %u",
+ bip->bip_iter.bi_size, wanted_tag_size);
return DM_MAPIO_KILL;
}
}
unsigned ws, we, range_sectors;
dio->range.n_sectors = min(dio->range.n_sectors,
- ic->free_sectors << ic->sb->log2_sectors_per_block);
+ (sector_t)ic->free_sectors << ic->sb->log2_sectors_per_block);
if (unlikely(!dio->range.n_sectors)) {
if (from_map)
goto offload_to_thread;
goto journal_read_write;
}
+ if (ic->mode == 'B' && dio->write) {
+ if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
+ dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
+ struct bitmap_block_status *bbs;
+
+ bbs = sector_to_bitmap_block(ic, dio->range.logical_sector);
+ spin_lock(&bbs->bio_queue_lock);
+ bio_list_add(&bbs->bio_queue, bio);
+ spin_unlock(&bbs->bio_queue_lock);
+ queue_work(ic->writer_wq, &bbs->work);
+ return;
+ }
+ }
+
dio->in_flight = (atomic_t)ATOMIC_INIT(2);
if (need_sync_io) {
if (need_sync_io) {
wait_for_completion_io(&read_comp);
- if (unlikely(ic->recalc_wq != NULL) &&
- ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector))
goto skip_check;
+ if (ic->mode == 'B') {
+ if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector,
+ dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
+ goto skip_check;
+ }
+
if (likely(!bio->bi_status))
integrity_metadata(&dio->work);
else
wraparound_section(ic, &ic->free_section);
ic->n_uncommitted_sections++;
}
- WARN_ON(ic->journal_sections * ic->journal_section_entries !=
- (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors);
+ if (WARN_ON(ic->journal_sections * ic->journal_section_entries !=
+ (ic->n_uncommitted_sections + ic->n_committed_sections) *
+ ic->journal_section_entries + ic->free_sectors)) {
+ DMCRIT("journal_sections %u, journal_section_entries %u, "
+ "n_uncommitted_sections %u, n_committed_sections %u, "
+ "journal_section_entries %u, free_sectors %u",
+ ic->journal_sections, ic->journal_section_entries,
+ ic->n_uncommitted_sections, ic->n_committed_sections,
+ ic->journal_section_entries, ic->free_sectors);
+ }
}
static void integrity_commit(struct work_struct *w)
io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
spin_lock_irq(&ic->endio_wait.lock);
- if (unlikely(!add_new_range(ic, &io->range, true)))
- wait_and_add_new_range(ic, &io->range);
+ add_new_range_and_wait(ic, &io->range);
if (likely(!from_replay)) {
struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];
sector_t area, offset;
sector_t metadata_block;
unsigned metadata_offset;
+ sector_t logical_sector, n_sectors;
__u8 *t;
unsigned i;
int r;
unsigned super_counter = 0;
+ DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector));
+
spin_lock_irq(&ic->endio_wait.lock);
next_chunk:
goto unlock_ret;
range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
- if (unlikely(range.logical_sector >= ic->provided_data_sectors))
+ if (unlikely(range.logical_sector >= ic->provided_data_sectors)) {
+ if (ic->mode == 'B') {
+ DEBUG_print("queue_delayed_work: bitmap_flush_work\n");
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
+ }
goto unlock_ret;
+ }
get_area_and_offset(ic, range.logical_sector, &area, &offset);
range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector);
if (!ic->meta_dev)
- range.n_sectors = min(range.n_sectors, (1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
-
- if (unlikely(!add_new_range(ic, &range, true)))
- wait_and_add_new_range(ic, &range);
+ range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
+ add_new_range_and_wait(ic, &range);
spin_unlock_irq(&ic->endio_wait.lock);
+ logical_sector = range.logical_sector;
+ n_sectors = range.n_sectors;
+
+ if (ic->mode == 'B') {
+ if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) {
+ goto advance_and_next;
+ }
+ while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector,
+ ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
+ logical_sector += ic->sectors_per_block;
+ n_sectors -= ic->sectors_per_block;
+ cond_resched();
+ }
+ while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block,
+ ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
+ n_sectors -= ic->sectors_per_block;
+ cond_resched();
+ }
+ get_area_and_offset(ic, logical_sector, &area, &offset);
+ }
+
+ DEBUG_print("recalculating: %lx, %lx\n", logical_sector, n_sectors);
if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
recalc_write_super(ic);
+ if (ic->mode == 'B') {
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
+ }
super_counter = 0;
}
io_req.client = ic->io;
io_loc.bdev = ic->dev->bdev;
io_loc.sector = get_data_sector(ic, area, offset);
- io_loc.count = range.n_sectors;
+ io_loc.count = n_sectors;
r = dm_io(&io_req, 1, &io_loc, NULL);
if (unlikely(r)) {
}
t = ic->recalc_tags;
- for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) {
- integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
+ for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
+ integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
t += ic->tag_size;
}
goto err;
}
+advance_and_next:
+ cond_resched();
+
spin_lock_irq(&ic->endio_wait.lock);
remove_range_unlocked(ic, &range);
ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
recalc_write_super(ic);
}
+static void bitmap_block_work(struct work_struct *w)
+{
+ struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
+ struct dm_integrity_c *ic = bbs->ic;
+ struct bio *bio;
+ struct bio_list bio_queue;
+ struct bio_list waiting;
+
+ bio_list_init(&waiting);
+
+ spin_lock(&bbs->bio_queue_lock);
+ bio_queue = bbs->bio_queue;
+ bio_list_init(&bbs->bio_queue);
+ spin_unlock(&bbs->bio_queue_lock);
+
+ while ((bio = bio_list_pop(&bio_queue))) {
+ struct dm_integrity_io *dio;
+
+ dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
+
+ if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
+ dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
+ remove_range(ic, &dio->range);
+ INIT_WORK(&dio->work, integrity_bio_wait);
+ queue_work(ic->wait_wq, &dio->work);
+ } else {
+ block_bitmap_op(ic, ic->journal, dio->range.logical_sector,
+ dio->range.n_sectors, BITMAP_OP_SET);
+ bio_list_add(&waiting, bio);
+ }
+ }
+
+ if (bio_list_empty(&waiting))
+ return;
+
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC,
+ bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT),
+ BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
+
+ while ((bio = bio_list_pop(&waiting))) {
+ struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
+
+ block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector,
+ dio->range.n_sectors, BITMAP_OP_SET);
+
+ remove_range(ic, &dio->range);
+ INIT_WORK(&dio->work, integrity_bio_wait);
+ queue_work(ic->wait_wq, &dio->work);
+ }
+
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
+}
+
+static void bitmap_flush_work(struct work_struct *work)
+{
+ struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work);
+ struct dm_integrity_range range;
+ unsigned long limit;
+ struct bio *bio;
+
+ dm_integrity_flush_buffers(ic);
+
+ range.logical_sector = 0;
+ range.n_sectors = ic->provided_data_sectors;
+
+ spin_lock_irq(&ic->endio_wait.lock);
+ add_new_range_and_wait(ic, &range);
+ spin_unlock_irq(&ic->endio_wait.lock);
+
+ dm_integrity_flush_buffers(ic);
+ if (ic->meta_dev)
+ blkdev_issue_flush(ic->dev->bdev, GFP_NOIO, NULL);
+
+ limit = ic->provided_data_sectors;
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
+ limit = le64_to_cpu(ic->sb->recalc_sector)
+ >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)
+ << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
+ }
+ /*DEBUG_print("zeroing journal\n");*/
+ block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR);
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR);
+
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+
+ spin_lock_irq(&ic->endio_wait.lock);
+ remove_range_unlocked(ic, &range);
+ while (unlikely((bio = bio_list_pop(&ic->synchronous_bios)) != NULL)) {
+ bio_endio(bio);
+ spin_unlock_irq(&ic->endio_wait.lock);
+ spin_lock_irq(&ic->endio_wait.lock);
+ }
+ spin_unlock_irq(&ic->endio_wait.lock);
+}
+
+
static void init_journal(struct dm_integrity_c *ic, unsigned start_section,
unsigned n_sections, unsigned char commit_seq)
{
init_journal_node(&ic->journal_tree[i]);
}
+static void dm_integrity_enter_synchronous_mode(struct dm_integrity_c *ic)
+{
+ DEBUG_print("dm_integrity_enter_synchronous_mode\n");
+
+ if (ic->mode == 'B') {
+ ic->bitmap_flush_interval = msecs_to_jiffies(10) + 1;
+ ic->synchronous_mode = 1;
+
+ cancel_delayed_work_sync(&ic->bitmap_flush_work);
+ queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
+ flush_workqueue(ic->commit_wq);
+ }
+}
+
+static int dm_integrity_reboot(struct notifier_block *n, unsigned long code, void *x)
+{
+ struct dm_integrity_c *ic = container_of(n, struct dm_integrity_c, reboot_notifier);
+
+ DEBUG_print("dm_integrity_reboot\n");
+
+ dm_integrity_enter_synchronous_mode(ic);
+
+ return NOTIFY_DONE;
+}
+
static void dm_integrity_postsuspend(struct dm_target *ti)
{
struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
+ int r;
+
+ WARN_ON(unregister_reboot_notifier(&ic->reboot_notifier));
del_timer_sync(&ic->autocommit_timer);
if (ic->recalc_wq)
drain_workqueue(ic->recalc_wq);
+ if (ic->mode == 'B')
+ cancel_delayed_work_sync(&ic->bitmap_flush_work);
+
queue_work(ic->commit_wq, &ic->commit_work);
drain_workqueue(ic->commit_wq);
dm_integrity_flush_buffers(ic);
}
+ if (ic->mode == 'B') {
+ dm_integrity_flush_buffers(ic);
+#if 1
+ /* set to 0 to test bitmap replay code */
+ init_journal(ic, 0, ic->journal_sections, 0);
+ ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ if (unlikely(r))
+ dm_integrity_io_error(ic, "writing superblock", r);
+#endif
+ }
+
WRITE_ONCE(ic->suspending, 0);
BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
static void dm_integrity_resume(struct dm_target *ti)
{
struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
+ int r;
+ DEBUG_print("resume\n");
+
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
+ DEBUG_print("resume dirty_bitmap\n");
+ rw_journal_sectors(ic, REQ_OP_READ, 0, 0,
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+ if (ic->mode == 'B') {
+ if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
+ block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal);
+ block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal);
+ if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors,
+ BITMAP_OP_TEST_ALL_CLEAR)) {
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
+ ic->sb->recalc_sector = cpu_to_le64(0);
+ }
+ } else {
+ DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n",
+ ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit);
+ ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET);
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
+ ic->sb->recalc_sector = cpu_to_le64(0);
+ }
+ } else {
+ if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR))) {
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
+ ic->sb->recalc_sector = cpu_to_le64(0);
+ }
+ init_journal(ic, 0, ic->journal_sections, 0);
+ replay_journal(ic);
+ ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
+ }
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ if (unlikely(r))
+ dm_integrity_io_error(ic, "writing superblock", r);
+ } else {
+ replay_journal(ic);
+ if (ic->mode == 'B') {
+ int mode;
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
+ ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
+ r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
+ if (unlikely(r))
+ dm_integrity_io_error(ic, "writing superblock", r);
+
+ mode = ic->recalculate_flag ? BITMAP_OP_SET : BITMAP_OP_CLEAR;
+ block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, mode);
+ block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, mode);
+ block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, mode);
+ rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0,
+ ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
+ }
+ }
- replay_journal(ic);
-
- if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
+ DEBUG_print("testing recalc: %x\n", ic->sb->flags);
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
__u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
+ DEBUG_print("recalc pos: %lx / %lx\n", (long)recalc_pos, ic->provided_data_sectors);
if (recalc_pos < ic->provided_data_sectors) {
queue_work(ic->recalc_wq, &ic->recalc_work);
} else if (recalc_pos > ic->provided_data_sectors) {
recalc_write_super(ic);
}
}
+
+ ic->reboot_notifier.notifier_call = dm_integrity_reboot;
+ ic->reboot_notifier.next = NULL;
+ ic->reboot_notifier.priority = INT_MAX - 1; /* be notified after md and before hardware drivers */
+ WARN_ON(register_reboot_notifier(&ic->reboot_notifier));
+
+#if 0
+ /* set to 1 to stress test synchronous mode */
+ dm_integrity_enter_synchronous_mode(ic);
+#endif
}
static void dm_integrity_status(struct dm_target *ti, status_type_t type,
__u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100;
watermark_percentage += ic->journal_entries / 2;
do_div(watermark_percentage, ic->journal_entries);
- arg_count = 5;
+ arg_count = 3;
arg_count += !!ic->meta_dev;
arg_count += ic->sectors_per_block != 1;
arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
+ arg_count += ic->mode == 'J';
+ arg_count += ic->mode == 'J';
+ arg_count += ic->mode == 'B';
+ arg_count += ic->mode == 'B';
arg_count += !!ic->internal_hash_alg.alg_string;
arg_count += !!ic->journal_crypt_alg.alg_string;
arg_count += !!ic->journal_mac_alg.alg_string;
DMEMIT(" meta_device:%s", ic->meta_dev->name);
if (ic->sectors_per_block != 1)
DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
- if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
+ if (ic->recalculate_flag)
DMEMIT(" recalculate");
DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
- DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
- DMEMIT(" commit_time:%u", ic->autocommit_msec);
+ if (ic->mode == 'J') {
+ DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
+ DMEMIT(" commit_time:%u", ic->autocommit_msec);
+ }
+ if (ic->mode == 'B') {
+ DMEMIT(" sectors_per_bit:%llu", (unsigned long long)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
+ DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
+ }
#define EMIT_ALG(a, n) \
do { \
if (last_sector < ic->start || last_sector >= ic->meta_device_sectors)
return -EINVAL;
} else {
- __u64 meta_size = ic->provided_data_sectors * ic->tag_size;
+ __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1))
>> (ic->log2_buffer_sectors + SECTOR_SHIFT);
meta_size <<= ic->log2_buffer_sectors;
blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
}
-static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl)
+static void dm_integrity_free_page_list(struct page_list *pl)
{
unsigned i;
if (!pl)
return;
- for (i = 0; i < ic->journal_pages; i++)
- if (pl[i].page)
- __free_page(pl[i].page);
+ for (i = 0; pl[i].page; i++)
+ __free_page(pl[i].page);
kvfree(pl);
}
-static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic)
+static struct page_list *dm_integrity_alloc_page_list(unsigned n_pages)
{
- size_t page_list_desc_size = ic->journal_pages * sizeof(struct page_list);
struct page_list *pl;
unsigned i;
- pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO);
+ pl = kvmalloc_array(n_pages + 1, sizeof(struct page_list), GFP_KERNEL | __GFP_ZERO);
if (!pl)
return NULL;
- for (i = 0; i < ic->journal_pages; i++) {
+ for (i = 0; i < n_pages; i++) {
pl[i].page = alloc_page(GFP_KERNEL);
if (!pl[i].page) {
- dm_integrity_free_page_list(ic, pl);
+ dm_integrity_free_page_list(pl);
return NULL;
}
if (i)
pl[i - 1].next = &pl[i];
}
+ pl[i].page = NULL;
+ pl[i].next = NULL;
return pl;
}
kvfree(sl);
}
-static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl)
+static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic,
+ struct page_list *pl)
{
struct scatterlist **sl;
unsigned i;
unsigned idx;
page_list_location(ic, i, 0, &start_index, &start_offset);
- page_list_location(ic, i, ic->journal_section_sectors - 1, &end_index, &end_offset);
+ page_list_location(ic, i, ic->journal_section_sectors - 1,
+ &end_index, &end_offset);
n_pages = (end_index - start_index + 1);
}
ic->journal_pages = journal_pages;
- ic->journal = dm_integrity_alloc_page_list(ic);
+ ic->journal = dm_integrity_alloc_page_list(ic->journal_pages);
if (!ic->journal) {
*error = "Could not allocate memory for journal";
r = -ENOMEM;
DEBUG_print("cipher %s, block size %u iv size %u\n",
ic->journal_crypt_alg.alg_string, blocksize, ivsize);
- ic->journal_io = dm_integrity_alloc_page_list(ic);
+ ic->journal_io = dm_integrity_alloc_page_list(ic->journal_pages);
if (!ic->journal_io) {
*error = "Could not allocate memory for journal io";
r = -ENOMEM;
goto bad;
}
- ic->journal_xor = dm_integrity_alloc_page_list(ic);
+ ic->journal_xor = dm_integrity_alloc_page_list(ic->journal_pages);
if (!ic->journal_xor) {
*error = "Could not allocate memory for journal xor";
r = -ENOMEM;
sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
memset(crypt_iv, 0x00, ivsize);
- skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
+ skcipher_request_set_crypt(req, sg, sg,
+ PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
init_completion(&comp.comp);
comp.in_flight = (atomic_t)ATOMIC_INIT(1);
if (do_crypt(true, req, &comp))
* device
* offset from the start of the device
* tag size
- * D - direct writes, J - journal writes, R - recovery mode
+ * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode
* number of optional arguments
* optional arguments:
* journal_sectors
* buffer_sectors
* journal_watermark
* commit_time
+ * meta_device
+ * block_size
+ * sectors_per_bit
+ * bitmap_flush_interval
* internal_hash
* journal_crypt
* journal_mac
- * block_size
+ * recalculate
*/
static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
{0, 9, "Invalid number of feature args"},
};
unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
- bool recalculate;
bool should_write_sb;
__u64 threshold;
unsigned long long start;
+ __s8 log2_sectors_per_bitmap_bit = -1;
+ __s8 log2_blocks_per_bitmap_bit;
+ __u64 bits_in_journal;
+ __u64 n_bitmap_bits;
#define DIRECT_ARGUMENTS 4
init_waitqueue_head(&ic->copy_to_journal_wait);
init_completion(&ic->crypto_backoff);
atomic64_set(&ic->number_of_mismatches, 0);
+ ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL;
r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
if (r) {
}
}
- if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R"))
+ if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") ||
+ !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) {
ic->mode = argv[3][0];
- else {
- ti->error = "Invalid mode (expecting J, D, R)";
+ } else {
+ ti->error = "Invalid mode (expecting J, B, D, R)";
r = -EINVAL;
goto bad;
}
buffer_sectors = DEFAULT_BUFFER_SECTORS;
journal_watermark = DEFAULT_JOURNAL_WATERMARK;
sync_msec = DEFAULT_SYNC_MSEC;
- recalculate = false;
ic->sectors_per_block = 1;
as.argc = argc - DIRECT_ARGUMENTS;
while (extra_args--) {
const char *opt_string;
unsigned val;
+ unsigned long long llval;
opt_string = dm_shift_arg(&as);
if (!opt_string) {
r = -EINVAL;
dm_put_device(ti, ic->meta_dev);
ic->meta_dev = NULL;
}
- r = dm_get_device(ti, strchr(opt_string, ':') + 1, dm_table_get_mode(ti->table), &ic->meta_dev);
+ r = dm_get_device(ti, strchr(opt_string, ':') + 1,
+ dm_table_get_mode(ti->table), &ic->meta_dev);
if (r) {
ti->error = "Device lookup failed";
goto bad;
goto bad;
}
ic->sectors_per_block = val >> SECTOR_SHIFT;
+ } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
+ log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
+ } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
+ if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+ r = -EINVAL;
+ ti->error = "Invalid bitmap_flush_interval argument";
+ }
+ ic->bitmap_flush_interval = msecs_to_jiffies(val);
} else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
"Invalid internal_hash argument");
if (r)
goto bad;
} else if (!strcmp(opt_string, "recalculate")) {
- recalculate = true;
+ ic->recalculate_flag = true;
} else {
r = -EINVAL;
ti->error = "Invalid argument";
if (!journal_sectors) {
journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
- ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
+ ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
}
if (!buffer_sectors)
else
ic->log2_tag_size = -1;
+ if (ic->mode == 'B' && !ic->internal_hash) {
+ r = -EINVAL;
+ ti->error = "Bitmap mode can be only used with internal hash";
+ goto bad;
+ }
+
ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
ic->autocommit_msec = sync_msec;
timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
}
INIT_WORK(&ic->commit_work, integrity_commit);
- if (ic->mode == 'J') {
+ if (ic->mode == 'J' || ic->mode == 'B') {
ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
if (!ic->writer_wq) {
ti->error = "Cannot allocate workqueue";
should_write_sb = true;
}
- if (!ic->sb->version || ic->sb->version > SB_VERSION_2) {
+ if (!ic->sb->version || ic->sb->version > SB_VERSION_3) {
r = -EINVAL;
ti->error = "Unknown version";
goto bad;
ti->error = "The device is too small";
goto bad;
}
+
+ if (log2_sectors_per_bitmap_bit < 0)
+ log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT);
+ if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block)
+ log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block;
+
+ bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3);
+ if (bits_in_journal > UINT_MAX)
+ bits_in_journal = UINT_MAX;
+ while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
+ log2_sectors_per_bitmap_bit++;
+
+ log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block;
+ ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
+ if (should_write_sb) {
+ ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
+ }
+ n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block)
+ + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit;
+ ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8);
+
if (!ic->meta_dev)
ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections));
DEBUG_print(" journal_entries %u\n", ic->journal_entries);
DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
- DEBUG_print(" device_sectors 0x%llx\n", (unsigned long long)ic->device_sectors);
+ DEBUG_print(" data_device_sectors 0x%llx\n", i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT);
DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors);
DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run);
DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run);
DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors,
(unsigned long long)ic->provided_data_sectors);
DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
+ DEBUG_print(" bits_in_journal %llu\n", (unsigned long long)bits_in_journal);
- if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
+ if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
ic->sb->recalc_sector = cpu_to_le64(0);
}
- if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
- if (!ic->internal_hash) {
- r = -EINVAL;
- ti->error = "Recalculate is only valid with internal hash";
- goto bad;
- }
+ if (ic->internal_hash) {
ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
if (!ic->recalc_wq ) {
ti->error = "Cannot allocate workqueue";
r = create_journal(ic, &ti->error);
if (r)
goto bad;
+
+ }
+
+ if (ic->mode == 'B') {
+ unsigned i;
+ unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
+
+ ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
+ if (!ic->recalc_bitmap) {
+ r = -ENOMEM;
+ goto bad;
+ }
+ ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
+ if (!ic->may_write_bitmap) {
+ r = -ENOMEM;
+ goto bad;
+ }
+ ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
+ if (!ic->bbs) {
+ r = -ENOMEM;
+ goto bad;
+ }
+ INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work);
+ for (i = 0; i < ic->n_bitmap_blocks; i++) {
+ struct bitmap_block_status *bbs = &ic->bbs[i];
+ unsigned sector, pl_index, pl_offset;
+
+ INIT_WORK(&bbs->work, bitmap_block_work);
+ bbs->ic = ic;
+ bbs->idx = i;
+ bio_list_init(&bbs->bio_queue);
+ spin_lock_init(&bbs->bio_queue_lock);
+
+ sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT);
+ pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
+ pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
+
+ bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset;
+ }
}
if (should_write_sb) {
if (r)
goto bad;
}
+ if (ic->mode == 'B') {
+ unsigned max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8);
+ if (!max_io_len)
+ max_io_len = 1U << 31;
+ DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len);
+ if (!ti->max_io_len || ti->max_io_len > max_io_len) {
+ r = dm_set_target_max_io_len(ti, max_io_len);
+ if (r)
+ goto bad;
+ }
+ }
if (!ic->internal_hash)
dm_integrity_set(ti, ic);
ti->flush_supported = true;
return 0;
+
bad:
dm_integrity_dtr(ti);
return r;
destroy_workqueue(ic->writer_wq);
if (ic->recalc_wq)
destroy_workqueue(ic->recalc_wq);
- if (ic->recalc_buffer)
- vfree(ic->recalc_buffer);
- if (ic->recalc_tags)
- kvfree(ic->recalc_tags);
+ vfree(ic->recalc_buffer);
+ kvfree(ic->recalc_tags);
+ kvfree(ic->bbs);
if (ic->bufio)
dm_bufio_client_destroy(ic->bufio);
mempool_exit(&ic->journal_io_mempool);
dm_put_device(ti, ic->dev);
if (ic->meta_dev)
dm_put_device(ti, ic->meta_dev);
- dm_integrity_free_page_list(ic, ic->journal);
- dm_integrity_free_page_list(ic, ic->journal_io);
- dm_integrity_free_page_list(ic, ic->journal_xor);
+ dm_integrity_free_page_list(ic->journal);
+ dm_integrity_free_page_list(ic->journal_io);
+ dm_integrity_free_page_list(ic->journal_xor);
+ dm_integrity_free_page_list(ic->recalc_bitmap);
+ dm_integrity_free_page_list(ic->may_write_bitmap);
if (ic->journal_scatterlist)
dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
if (ic->journal_io_scatterlist)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 2, 0},
+ .version = {1, 3, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
/* alloc table */
r = dm_table_create(&t, get_mode(dmi), dmi->target_count, md);
if (r)
- goto err_destroy_dm;
+ goto err_hash_remove;
/* add targets */
for (i = 0; i < dmi->target_count; i++) {
err_destroy_table:
dm_table_destroy(t);
+err_hash_remove:
+ (void) __hash_remove(__get_name_cell(dmi->name));
+ /* release reference from __get_name_cell */
+ dm_put(md);
err_destroy_dm:
dm_put(md);
dm_destroy(md);
return DM_MAPIO_REMAPPED;
}
-static void multipath_release_clone(struct request *clone)
+static void multipath_release_clone(struct request *clone,
+ union map_info *map_context)
{
+ if (unlikely(map_context)) {
+ /*
+ * non-NULL map_context means caller is still map
+ * method; must undo multipath_clone_and_map()
+ */
+ struct dm_mpath_io *mpio = get_mpio(map_context);
+ struct pgpath *pgpath = mpio->pgpath;
+
+ if (pgpath && pgpath->pg->ps.type->end_io)
+ pgpath->pg->ps.type->end_io(&pgpath->pg->ps,
+ &pgpath->path,
+ mpio->nr_bytes);
+ }
+
blk_put_request(clone);
}
if (attached_handler_name || m->hw_handler_name) {
INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error);
+ kfree(attached_handler_name);
if (r) {
dm_put_device(ti, p->path.dev);
goto bad;
return p;
bad:
- kfree(attached_handler_name);
free_pgpath(p);
return ERR_PTR(r);
}
struct request *rq = tio->orig;
blk_rq_unprep_clone(clone);
- tio->ti->type->release_clone_rq(clone);
+ tio->ti->type->release_clone_rq(clone, NULL);
rq_end_stats(md, rq);
blk_mq_end_request(rq, error);
rq_end_stats(md, rq);
if (tio->clone) {
blk_rq_unprep_clone(tio->clone);
- tio->ti->type->release_clone_rq(tio->clone);
+ tio->ti->type->release_clone_rq(tio->clone, NULL);
}
dm_mq_delay_requeue_request(rq, delay_ms);
case DM_MAPIO_REMAPPED:
if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
/* -ENOMEM */
- ti->type->release_clone_rq(clone);
+ ti->type->release_clone_rq(clone, &tio->info);
return DM_MAPIO_REQUEUE;
}
ret = dm_dispatch_clone_request(clone, rq);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
blk_rq_unprep_clone(clone);
- tio->ti->type->release_clone_rq(clone);
+ tio->ti->type->release_clone_rq(clone, &tio->info);
tio->clone = NULL;
return DM_MAPIO_REQUEUE;
}
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/list.h>
+#include <linux/list_bl.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
struct dm_exception_table {
uint32_t hash_mask;
unsigned hash_shift;
- struct list_head *table;
+ struct hlist_bl_head *table;
};
struct dm_snapshot {
- struct mutex lock;
+ struct rw_semaphore lock;
struct dm_dev *origin;
struct dm_dev *cow;
atomic_t pending_exceptions_count;
- /* Protected by "lock" */
+ spinlock_t pe_allocation_lock;
+
+ /* Protected by "pe_allocation_lock" */
sector_t exception_start_sequence;
/* Protected by kcopyd single-threaded callback */
if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
continue;
- mutex_lock(&s->lock);
+ down_read(&s->lock);
active = s->active;
- mutex_unlock(&s->lock);
+ up_read(&s->lock);
if (active) {
if (snap_src)
* The lowest hash_shift bits of the chunk number are ignored, allowing
* some consecutive chunks to be grouped together.
*/
+static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);
+
+/* Lock to protect access to the completed and pending exception hash tables. */
+struct dm_exception_table_lock {
+ struct hlist_bl_head *complete_slot;
+ struct hlist_bl_head *pending_slot;
+};
+
+static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
+ struct dm_exception_table_lock *lock)
+{
+ struct dm_exception_table *complete = &s->complete;
+ struct dm_exception_table *pending = &s->pending;
+
+ lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
+ lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
+}
+
+static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
+{
+ hlist_bl_lock(lock->complete_slot);
+ hlist_bl_lock(lock->pending_slot);
+}
+
+static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
+{
+ hlist_bl_unlock(lock->pending_slot);
+ hlist_bl_unlock(lock->complete_slot);
+}
+
static int dm_exception_table_init(struct dm_exception_table *et,
uint32_t size, unsigned hash_shift)
{
et->hash_shift = hash_shift;
et->hash_mask = size - 1;
- et->table = dm_vcalloc(size, sizeof(struct list_head));
+ et->table = dm_vcalloc(size, sizeof(struct hlist_bl_head));
if (!et->table)
return -ENOMEM;
for (i = 0; i < size; i++)
- INIT_LIST_HEAD(et->table + i);
+ INIT_HLIST_BL_HEAD(et->table + i);
return 0;
}
static void dm_exception_table_exit(struct dm_exception_table *et,
struct kmem_cache *mem)
{
- struct list_head *slot;
- struct dm_exception *ex, *next;
+ struct hlist_bl_head *slot;
+ struct dm_exception *ex;
+ struct hlist_bl_node *pos, *n;
int i, size;
size = et->hash_mask + 1;
for (i = 0; i < size; i++) {
slot = et->table + i;
- list_for_each_entry_safe (ex, next, slot, hash_list)
+ hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list)
kmem_cache_free(mem, ex);
}
static void dm_remove_exception(struct dm_exception *e)
{
- list_del(&e->hash_list);
+ hlist_bl_del(&e->hash_list);
}
/*
static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
chunk_t chunk)
{
- struct list_head *slot;
+ struct hlist_bl_head *slot;
+ struct hlist_bl_node *pos;
struct dm_exception *e;
slot = &et->table[exception_hash(et, chunk)];
- list_for_each_entry (e, slot, hash_list)
+ hlist_bl_for_each_entry(e, pos, slot, hash_list)
if (chunk >= e->old_chunk &&
chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
return e;
static void dm_insert_exception(struct dm_exception_table *eh,
struct dm_exception *new_e)
{
- struct list_head *l;
+ struct hlist_bl_head *l;
+ struct hlist_bl_node *pos;
struct dm_exception *e = NULL;
l = &eh->table[exception_hash(eh, new_e->old_chunk)];
goto out;
/* List is ordered by old_chunk */
- list_for_each_entry_reverse(e, l, hash_list) {
+ hlist_bl_for_each_entry(e, pos, l, hash_list) {
/* Insert after an existing chunk? */
if (new_e->old_chunk == (e->old_chunk +
dm_consecutive_chunk_count(e) + 1) &&
return;
}
- if (new_e->old_chunk > e->old_chunk)
+ if (new_e->old_chunk < e->old_chunk)
break;
}
out:
- list_add(&new_e->hash_list, e ? &e->hash_list : l);
+ if (!e) {
+ /*
+ * Either the table doesn't support consecutive chunks or slot
+ * l is empty.
+ */
+ hlist_bl_add_head(&new_e->hash_list, l);
+ } else if (new_e->old_chunk < e->old_chunk) {
+ /* Add before an existing exception */
+ hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
+ } else {
+ /* Add to l's tail: e is the last exception in this slot */
+ hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
+ }
}
/*
*/
static int dm_add_exception(void *context, chunk_t old, chunk_t new)
{
+ struct dm_exception_table_lock lock;
struct dm_snapshot *s = context;
struct dm_exception *e;
/* Consecutive_count is implicitly initialised to zero */
e->new_chunk = new;
+ /*
+ * Although there is no need to lock access to the exception tables
+ * here, if we don't then hlist_bl_add_head(), called by
+ * dm_insert_exception(), will complain about accessing the
+ * corresponding list without locking it first.
+ */
+ dm_exception_table_lock_init(s, old, &lock);
+
+ dm_exception_table_lock(&lock);
dm_insert_exception(&s->complete, e);
+ dm_exception_table_unlock(&lock);
return 0;
}
{
/* use a fixed size of 2MB */
unsigned long mem = 2 * 1024 * 1024;
- mem /= sizeof(struct list_head);
+ mem /= sizeof(struct hlist_bl_head);
return mem;
}
int r;
chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
- mutex_lock(&s->lock);
+ down_write(&s->lock);
/*
* Process chunks (and associated exceptions) in reverse order
b = __release_queued_bios_after_merge(s);
out:
- mutex_unlock(&s->lock);
+ up_write(&s->lock);
if (b)
flush_bios(b);
if (linear_chunks < 0) {
DMERR("Read error in exception store: "
"shutting down merge");
- mutex_lock(&s->lock);
+ down_write(&s->lock);
s->merge_failed = 1;
- mutex_unlock(&s->lock);
+ up_write(&s->lock);
}
goto shut;
}
previous_count = read_pending_exceptions_done_count();
}
- mutex_lock(&s->lock);
+ down_write(&s->lock);
s->first_merging_chunk = old_chunk;
s->num_merging_chunks = linear_chunks;
- mutex_unlock(&s->lock);
+ up_write(&s->lock);
/* Wait until writes to all 'linear_chunks' drain */
for (i = 0; i < linear_chunks; i++)
return;
shut:
- mutex_lock(&s->lock);
+ down_write(&s->lock);
s->merge_failed = 1;
b = __release_queued_bios_after_merge(s);
- mutex_unlock(&s->lock);
+ up_write(&s->lock);
error_bios(b);
merge_shutdown(s);
s->snapshot_overflowed = 0;
s->active = 0;
atomic_set(&s->pending_exceptions_count, 0);
+ spin_lock_init(&s->pe_allocation_lock);
s->exception_start_sequence = 0;
s->exception_complete_sequence = 0;
s->out_of_order_tree = RB_ROOT;
- mutex_init(&s->lock);
+ init_rwsem(&s->lock);
INIT_LIST_HEAD(&s->list);
spin_lock_init(&s->pe_lock);
s->state_bits = 0;
/* Check whether exception handover must be cancelled */
(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
if (snap_src && snap_dest && (s == snap_src)) {
- mutex_lock(&snap_dest->lock);
+ down_write(&snap_dest->lock);
snap_dest->valid = 0;
- mutex_unlock(&snap_dest->lock);
+ up_write(&snap_dest->lock);
DMERR("Cancelling snapshot handover.");
}
up_read(&_origins_lock);
dm_exception_store_destroy(s->store);
- mutex_destroy(&s->lock);
-
dm_put_device(ti, s->cow);
dm_put_device(ti, s->origin);
dm_table_event(s->ti->table);
}
+static void invalidate_snapshot(struct dm_snapshot *s, int err)
+{
+ down_write(&s->lock);
+ __invalidate_snapshot(s, err);
+ up_write(&s->lock);
+}
+
static void pending_complete(void *context, int success)
{
struct dm_snap_pending_exception *pe = context;
struct bio *origin_bios = NULL;
struct bio *snapshot_bios = NULL;
struct bio *full_bio = NULL;
+ struct dm_exception_table_lock lock;
int error = 0;
+ dm_exception_table_lock_init(s, pe->e.old_chunk, &lock);
+
if (!success) {
/* Read/write error - snapshot is unusable */
- mutex_lock(&s->lock);
- __invalidate_snapshot(s, -EIO);
+ invalidate_snapshot(s, -EIO);
error = 1;
+
+ dm_exception_table_lock(&lock);
goto out;
}
e = alloc_completed_exception(GFP_NOIO);
if (!e) {
- mutex_lock(&s->lock);
- __invalidate_snapshot(s, -ENOMEM);
+ invalidate_snapshot(s, -ENOMEM);
error = 1;
+
+ dm_exception_table_lock(&lock);
goto out;
}
*e = pe->e;
- mutex_lock(&s->lock);
+ down_read(&s->lock);
+ dm_exception_table_lock(&lock);
if (!s->valid) {
+ up_read(&s->lock);
free_completed_exception(e);
error = 1;
+
goto out;
}
- /* Check for conflicting reads */
- __check_for_conflicting_io(s, pe->e.old_chunk);
-
/*
- * Add a proper exception, and remove the
- * in-flight exception from the list.
+ * Add a proper exception. After inserting the completed exception all
+ * subsequent snapshot reads to this chunk will be redirected to the
+ * COW device. This ensures that we do not starve. Moreover, as long
+ * as the pending exception exists, neither origin writes nor snapshot
+ * merging can overwrite the chunk in origin.
*/
dm_insert_exception(&s->complete, e);
+ up_read(&s->lock);
+
+ /* Wait for conflicting reads to drain */
+ if (__chunk_is_tracked(s, pe->e.old_chunk)) {
+ dm_exception_table_unlock(&lock);
+ __check_for_conflicting_io(s, pe->e.old_chunk);
+ dm_exception_table_lock(&lock);
+ }
out:
+ /* Remove the in-flight exception from the list */
dm_remove_exception(&pe->e);
+
+ dm_exception_table_unlock(&lock);
+
snapshot_bios = bio_list_get(&pe->snapshot_bios);
origin_bios = bio_list_get(&pe->origin_bios);
full_bio = pe->full_bio;
full_bio->bi_end_io = pe->full_bio_end_io;
increment_pending_exceptions_done_count();
- mutex_unlock(&s->lock);
-
/* Submit any pending write bios */
if (error) {
if (full_bio)
}
/*
- * Looks to see if this snapshot already has a pending exception
- * for this chunk, otherwise it allocates a new one and inserts
- * it into the pending table.
+ * Inserts a pending exception into the pending table.
*
- * NOTE: a write lock must be held on snap->lock before calling
- * this.
+ * NOTE: a write lock must be held on the chunk's pending exception table slot
+ * before calling this.
*/
static struct dm_snap_pending_exception *
-__find_pending_exception(struct dm_snapshot *s,
- struct dm_snap_pending_exception *pe, chunk_t chunk)
+__insert_pending_exception(struct dm_snapshot *s,
+ struct dm_snap_pending_exception *pe, chunk_t chunk)
{
- struct dm_snap_pending_exception *pe2;
-
- pe2 = __lookup_pending_exception(s, chunk);
- if (pe2) {
- free_pending_exception(pe);
- return pe2;
- }
-
pe->e.old_chunk = chunk;
bio_list_init(&pe->origin_bios);
bio_list_init(&pe->snapshot_bios);
pe->started = 0;
pe->full_bio = NULL;
+ spin_lock(&s->pe_allocation_lock);
if (s->store->type->prepare_exception(s->store, &pe->e)) {
+ spin_unlock(&s->pe_allocation_lock);
free_pending_exception(pe);
return NULL;
}
pe->exception_sequence = s->exception_start_sequence++;
+ spin_unlock(&s->pe_allocation_lock);
dm_insert_exception(&s->pending, &pe->e);
return pe;
}
+/*
+ * Looks to see if this snapshot already has a pending exception
+ * for this chunk, otherwise it allocates a new one and inserts
+ * it into the pending table.
+ *
+ * NOTE: a write lock must be held on the chunk's pending exception table slot
+ * before calling this.
+ */
+static struct dm_snap_pending_exception *
+__find_pending_exception(struct dm_snapshot *s,
+ struct dm_snap_pending_exception *pe, chunk_t chunk)
+{
+ struct dm_snap_pending_exception *pe2;
+
+ pe2 = __lookup_pending_exception(s, chunk);
+ if (pe2) {
+ free_pending_exception(pe);
+ return pe2;
+ }
+
+ return __insert_pending_exception(s, pe, chunk);
+}
+
static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
struct bio *bio, chunk_t chunk)
{
int r = DM_MAPIO_REMAPPED;
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
+ struct dm_exception_table_lock lock;
init_tracked_chunk(bio);
}
chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
+ dm_exception_table_lock_init(s, chunk, &lock);
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
if (!s->valid)
return DM_MAPIO_KILL;
- mutex_lock(&s->lock);
+ down_read(&s->lock);
+ dm_exception_table_lock(&lock);
if (!s->valid || (unlikely(s->snapshot_overflowed) &&
bio_data_dir(bio) == WRITE)) {
if (bio_data_dir(bio) == WRITE) {
pe = __lookup_pending_exception(s, chunk);
if (!pe) {
- mutex_unlock(&s->lock);
+ dm_exception_table_unlock(&lock);
pe = alloc_pending_exception(s);
- mutex_lock(&s->lock);
-
- if (!s->valid || s->snapshot_overflowed) {
- free_pending_exception(pe);
- r = DM_MAPIO_KILL;
- goto out_unlock;
- }
+ dm_exception_table_lock(&lock);
e = dm_lookup_exception(&s->complete, chunk);
if (e) {
pe = __find_pending_exception(s, pe, chunk);
if (!pe) {
+ dm_exception_table_unlock(&lock);
+ up_read(&s->lock);
+
+ down_write(&s->lock);
+
if (s->store->userspace_supports_overflow) {
- s->snapshot_overflowed = 1;
- DMERR("Snapshot overflowed: Unable to allocate exception.");
+ if (s->valid && !s->snapshot_overflowed) {
+ s->snapshot_overflowed = 1;
+ DMERR("Snapshot overflowed: Unable to allocate exception.");
+ }
} else
__invalidate_snapshot(s, -ENOMEM);
+ up_write(&s->lock);
+
r = DM_MAPIO_KILL;
- goto out_unlock;
+ goto out;
}
}
bio->bi_iter.bi_size ==
(s->store->chunk_size << SECTOR_SHIFT)) {
pe->started = 1;
- mutex_unlock(&s->lock);
+
+ dm_exception_table_unlock(&lock);
+ up_read(&s->lock);
+
start_full_bio(pe, bio);
goto out;
}
bio_list_add(&pe->snapshot_bios, bio);
if (!pe->started) {
- /* this is protected by snap->lock */
+ /* this is protected by the exception table lock */
pe->started = 1;
- mutex_unlock(&s->lock);
+
+ dm_exception_table_unlock(&lock);
+ up_read(&s->lock);
+
start_copy(pe);
goto out;
}
}
out_unlock:
- mutex_unlock(&s->lock);
+ dm_exception_table_unlock(&lock);
+ up_read(&s->lock);
out:
return r;
}
chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
- mutex_lock(&s->lock);
+ down_write(&s->lock);
/* Full merging snapshots are redirected to the origin */
if (!s->valid)
bio_set_dev(bio, s->origin->bdev);
if (bio_data_dir(bio) == WRITE) {
- mutex_unlock(&s->lock);
+ up_write(&s->lock);
return do_origin(s->origin, bio);
}
out_unlock:
- mutex_unlock(&s->lock);
+ up_write(&s->lock);
return r;
}
down_read(&_origins_lock);
(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
if (snap_src && snap_dest) {
- mutex_lock(&snap_src->lock);
+ down_read(&snap_src->lock);
if (s == snap_src) {
DMERR("Unable to resume snapshot source until "
"handover completes.");
"source is suspended.");
r = -EINVAL;
}
- mutex_unlock(&snap_src->lock);
+ up_read(&snap_src->lock);
}
up_read(&_origins_lock);
(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
if (snap_src && snap_dest) {
- mutex_lock(&snap_src->lock);
- mutex_lock_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
+ down_write(&snap_src->lock);
+ down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
__handover_exceptions(snap_src, snap_dest);
- mutex_unlock(&snap_dest->lock);
- mutex_unlock(&snap_src->lock);
+ up_write(&snap_dest->lock);
+ up_write(&snap_src->lock);
}
up_read(&_origins_lock);
/* Now we have correct chunk size, reregister */
reregister_snapshot(s);
- mutex_lock(&s->lock);
+ down_write(&s->lock);
s->active = 1;
- mutex_unlock(&s->lock);
+ up_write(&s->lock);
}
static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
switch (type) {
case STATUSTYPE_INFO:
- mutex_lock(&snap->lock);
+ down_write(&snap->lock);
if (!snap->valid)
DMEMIT("Invalid");
DMEMIT("Unknown");
}
- mutex_unlock(&snap->lock);
+ up_write(&snap->lock);
break;
int r = DM_MAPIO_REMAPPED;
struct dm_snapshot *snap;
struct dm_exception *e;
- struct dm_snap_pending_exception *pe;
+ struct dm_snap_pending_exception *pe, *pe2;
struct dm_snap_pending_exception *pe_to_start_now = NULL;
struct dm_snap_pending_exception *pe_to_start_last = NULL;
+ struct dm_exception_table_lock lock;
chunk_t chunk;
/* Do all the snapshots on this origin */
if (dm_target_is_snapshot_merge(snap->ti))
continue;
- mutex_lock(&snap->lock);
-
- /* Only deal with valid and active snapshots */
- if (!snap->valid || !snap->active)
- goto next_snapshot;
-
/* Nothing to do if writing beyond end of snapshot */
if (sector >= dm_table_get_size(snap->ti->table))
- goto next_snapshot;
+ continue;
/*
* Remember, different snapshots can have
* different chunk sizes.
*/
chunk = sector_to_chunk(snap->store, sector);
+ dm_exception_table_lock_init(snap, chunk, &lock);
- /*
- * Check exception table to see if block
- * is already remapped in this snapshot
- * and trigger an exception if not.
- */
- e = dm_lookup_exception(&snap->complete, chunk);
- if (e)
+ down_read(&snap->lock);
+ dm_exception_table_lock(&lock);
+
+ /* Only deal with valid and active snapshots */
+ if (!snap->valid || !snap->active)
goto next_snapshot;
pe = __lookup_pending_exception(snap, chunk);
if (!pe) {
- mutex_unlock(&snap->lock);
- pe = alloc_pending_exception(snap);
- mutex_lock(&snap->lock);
-
- if (!snap->valid) {
- free_pending_exception(pe);
- goto next_snapshot;
- }
-
+ /*
+ * Check exception table to see if block is already
+ * remapped in this snapshot and trigger an exception
+ * if not.
+ */
e = dm_lookup_exception(&snap->complete, chunk);
- if (e) {
- free_pending_exception(pe);
+ if (e)
goto next_snapshot;
- }
- pe = __find_pending_exception(snap, pe, chunk);
- if (!pe) {
- __invalidate_snapshot(snap, -ENOMEM);
- goto next_snapshot;
+ dm_exception_table_unlock(&lock);
+ pe = alloc_pending_exception(snap);
+ dm_exception_table_lock(&lock);
+
+ pe2 = __lookup_pending_exception(snap, chunk);
+
+ if (!pe2) {
+ e = dm_lookup_exception(&snap->complete, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ goto next_snapshot;
+ }
+
+ pe = __insert_pending_exception(snap, pe, chunk);
+ if (!pe) {
+ dm_exception_table_unlock(&lock);
+ up_read(&snap->lock);
+
+ invalidate_snapshot(snap, -ENOMEM);
+ continue;
+ }
+ } else {
+ free_pending_exception(pe);
+ pe = pe2;
}
}
}
next_snapshot:
- mutex_unlock(&snap->lock);
+ dm_exception_table_unlock(&lock);
+ up_read(&snap->lock);
if (pe_to_start_now) {
start_copy(pe_to_start_now);
return DM_MAPIO_KILL;
}
-static void io_err_release_clone_rq(struct request *clone)
+static void io_err_release_clone_rq(struct request *clone,
+ union map_info *map_context)
{
}
*/
bool fail_io:1;
+ /*
+ * Set once a thin-pool has been accessed through one of the interfaces
+ * that imply the pool is in-service (e.g. thin devices created/deleted,
+ * thin-pool message, metadata snapshots, etc).
+ */
+ bool in_service:1;
+
/*
* Reading the space map roots can fail, so we read it into these
* buffers before the superblock is locked and updated.
/*----------------------------------------------------------------*/
+/*
+ * Variant that is used for in-core only changes or code that
+ * shouldn't put the pool in service on its own (e.g. commit).
+ */
+static inline void __pmd_write_lock(struct dm_pool_metadata *pmd)
+ __acquires(pmd->root_lock)
+{
+ down_write(&pmd->root_lock);
+}
+#define pmd_write_lock_in_core(pmd) __pmd_write_lock((pmd))
+
+static inline void pmd_write_lock(struct dm_pool_metadata *pmd)
+{
+ __pmd_write_lock(pmd);
+ if (unlikely(!pmd->in_service))
+ pmd->in_service = true;
+}
+
+static inline void pmd_write_unlock(struct dm_pool_metadata *pmd)
+ __releases(pmd->root_lock)
+{
+ up_write(&pmd->root_lock);
+}
+
+/*----------------------------------------------------------------*/
+
static int superblock_lock_zero(struct dm_pool_metadata *pmd,
struct dm_block **sblock)
{
*/
BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512);
+ if (unlikely(!pmd->in_service))
+ return 0;
+
r = __write_changed_details(pmd);
if (r < 0)
return r;
pmd->time = 0;
INIT_LIST_HEAD(&pmd->thin_devices);
pmd->fail_io = false;
+ pmd->in_service = false;
pmd->bdev = bdev;
pmd->data_block_size = data_block_size;
DMWARN("%s: __commit_transaction() failed, error = %d",
__func__, r);
}
-
if (!pmd->fail_io)
__destroy_persistent_data_objects(pmd);
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (!pmd->fail_io)
r = __create_thin(pmd, dev);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (!pmd->fail_io)
r = __create_snap(pmd, dev, origin);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (!pmd->fail_io)
r = __delete_device(pmd, dev);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (pmd->fail_io)
goto out;
r = 0;
out:
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
* We commit to ensure the btree roots which we increment in a
* moment are up to date.
*/
- __commit_transaction(pmd);
+ r = __commit_transaction(pmd);
+ if (r < 0) {
+ DMWARN("%s: __commit_transaction() failed, error = %d",
+ __func__, r);
+ return r;
+ }
/*
* Copy the superblock.
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (!pmd->fail_io)
r = __reserve_metadata_snap(pmd);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (!pmd->fail_io)
r = __release_metadata_snap(pmd);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock_in_core(pmd);
if (!pmd->fail_io)
r = __open_device(pmd, dev, 0, td);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
int dm_pool_close_thin_device(struct dm_thin_device *td)
{
- down_write(&td->pmd->root_lock);
+ pmd_write_lock_in_core(td->pmd);
__close_device(td);
- up_write(&td->pmd->root_lock);
+ pmd_write_unlock(td->pmd);
return 0;
}
{
int r = -EINVAL;
- down_write(&td->pmd->root_lock);
+ pmd_write_lock(td->pmd);
if (!td->pmd->fail_io)
r = __insert(td, block, data_block);
- up_write(&td->pmd->root_lock);
+ pmd_write_unlock(td->pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&td->pmd->root_lock);
+ pmd_write_lock(td->pmd);
if (!td->pmd->fail_io)
r = __remove(td, block);
- up_write(&td->pmd->root_lock);
+ pmd_write_unlock(td->pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&td->pmd->root_lock);
+ pmd_write_lock(td->pmd);
if (!td->pmd->fail_io)
r = __remove_range(td, begin, end);
- up_write(&td->pmd->root_lock);
+ pmd_write_unlock(td->pmd);
return r;
}
{
int r = 0;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
for (; b != e; b++) {
r = dm_sm_inc_block(pmd->data_sm, b);
if (r)
break;
}
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = 0;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
for (; b != e; b++) {
r = dm_sm_dec_block(pmd->data_sm, b);
if (r)
break;
}
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (!pmd->fail_io)
r = dm_sm_new_block(pmd->data_sm, result);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ /*
+ * Care is taken to not have commit be what
+ * triggers putting the thin-pool in-service.
+ */
+ __pmd_write_lock(pmd);
if (pmd->fail_io)
goto out;
r = __commit_transaction(pmd);
- if (r <= 0)
+ if (r < 0)
goto out;
/*
*/
r = __begin_transaction(pmd);
out:
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (pmd->fail_io)
goto out;
pmd->fail_io = true;
out:
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (!pmd->fail_io)
r = __resize_space_map(pmd->data_sm, new_count);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
{
int r = -EINVAL;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
if (!pmd->fail_io) {
r = __resize_space_map(pmd->metadata_sm, new_count);
if (!r)
__set_metadata_reserve(pmd);
}
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
{
- down_write(&pmd->root_lock);
+ pmd_write_lock_in_core(pmd);
dm_bm_set_read_only(pmd->bm);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
}
void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd)
{
- down_write(&pmd->root_lock);
+ pmd_write_lock_in_core(pmd);
dm_bm_set_read_write(pmd->bm);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
}
int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
{
int r;
- down_write(&pmd->root_lock);
+ pmd_write_lock_in_core(pmd);
r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
struct dm_block *sblock;
struct thin_disk_superblock *disk_super;
- down_write(&pmd->root_lock);
+ pmd_write_lock(pmd);
pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG;
r = superblock_lock(pmd, &sblock);
dm_bm_unlock(sblock);
out:
- up_write(&pmd->root_lock);
+ pmd_write_unlock(pmd);
return r;
}
struct dm_writecache *wc;
struct wc_entry **wc_list;
unsigned wc_list_n;
- unsigned page_offset;
struct page *page;
struct wc_entry *wc_list_inline[WB_LIST_INLINE];
struct bio bio;
e = container_of(node, struct wc_entry, rb_node);
if (read_original_sector(wc, e) == block)
break;
+
node = (read_original_sector(wc, e) >= block ?
e->rb_node.rb_left : e->rb_node.rb_right);
if (unlikely(!node)) {
- if (!(flags & WFE_RETURN_FOLLOWING)) {
+ if (!(flags & WFE_RETURN_FOLLOWING))
return NULL;
- }
if (read_original_sector(wc, e) >= block) {
- break;
+ return e;
} else {
node = rb_next(&e->rb_node);
- if (unlikely(!node)) {
+ if (unlikely(!node))
return NULL;
- }
e = container_of(node, struct wc_entry, rb_node);
- break;
+ return e;
}
}
}
node = rb_prev(&e->rb_node);
else
node = rb_next(&e->rb_node);
- if (!node)
+ if (unlikely(!node))
return e;
e2 = container_of(node, struct wc_entry, rb_node);
if (read_original_sector(wc, e2) != block)
writecache_free_entry(wc, e);
}
- if (!node)
+ if (unlikely(!node))
break;
e = container_of(node, struct wc_entry, rb_node);
bio = bio_alloc_bioset(GFP_NOIO, max_pages, &wc->bio_set);
wb = container_of(bio, struct writeback_struct, bio);
wb->wc = wc;
- wb->bio.bi_end_io = writecache_writeback_endio;
- bio_set_dev(&wb->bio, wc->dev->bdev);
- wb->bio.bi_iter.bi_sector = read_original_sector(wc, e);
- wb->page_offset = PAGE_SIZE;
+ bio->bi_end_io = writecache_writeback_endio;
+ bio_set_dev(bio, wc->dev->bdev);
+ bio->bi_iter.bi_sector = read_original_sector(wc, e);
if (max_pages <= WB_LIST_INLINE ||
unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *),
GFP_NOIO | __GFP_NORETRY |
wb->wc_list[wb->wc_list_n++] = f;
e = f;
}
- bio_set_op_attrs(&wb->bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA);
if (writecache_has_error(wc)) {
bio->bi_status = BLK_STS_IOERR;
- bio_endio(&wb->bio);
+ bio_endio(bio);
} else {
- submit_bio(&wb->bio);
+ submit_bio(bio);
}
__writeback_throttle(wc, wbl);
goto out;
}
+ if (!nr_blkz)
+ break;
+
/* Process report */
for (i = 0; i < nr_blkz; i++) {
ret = dmz_init_zone(zmd, zone, &blkz[i]);
/* Get zone information from disk */
ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
&blkz, &nr_blkz, GFP_NOIO);
+ if (!nr_blkz)
+ ret = -EIO;
if (ret) {
dmz_dev_err(zmd->dev, "Get zone %u report failed",
dmz_id(zmd, zone));
q = bdev_get_queue(dev->bdev);
dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
- aligned_capacity = dev->capacity & ~(blk_queue_zone_sectors(q) - 1);
+ aligned_capacity = dev->capacity &
+ ~((sector_t)blk_queue_zone_sectors(q) - 1);
if (ti->begin ||
((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
ti->error = "Partial mapping not supported";
}
static struct table_device *find_table_device(struct list_head *l, dev_t dev,
- fmode_t mode) {
+ fmode_t mode)
+{
struct table_device *td;
list_for_each_entry(td, l, list)
}
int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
- struct dm_dev **result) {
+ struct dm_dev **result)
+{
int r;
struct table_device *td;
static struct mapped_device *alloc_dev(int minor)
{
int r, numa_node_id = dm_get_numa_node();
- struct dax_device *dax_dev = NULL;
struct mapped_device *md;
void *old_md;
sprintf(md->disk->disk_name, "dm-%d", minor);
if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
- dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
- if (!dax_dev)
+ md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+ if (!md->dax_dev)
goto bad;
}
- md->dax_dev = dax_dev;
add_disk_no_queue_reg(md->disk);
format_dev_t(md->name, MKDEV(_major, minor));
static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
{
+ memset(ll, 0, sizeof(struct ll_disk));
+
ll->tm = tm;
ll->bitmap_info.tm = tm;
#define ISC_PFG_CFG0_BPS_TWELVE (0x0 << 28)
#define ISC_PFE_CFG0_BPS_MASK GENMASK(30, 28)
+#define ISC_PFE_CFG0_COLEN BIT(12)
+#define ISC_PFE_CFG0_ROWEN BIT(13)
+
+/* ISC Parallel Front End Configuration 1 Register */
+#define ISC_PFE_CFG1 0x00000010
+
+#define ISC_PFE_CFG1_COLMIN(v) ((v))
+#define ISC_PFE_CFG1_COLMIN_MASK GENMASK(15, 0)
+#define ISC_PFE_CFG1_COLMAX(v) ((v) << 16)
+#define ISC_PFE_CFG1_COLMAX_MASK GENMASK(31, 16)
+
+/* ISC Parallel Front End Configuration 2 Register */
+#define ISC_PFE_CFG2 0x00000014
+
+#define ISC_PFE_CFG2_ROWMIN(v) ((v))
+#define ISC_PFE_CFG2_ROWMIN_MASK GENMASK(15, 0)
+#define ISC_PFE_CFG2_ROWMAX(v) ((v) << 16)
+#define ISC_PFE_CFG2_ROWMAX_MASK GENMASK(31, 16)
+
/* ISC Clock Enable Register */
#define ISC_CLKEN 0x00000018
u32 sizeimage = isc->fmt.fmt.pix.sizeimage;
u32 dctrl_dview;
dma_addr_t addr0;
+ u32 h, w;
+
+ h = isc->fmt.fmt.pix.height;
+ w = isc->fmt.fmt.pix.width;
+
+ /*
+ * In case the sensor is not RAW, it will output a pixel (12-16 bits)
+ * with two samples on the ISC Data bus (which is 8-12)
+ * ISC will count each sample, so, we need to multiply these values
+ * by two, to get the real number of samples for the required pixels.
+ */
+ if (!ISC_IS_FORMAT_RAW(isc->config.sd_format->mbus_code)) {
+ h <<= 1;
+ w <<= 1;
+ }
+
+ /*
+ * We limit the column/row count that the ISC will output according
+ * to the configured resolution that we want.
+ * This will avoid the situation where the sensor is misconfigured,
+ * sending more data, and the ISC will just take it and DMA to memory,
+ * causing corruption.
+ */
+ regmap_write(regmap, ISC_PFE_CFG1,
+ (ISC_PFE_CFG1_COLMIN(0) & ISC_PFE_CFG1_COLMIN_MASK) |
+ (ISC_PFE_CFG1_COLMAX(w - 1) & ISC_PFE_CFG1_COLMAX_MASK));
+
+ regmap_write(regmap, ISC_PFE_CFG2,
+ (ISC_PFE_CFG2_ROWMIN(0) & ISC_PFE_CFG2_ROWMIN_MASK) |
+ (ISC_PFE_CFG2_ROWMAX(h - 1) & ISC_PFE_CFG2_ROWMAX_MASK));
+
+ regmap_update_bits(regmap, ISC_PFE_CFG0,
+ ISC_PFE_CFG0_COLEN | ISC_PFE_CFG0_ROWEN,
+ ISC_PFE_CFG0_COLEN | ISC_PFE_CFG0_ROWEN);
addr0 = vb2_dma_contig_plane_dma_addr(&isc->cur_frm->vb.vb2_buf, 0);
regmap_write(regmap, ISC_DAD0, addr0);
struct vb2_queue *q = &isc->vb2_vidq;
int ret;
+ INIT_WORK(&isc->awb_work, isc_awb_work);
+
ret = v4l2_device_register_subdev_nodes(&isc->v4l2_dev);
if (ret < 0) {
v4l2_err(&isc->v4l2_dev, "Failed to register subdev nodes\n");
return ret;
}
- INIT_WORK(&isc->awb_work, isc_awb_work);
-
/* Register video device */
strscpy(vdev->name, ATMEL_ISC_NAME, sizeof(vdev->name));
vdev->release = video_device_release_empty;
break;
}
- subdev_entity->asd = devm_kzalloc(dev,
- sizeof(*subdev_entity->asd), GFP_KERNEL);
+ /* asd will be freed by the subsystem once it's added to the
+ * notifier list
+ */
+ subdev_entity->asd = kzalloc(sizeof(*subdev_entity->asd),
+ GFP_KERNEL);
if (!subdev_entity->asd) {
of_node_put(rem);
ret = -ENOMEM;
subdev_entity->asd);
if (ret) {
fwnode_handle_put(subdev_entity->asd->match.fwnode);
+ kfree(subdev_entity->asd);
goto cleanup_subdev;
}
static int coda_buf_prepare(struct vb2_buffer *vb)
{
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
struct coda_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
struct coda_q_data *q_data;
q_data = get_q_data(ctx, vb->vb2_queue->type);
+ if (V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type)) {
+ if (vbuf->field == V4L2_FIELD_ANY)
+ vbuf->field = V4L2_FIELD_NONE;
+ if (vbuf->field != V4L2_FIELD_NONE) {
+ v4l2_warn(&ctx->dev->v4l2_dev,
+ "%s field isn't supported\n", __func__);
+ return -EINVAL;
+ }
+ }
if (vb2_plane_size(vb, 0) < q_data->sizeimage) {
v4l2_warn(&ctx->dev->v4l2_dev,
struct v4l2_output *output)
{
struct vpbe_config *cfg = vpbe_dev->cfg;
- int temp_index = output->index;
+ unsigned int temp_index = output->index;
if (temp_index >= cfg->num_outputs)
return -EINVAL;
unsigned long size;
struct videobuf_buffer *vb;
- vb = q->bufs[b->index];
-
if (!vout->streaming)
return -EINVAL;
- if (file->f_flags & O_NONBLOCK)
- /* Call videobuf_dqbuf for non blocking mode */
- ret = videobuf_dqbuf(q, (struct v4l2_buffer *)b, 1);
- else
- /* Call videobuf_dqbuf for blocking mode */
- ret = videobuf_dqbuf(q, (struct v4l2_buffer *)b, 0);
+ ret = videobuf_dqbuf(q, b, !!(file->f_flags & O_NONBLOCK));
+ if (ret)
+ return ret;
+
+ vb = q->bufs[b->index];
addr = (unsigned long) vout->buf_phy_addr[vb->i];
size = (unsigned long) vb->size;
dma_unmap_single(vout->vid_dev->v4l2_dev.dev, addr,
size, DMA_TO_DEVICE);
- return ret;
+ return 0;
}
static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type i)
/* Field Detection Control */
#define FLD_REG 0x1c
#define FLD_FLD_NUM(n) (((n) & 0xff) << 16)
+#define FLD_DET_SEL(n) (((n) & 0x3) << 4)
#define FLD_FLD_EN4 BIT(3)
#define FLD_FLD_EN3 BIT(2)
#define FLD_FLD_EN2 BIT(1)
/* Interrupt Enable */
#define INTEN_REG 0x30
+#define INTEN_INT_AFIFO_OF BIT(27)
+#define INTEN_INT_ERRSOTHS BIT(4)
+#define INTEN_INT_ERRSOTSYNCHS BIT(3)
/* Interrupt Source Mask */
#define INTCLOSE_REG 0x34
static int rcsi2_start_receiver(struct rcar_csi2 *priv)
{
const struct rcar_csi2_format *format;
- u32 phycnt, vcdt = 0, vcdt2 = 0;
+ u32 phycnt, vcdt = 0, vcdt2 = 0, fld = 0;
unsigned int i;
int mbps, ret;
vcdt2 |= vcdt_part << ((i % 2) * 16);
}
+ if (priv->mf.field == V4L2_FIELD_ALTERNATE) {
+ fld = FLD_DET_SEL(1) | FLD_FLD_EN4 | FLD_FLD_EN3 | FLD_FLD_EN2
+ | FLD_FLD_EN;
+
+ if (priv->mf.height == 240)
+ fld |= FLD_FLD_NUM(0);
+ else
+ fld |= FLD_FLD_NUM(1);
+ }
+
phycnt = PHYCNT_ENABLECLK;
phycnt |= (1 << priv->lanes) - 1;
if (mbps < 0)
return mbps;
+ /* Enable interrupts. */
+ rcsi2_write(priv, INTEN_REG, INTEN_INT_AFIFO_OF | INTEN_INT_ERRSOTHS
+ | INTEN_INT_ERRSOTSYNCHS);
+
/* Init */
rcsi2_write(priv, TREF_REG, TREF_TREF);
rcsi2_write(priv, PHTC_REG, 0);
rcsi2_write(priv, PHYCNT_REG, phycnt);
rcsi2_write(priv, LINKCNT_REG, LINKCNT_MONITOR_EN |
LINKCNT_REG_MONI_PACT_EN | LINKCNT_ICLK_NONSTOP);
- rcsi2_write(priv, FLD_REG, FLD_FLD_NUM(2) | FLD_FLD_EN4 |
- FLD_FLD_EN3 | FLD_FLD_EN2 | FLD_FLD_EN);
+ rcsi2_write(priv, FLD_REG, fld);
rcsi2_write(priv, PHYCNT_REG, phycnt | PHYCNT_SHUTDOWNZ);
rcsi2_write(priv, PHYCNT_REG, phycnt | PHYCNT_SHUTDOWNZ | PHYCNT_RSTZ);
.pad = &rcar_csi2_pad_ops,
};
+static irqreturn_t rcsi2_irq(int irq, void *data)
+{
+ struct rcar_csi2 *priv = data;
+ u32 status, err_status;
+
+ status = rcsi2_read(priv, INTSTATE_REG);
+ err_status = rcsi2_read(priv, INTERRSTATE_REG);
+
+ if (!status)
+ return IRQ_HANDLED;
+
+ rcsi2_write(priv, INTSTATE_REG, status);
+
+ if (!err_status)
+ return IRQ_HANDLED;
+
+ rcsi2_write(priv, INTERRSTATE_REG, err_status);
+
+ dev_info(priv->dev, "Transfer error, restarting CSI-2 receiver\n");
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t rcsi2_irq_thread(int irq, void *data)
+{
+ struct rcar_csi2 *priv = data;
+
+ mutex_lock(&priv->lock);
+ rcsi2_stop(priv);
+ usleep_range(1000, 2000);
+ if (rcsi2_start(priv))
+ dev_warn(priv->dev, "Failed to restart CSI-2 receiver\n");
+ mutex_unlock(&priv->lock);
+
+ return IRQ_HANDLED;
+}
+
/* -----------------------------------------------------------------------------
* Async handling and registration of subdevices and links.
*/
struct platform_device *pdev)
{
struct resource *res;
- int irq;
+ int irq, ret;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
priv->base = devm_ioremap_resource(&pdev->dev, res);
if (irq < 0)
return irq;
+ ret = devm_request_threaded_irq(&pdev->dev, irq, rcsi2_irq,
+ rcsi2_irq_thread, IRQF_SHARED,
+ KBUILD_MODNAME, priv);
+ if (ret)
+ return ret;
+
priv->rstc = devm_reset_control_get(&pdev->dev, NULL);
if (IS_ERR(priv->rstc))
return PTR_ERR(priv->rstc);
hdmi_dev = cec_notifier_parse_hdmi_phandle(&pdev->dev);
- if (!hdmi_dev)
- return -ENODEV;
+ if (IS_ERR(hdmi_dev))
+ return PTR_ERR(hdmi_dev);
cec = devm_kzalloc(&pdev->dev, sizeof(struct tegra_cec), GFP_KERNEL);
config MLXSW_CORE_THERMAL
bool "Thermal zone support for Mellanox Technologies Switch ASICs"
depends on MLXSW_CORE && THERMAL
- depends on !(MLXSW_CORE=y && THERMAL=m)
default y
---help---
Say Y here if you want to automatically control fans speed according
if (ret)
return ret;
- ret = imx_media_capture_device_register(priv->vdev);
+ ret = imx_media_capture_device_register(priv->md, priv->vdev);
if (ret)
return ret;
}
EXPORT_SYMBOL_GPL(imx_media_capture_device_error);
-int imx_media_capture_device_register(struct imx_media_video_dev *vdev)
+int imx_media_capture_device_register(struct imx_media_dev *md,
+ struct imx_media_video_dev *vdev)
{
struct capture_priv *priv = to_capture_priv(vdev);
struct v4l2_subdev *sd = priv->src_sd;
struct v4l2_subdev_format fmt_src;
int ret;
- /* get media device */
- priv->md = dev_get_drvdata(sd->v4l2_dev->dev);
+ priv->md = md;
vfd->v4l2_dev = sd->v4l2_dev;
if (ret)
goto free_fim;
- ret = imx_media_capture_device_register(priv->vdev);
+ ret = imx_media_capture_device_register(priv->md, priv->vdev);
if (ret)
goto free_fim;
struct imx_media_video_dev *
imx_media_capture_device_init(struct v4l2_subdev *src_sd, int pad);
void imx_media_capture_device_remove(struct imx_media_video_dev *vdev);
-int imx_media_capture_device_register(struct imx_media_video_dev *vdev);
+int imx_media_capture_device_register(struct imx_media_dev *md,
+ struct imx_media_video_dev *vdev);
void imx_media_capture_device_unregister(struct imx_media_video_dev *vdev);
struct imx_media_buffer *
imx_media_capture_device_next_buf(struct imx_media_video_dev *vdev);
if (ret < 0)
return ret;
- ret = imx_media_capture_device_register(csi->vdev);
+ ret = imx_media_capture_device_register(csi->imxmd, csi->vdev);
if (ret < 0)
return ret;
vpu->vfd_enc = vfd;
video_set_drvdata(vfd, vpu);
- ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
+ ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1);
if (ret) {
v4l2_err(&vpu->v4l2_dev, "Failed to register video device\n");
goto err_free_dev;
vpu->mdev.dev = vpu->dev;
strscpy(vpu->mdev.model, DRIVER_NAME, sizeof(vpu->mdev.model));
+ strscpy(vpu->mdev.bus_info, "platform: " DRIVER_NAME,
+ sizeof(vpu->mdev.model));
media_device_init(&vpu->mdev);
vpu->v4l2_dev.mdev = &vpu->mdev;
return 0;
err_video_dev_unreg:
if (vpu->vfd_enc) {
+ v4l2_m2m_unregister_media_controller(vpu->m2m_dev);
video_unregister_device(vpu->vfd_enc);
video_device_release(vpu->vfd_enc);
}
err_m2m_rel:
+ media_device_cleanup(&vpu->mdev);
v4l2_m2m_release(vpu->m2m_dev);
err_v4l2_unreg:
v4l2_device_unregister(&vpu->v4l2_dev);
err_clk_unprepare:
clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks);
+ pm_runtime_dont_use_autosuspend(vpu->dev);
pm_runtime_disable(vpu->dev);
return ret;
}
v4l2_info(&vpu->v4l2_dev, "Removing %s\n", pdev->name);
media_device_unregister(&vpu->mdev);
- v4l2_m2m_unregister_media_controller(vpu->m2m_dev);
- v4l2_m2m_release(vpu->m2m_dev);
- media_device_cleanup(&vpu->mdev);
if (vpu->vfd_enc) {
+ v4l2_m2m_unregister_media_controller(vpu->m2m_dev);
video_unregister_device(vpu->vfd_enc);
video_device_release(vpu->vfd_enc);
}
+ media_device_cleanup(&vpu->mdev);
+ v4l2_m2m_release(vpu->m2m_dev);
v4l2_device_unregister(&vpu->v4l2_dev);
clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks);
+ pm_runtime_dont_use_autosuspend(vpu->dev);
pm_runtime_disable(vpu->dev);
return 0;
}
struct v4l2_capability *cap)
{
struct rockchip_vpu_dev *vpu = video_drvdata(file);
+ struct video_device *vdev = video_devdata(file);
strscpy(cap->driver, vpu->dev->driver->name, sizeof(cap->driver));
- strscpy(cap->card, vpu->vfd_enc->name, sizeof(cap->card));
+ strscpy(cap->card, vdev->name, sizeof(cap->card));
snprintf(cap->bus_info, sizeof(cap->bus_info), "platform: %s",
vpu->dev->driver->name);
return 0;
#
menuconfig THERMAL
- tristate "Generic Thermal sysfs driver"
+ bool "Generic Thermal sysfs driver"
help
Generic Thermal Sysfs driver offers a generic mechanism for
thermal management. Usually it's made up of one or more thermal
Each thermal zone contains its own temperature, trip points,
cooling devices.
All platforms with ACPI thermal support can use this driver.
- If you want this support, you should say Y or M here.
+ If you want this support, you should say Y here.
if THERMAL
config THERMAL_EMERGENCY_POWEROFF_DELAY_MS
int "Emergency poweroff delay in milli-seconds"
- depends on THERMAL
default 0
help
Thermal subsystem will issue a graceful shutdown when
allocating and limiting power to devices.
config CPU_THERMAL
- bool "generic cpu cooling support"
+ bool "Generic cpu cooling support"
depends on CPU_FREQ
depends on THERMAL_OF
- depends on THERMAL=y
help
This implements the generic cpu cooling mechanism through frequency
reduction. An ACPI version of this already exists
config INTEL_POWERCLAMP
tristate "Intel PowerClamp idle injection driver"
- depends on THERMAL
depends on X86
depends on CPU_SUP_INTEL
help
{
struct int3403_priv *priv;
int result = 0;
+ unsigned long long tmp;
acpi_status status;
priv = devm_kzalloc(&pdev->dev, sizeof(struct int3403_priv),
goto err;
}
- status = acpi_evaluate_integer(priv->adev->handle, "PTYP",
- NULL, &priv->type);
- if (ACPI_FAILURE(status)) {
- unsigned long long tmp;
- status = acpi_evaluate_integer(priv->adev->handle, "_TMP",
- NULL, &tmp);
+ status = acpi_evaluate_integer(priv->adev->handle, "_TMP",
+ NULL, &tmp);
+ if (ACPI_FAILURE(status)) {
+ status = acpi_evaluate_integer(priv->adev->handle, "PTYP",
+ NULL, &priv->type);
if (ACPI_FAILURE(status)) {
result = -EINVAL;
goto err;
- } else {
- priv->type = INT3403_TYPE_SENSOR;
}
+ } else {
+ priv->type = INT3403_TYPE_SENSOR;
}
platform_set_drvdata(pdev, priv);
struct device_attribute *attr, \
char *buf) \
{ \
- struct pci_dev *pci_dev; \
- struct platform_device *pdev; \
- struct proc_thermal_device *proc_dev; \
+ struct proc_thermal_device *proc_dev = dev_get_drvdata(dev); \
\
if (proc_thermal_emum_mode == PROC_THERMAL_NONE) { \
dev_warn(dev, "Attempted to get power limit before device was initialized!\n"); \
return 0; \
} \
\
- if (proc_thermal_emum_mode == PROC_THERMAL_PLATFORM_DEV) { \
- pdev = to_platform_device(dev); \
- proc_dev = platform_get_drvdata(pdev); \
- } else { \
- pci_dev = to_pci_dev(dev); \
- proc_dev = pci_get_drvdata(pci_dev); \
- } \
return sprintf(buf, "%lu\n",\
(unsigned long)proc_dev->power_limits[index].suffix * 1000); \
}
THERMAL_DEVICE_POWER_CAPABILITY_CHANGED);
break;
default:
- dev_err(proc_priv->dev, "Unsupported event [0x%x]\n", event);
+ dev_dbg(proc_priv->dev, "Unsupported event [0x%x]\n", event);
break;
}
}
config QCOM_TSENS
tristate "Qualcomm TSENS Temperature Alarm"
- depends on THERMAL
depends on QCOM_QFPROM
depends on ARCH_QCOM || COMPILE_TEST
help
return thermal_gov_power_allocator_register();
}
-static void thermal_unregister_governors(void)
+static void __init thermal_unregister_governors(void)
{
thermal_gov_step_wise_unregister();
thermal_gov_fair_share_unregister();
*/
static struct thermal_cooling_device *
__thermal_cooling_device_register(struct device_node *np,
- char *type, void *devdata,
+ const char *type, void *devdata,
const struct thermal_cooling_device_ops *ops)
{
struct thermal_cooling_device *cdev;
* ERR_PTR. Caller must check return value with IS_ERR*() helpers.
*/
struct thermal_cooling_device *
-thermal_cooling_device_register(char *type, void *devdata,
+thermal_cooling_device_register(const char *type, void *devdata,
const struct thermal_cooling_device_ops *ops)
{
return __thermal_cooling_device_register(NULL, type, devdata, ops);
*/
struct thermal_cooling_device *
thermal_of_cooling_device_register(struct device_node *np,
- char *type, void *devdata,
+ const char *type, void *devdata,
const struct thermal_cooling_device_ops *ops)
{
return __thermal_cooling_device_register(np, type, devdata, ops);
unsigned long mode, void *_unused)
{
struct thermal_zone_device *tz;
+ enum thermal_device_mode tz_mode;
switch (mode) {
case PM_HIBERNATION_PREPARE:
case PM_POST_SUSPEND:
atomic_set(&in_suspend, 0);
list_for_each_entry(tz, &thermal_tz_list, node) {
+ tz_mode = THERMAL_DEVICE_ENABLED;
+ if (tz->ops->get_mode)
+ tz->ops->get_mode(tz, &tz_mode);
+
+ if (tz_mode == THERMAL_DEVICE_DISABLED)
+ continue;
+
thermal_zone_device_init(tz);
thermal_zone_device_update(tz,
THERMAL_EVENT_UNSPECIFIED);
mutex_destroy(&poweroff_lock);
return result;
}
-
-static void __exit thermal_exit(void)
-{
- unregister_pm_notifier(&thermal_pm_nb);
- of_thermal_destroy_zones();
- genetlink_exit();
- class_unregister(&thermal_class);
- thermal_unregister_governors();
- ida_destroy(&thermal_tz_ida);
- ida_destroy(&thermal_cdev_ida);
- mutex_destroy(&thermal_list_lock);
- mutex_destroy(&thermal_governor_lock);
-}
-
fs_initcall(thermal_init);
-module_exit(thermal_exit);
_enter("%s", cell->name);
ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
- &result, _expiry);
+ &result, _expiry, true);
if (ret < 0) {
_leave(" = %d [dns]", ret);
return ERR_PTR(ret);
#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */
#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */
+#define AFS_VL_MAX_LIFESPAN (120 * HZ)
+#define AFS_PROBE_MAX_LIFESPAN (30 * HZ)
+
typedef u64 afs_volid_t;
typedef u64 afs_vnodeid_t;
typedef u64 afs_dataversion_t;
struct afs_callback {
time64_t expires_at; /* Time at which expires */
- unsigned version; /* Callback version */
- afs_callback_type_t type; /* Type of callback */
+ //unsigned version; /* Callback version */
+ //afs_callback_type_t type; /* Type of callback */
};
struct afs_callback_break {
u32 abort_code; /* Abort if bulk-fetching this failed */
};
+struct afs_status_cb {
+ struct afs_file_status status;
+ struct afs_callback callback;
+ unsigned int cb_break; /* Pre-op callback break counter */
+ bool have_status; /* True if status record was retrieved */
+ bool have_cb; /* True if cb record was retrieved */
+ bool have_error; /* True if status.abort_code indicates an error */
+};
+
/*
* AFS file status change request
*/
struct afs_server *server = entry->server;
again:
- if (vnode->cb_interest &&
- likely(vnode->cb_interest == entry->cb_interest))
+ vcbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->io_lock));
+ if (vcbi && likely(vcbi == entry->cb_interest))
return 0;
read_lock(&slist->lock);
cbi = afs_get_cb_interest(entry->cb_interest);
read_unlock(&slist->lock);
- vcbi = vnode->cb_interest;
if (vcbi) {
if (vcbi == cbi) {
afs_put_cb_interest(afs_v2net(vnode), cbi);
*/
if (cbi && vcbi->server == cbi->server) {
write_seqlock(&vnode->cb_lock);
- old = vnode->cb_interest;
- vnode->cb_interest = cbi;
+ old = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ rcu_assign_pointer(vnode->cb_interest, cbi);
write_sequnlock(&vnode->cb_lock);
afs_put_cb_interest(afs_v2net(vnode), old);
return 0;
*/
write_seqlock(&vnode->cb_lock);
- old = vnode->cb_interest;
- vnode->cb_interest = cbi;
+ old = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ rcu_assign_pointer(vnode->cb_interest, cbi);
vnode->cb_s_break = cbi->server->cb_s_break;
vnode->cb_v_break = vnode->volume->cb_v_break;
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
vi = NULL;
write_unlock(&cbi->server->cb_break_lock);
- kfree(vi);
+ if (vi)
+ kfree_rcu(vi, rcu);
afs_put_server(net, cbi->server);
}
- kfree(cbi);
+ kfree_rcu(cbi, rcu);
}
}
vnode->cb_break++;
afs_clear_permits(vnode);
- spin_lock(&vnode->lock);
-
- _debug("break callback");
-
- if (list_empty(&vnode->granted_locks) &&
- !list_empty(&vnode->pending_locks))
+ if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
afs_lock_may_be_available(vnode);
- spin_unlock(&vnode->lock);
}
}
const char *name, unsigned int namelen,
const char *addresses)
{
+ struct afs_vlserver_list *vllist;
struct afs_cell *cell;
int i, ret;
atomic_set(&cell->usage, 2);
INIT_WORK(&cell->manager, afs_manage_cell);
- cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
- (1 << AFS_CELL_FL_NO_LOOKUP_YET));
INIT_LIST_HEAD(&cell->proc_volumes);
rwlock_init(&cell->proc_lock);
rwlock_init(&cell->vl_servers_lock);
- /* Fill in the VL server list if we were given a list of addresses to
- * use.
+ /* Provide a VL server list, filling it in if we were given a list of
+ * addresses to use.
*/
if (addresses) {
- struct afs_vlserver_list *vllist;
-
vllist = afs_parse_text_addrs(net,
addresses, strlen(addresses), ':',
VL_SERVICE, AFS_VL_PORT);
goto parse_failed;
}
- rcu_assign_pointer(cell->vl_servers, vllist);
+ vllist->source = DNS_RECORD_FROM_CONFIG;
+ vllist->status = DNS_LOOKUP_NOT_DONE;
cell->dns_expiry = TIME64_MAX;
- __clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags);
} else {
+ ret = -ENOMEM;
+ vllist = afs_alloc_vlserver_list(0);
+ if (!vllist)
+ goto error;
+ vllist->source = DNS_RECORD_UNAVAILABLE;
+ vllist->status = DNS_LOOKUP_NOT_DONE;
cell->dns_expiry = ktime_get_real_seconds();
}
+ rcu_assign_pointer(cell->vl_servers, vllist);
+
+ cell->dns_source = vllist->source;
+ cell->dns_status = vllist->status;
+ smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */
+
_leave(" = %p", cell);
return cell;
parse_failed:
if (ret == -EINVAL)
printk(KERN_ERR "kAFS: bad VL server IP address\n");
+error:
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
{
struct afs_cell *cell, *candidate, *cursor;
struct rb_node *parent, **pp;
+ enum afs_cell_state state;
int ret, n;
_enter("%s,%s", name, vllist);
wait_for_cell:
_debug("wait_for_cell");
- ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE);
- smp_rmb();
-
- switch (READ_ONCE(cell->state)) {
- case AFS_CELL_FAILED:
+ wait_var_event(&cell->state,
+ ({
+ state = smp_load_acquire(&cell->state); /* vs error */
+ state == AFS_CELL_ACTIVE || state == AFS_CELL_FAILED;
+ }));
+
+ /* Check the state obtained from the wait check. */
+ if (state == AFS_CELL_FAILED) {
ret = cell->error;
goto error;
- default:
- _debug("weird %u %d", cell->state, cell->error);
- goto error;
- case AFS_CELL_ACTIVE:
- break;
}
_leave(" = %p [cell]", cell);
/*
* Update a cell's VL server address list from the DNS.
*/
-static void afs_update_cell(struct afs_cell *cell)
+static int afs_update_cell(struct afs_cell *cell)
{
- struct afs_vlserver_list *vllist, *old;
+ struct afs_vlserver_list *vllist, *old = NULL, *p;
unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
time64_t now, expiry = 0;
+ int ret = 0;
_enter("%s", cell->name);
vllist = afs_dns_query(cell, &expiry);
+ if (IS_ERR(vllist)) {
+ ret = PTR_ERR(vllist);
+
+ _debug("%s: fail %d", cell->name, ret);
+ if (ret == -ENOMEM)
+ goto out_wake;
+
+ ret = -ENOMEM;
+ vllist = afs_alloc_vlserver_list(0);
+ if (!vllist)
+ goto out_wake;
+
+ switch (ret) {
+ case -ENODATA:
+ case -EDESTADDRREQ:
+ vllist->status = DNS_LOOKUP_GOT_NOT_FOUND;
+ break;
+ case -EAGAIN:
+ case -ECONNREFUSED:
+ vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE;
+ break;
+ default:
+ vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE;
+ break;
+ }
+ }
+
+ _debug("%s: got list %d %d", cell->name, vllist->source, vllist->status);
+ cell->dns_status = vllist->status;
now = ktime_get_real_seconds();
if (min_ttl > max_ttl)
else if (expiry > now + max_ttl)
expiry = now + max_ttl;
- if (IS_ERR(vllist)) {
- switch (PTR_ERR(vllist)) {
- case -ENODATA:
- case -EDESTADDRREQ:
+ _debug("%s: status %d", cell->name, vllist->status);
+ if (vllist->source == DNS_RECORD_UNAVAILABLE) {
+ switch (vllist->status) {
+ case DNS_LOOKUP_GOT_NOT_FOUND:
/* The DNS said that the cell does not exist or there
* weren't any addresses to be had.
*/
- set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
- clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
cell->dns_expiry = expiry;
break;
- case -EAGAIN:
- case -ECONNREFUSED:
+ case DNS_LOOKUP_BAD:
+ case DNS_LOOKUP_GOT_LOCAL_FAILURE:
+ case DNS_LOOKUP_GOT_TEMP_FAILURE:
+ case DNS_LOOKUP_GOT_NS_FAILURE:
default:
- set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
cell->dns_expiry = now + 10;
break;
}
-
- cell->error = -EDESTADDRREQ;
} else {
- clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
- clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
-
- /* Exclusion on changing vl_addrs is achieved by a
- * non-reentrant work item.
- */
- old = rcu_dereference_protected(cell->vl_servers, true);
- rcu_assign_pointer(cell->vl_servers, vllist);
cell->dns_expiry = expiry;
-
- if (old)
- afs_put_vlserverlist(cell->net, old);
}
- if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
- wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET);
+ /* Replace the VL server list if the new record has servers or the old
+ * record doesn't.
+ */
+ write_lock(&cell->vl_servers_lock);
+ p = rcu_dereference_protected(cell->vl_servers, true);
+ if (vllist->nr_servers > 0 || p->nr_servers == 0) {
+ rcu_assign_pointer(cell->vl_servers, vllist);
+ cell->dns_source = vllist->source;
+ old = p;
+ }
+ write_unlock(&cell->vl_servers_lock);
+ afs_put_vlserverlist(cell->net, old);
- now = ktime_get_real_seconds();
- afs_set_cell_timer(cell->net, cell->dns_expiry - now);
- _leave("");
+out_wake:
+ smp_store_release(&cell->dns_lookup_count,
+ cell->dns_lookup_count + 1); /* vs source/status */
+ wake_up_var(&cell->dns_lookup_count);
+ _leave(" = %d", ret);
+ return ret;
}
/*
now = ktime_get_real_seconds();
cell->last_inactive = now;
expire_delay = 0;
- if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
- !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
+ if (cell->vl_servers->nr_servers)
expire_delay = afs_cell_gc_delay;
if (atomic_dec_return(&cell->usage) > 1)
goto final_destruction;
if (cell->state == AFS_CELL_FAILED)
goto done;
- cell->state = AFS_CELL_UNSET;
+ smp_store_release(&cell->state, AFS_CELL_UNSET);
+ wake_up_var(&cell->state);
goto again;
case AFS_CELL_UNSET:
- cell->state = AFS_CELL_ACTIVATING;
+ smp_store_release(&cell->state, AFS_CELL_ACTIVATING);
+ wake_up_var(&cell->state);
goto again;
case AFS_CELL_ACTIVATING:
if (ret < 0)
goto activation_failed;
- cell->state = AFS_CELL_ACTIVE;
- smp_wmb();
- clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
- wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ smp_store_release(&cell->state, AFS_CELL_ACTIVE);
+ wake_up_var(&cell->state);
goto again;
case AFS_CELL_ACTIVE:
if (atomic_read(&cell->usage) > 1) {
- time64_t now = ktime_get_real_seconds();
- if (cell->dns_expiry <= now && net->live)
- afs_update_cell(cell);
+ if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
+ ret = afs_update_cell(cell);
+ if (ret < 0)
+ cell->error = ret;
+ }
goto done;
}
- cell->state = AFS_CELL_DEACTIVATING;
+ smp_store_release(&cell->state, AFS_CELL_DEACTIVATING);
+ wake_up_var(&cell->state);
goto again;
case AFS_CELL_DEACTIVATING:
- set_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
if (atomic_read(&cell->usage) > 1)
goto reverse_deactivation;
afs_deactivate_cell(net, cell);
- cell->state = AFS_CELL_INACTIVE;
+ smp_store_release(&cell->state, AFS_CELL_INACTIVE);
+ wake_up_var(&cell->state);
goto again;
default:
cell->error = ret;
afs_deactivate_cell(net, cell);
- cell->state = AFS_CELL_FAILED;
- smp_wmb();
- if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags))
- wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */
+ wake_up_var(&cell->state);
goto again;
reverse_deactivation:
- cell->state = AFS_CELL_ACTIVE;
- smp_wmb();
- clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
- wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ smp_store_release(&cell->state, AFS_CELL_ACTIVE);
+ wake_up_var(&cell->state);
_leave(" [deact->act]");
return;
}
if (usage == 1) {
+ struct afs_vlserver_list *vllist;
time64_t expire_at = cell->last_inactive;
- if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
- !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
+ read_lock(&cell->vl_servers_lock);
+ vllist = rcu_dereference_protected(
+ cell->vl_servers,
+ lockdep_is_held(&cell->vl_servers_lock));
+ if (vllist->nr_servers > 0)
expire_at += afs_cell_gc_delay;
+ read_unlock(&cell->vl_servers_lock);
if (purging || expire_at <= now)
sched_cell = true;
else if (expire_at < next_manage)
}
if (!purging) {
- if (cell->dns_expiry <= now)
+ if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags))
sched_cell = true;
- else if (cell->dns_expiry <= next_manage)
- next_manage = cell->dns_expiry;
}
if (sched_cell)
return 0;
}
- call->cm_server = server;
+ call->server = server;
return afs_record_cm_probe(call, server);
}
return 0;
}
- call->cm_server = server;
+ call->server = server;
return afs_record_cm_probe(call, server);
}
* server holds up change visibility till it receives our reply so as
* to maintain cache coherency.
*/
- if (call->cm_server)
- afs_break_callbacks(call->cm_server, call->count, call->request);
+ if (call->server)
+ afs_break_callbacks(call->server, call->count, call->request);
afs_send_empty_reply(call);
afs_put_call(call);
{
struct afs_call *call = container_of(work, struct afs_call, work);
- _enter("{%p}", call->cm_server);
+ _enter("{%p}", call->server);
- if (call->cm_server)
- afs_init_callback_state(call->cm_server);
+ if (call->server)
+ afs_init_callback_state(call->server);
afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
#include <linux/sched.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
+#include "afs_fs.h"
#include "xdr_fs.h"
static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
bool found;
bool one_only;
unsigned short nr_fids;
- struct afs_file_status *statuses;
- struct afs_callback *callbacks;
+ struct inode **inodes;
+ struct afs_status_cb *statuses;
struct afs_fid fids[50];
};
struct key *key)
{
struct afs_lookup_cookie *cookie;
- struct afs_cb_interest *cbi = NULL;
+ struct afs_cb_interest *dcbi, *cbi = NULL;
struct afs_super_info *as = dir->i_sb->s_fs_info;
- struct afs_iget_data data;
+ struct afs_status_cb *scb;
+ struct afs_iget_data iget_data;
struct afs_fs_cursor fc;
- struct afs_vnode *dvnode = AFS_FS_I(dir);
- struct inode *inode = NULL;
+ struct afs_server *server;
+ struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
+ struct inode *inode = NULL, *ti;
int ret, i;
_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */
read_seqlock_excl(&dvnode->cb_lock);
- if (dvnode->cb_interest &&
- dvnode->cb_interest->server &&
- test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags))
- cookie->one_only = true;
+ dcbi = rcu_dereference_protected(dvnode->cb_interest,
+ lockdep_is_held(&dvnode->cb_lock.lock));
+ if (dcbi) {
+ server = dcbi->server;
+ if (server &&
+ test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags))
+ cookie->one_only = true;
+ }
read_sequnlock_excl(&dvnode->cb_lock);
for (i = 0; i < 50; i++)
goto out;
/* Check to see if we already have an inode for the primary fid. */
- data.volume = dvnode->volume;
- data.fid = cookie->fids[0];
- inode = ilookup5(dir->i_sb, cookie->fids[0].vnode, afs_iget5_test, &data);
+ iget_data.fid = cookie->fids[0];
+ iget_data.volume = dvnode->volume;
+ iget_data.cb_v_break = dvnode->volume->cb_v_break;
+ iget_data.cb_s_break = 0;
+ inode = ilookup5(dir->i_sb, cookie->fids[0].vnode,
+ afs_iget5_test, &iget_data);
if (inode)
goto out;
/* Need space for examining all the selected files */
inode = ERR_PTR(-ENOMEM);
- cookie->statuses = kcalloc(cookie->nr_fids, sizeof(struct afs_file_status),
- GFP_KERNEL);
+ cookie->statuses = kvcalloc(cookie->nr_fids, sizeof(struct afs_status_cb),
+ GFP_KERNEL);
if (!cookie->statuses)
goto out;
- cookie->callbacks = kcalloc(cookie->nr_fids, sizeof(struct afs_callback),
- GFP_KERNEL);
- if (!cookie->callbacks)
+ cookie->inodes = kcalloc(cookie->nr_fids, sizeof(struct inode *),
+ GFP_KERNEL);
+ if (!cookie->inodes)
goto out_s;
+ for (i = 1; i < cookie->nr_fids; i++) {
+ scb = &cookie->statuses[i];
+
+ /* Find any inodes that already exist and get their
+ * callback counters.
+ */
+ iget_data.fid = cookie->fids[i];
+ ti = ilookup5_nowait(dir->i_sb, iget_data.fid.vnode,
+ afs_iget5_test, &iget_data);
+ if (!IS_ERR_OR_NULL(ti)) {
+ vnode = AFS_FS_I(ti);
+ scb->cb_break = afs_calc_vnode_cb_break(vnode);
+ cookie->inodes[i] = ti;
+ }
+ }
+
/* Try FS.InlineBulkStatus first. Abort codes for the individual
* lookups contained therein are stored in the reply without aborting
* the whole operation.
goto no_inline_bulk_status;
inode = ERR_PTR(-ERESTARTSYS);
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
while (afs_select_fileserver(&fc)) {
if (test_bit(AFS_SERVER_FL_NO_IBULK,
&fc.cbi->server->flags)) {
fc.ac.error = -ECONNABORTED;
break;
}
+ iget_data.cb_v_break = dvnode->volume->cb_v_break;
+ iget_data.cb_s_break = fc.cbi->server->cb_s_break;
afs_fs_inline_bulk_status(&fc,
afs_v2net(dvnode),
cookie->fids,
cookie->statuses,
- cookie->callbacks,
cookie->nr_fids, NULL);
}
* any of the lookups fails - so, for the moment, revert to
* FS.FetchStatus for just the primary fid.
*/
- cookie->nr_fids = 1;
inode = ERR_PTR(-ERESTARTSYS);
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
while (afs_select_fileserver(&fc)) {
+ iget_data.cb_v_break = dvnode->volume->cb_v_break;
+ iget_data.cb_s_break = fc.cbi->server->cb_s_break;
+ scb = &cookie->statuses[0];
afs_fs_fetch_status(&fc,
afs_v2net(dvnode),
cookie->fids,
- cookie->statuses,
- cookie->callbacks,
+ scb,
NULL);
}
if (IS_ERR(inode))
goto out_c;
- for (i = 0; i < cookie->nr_fids; i++)
- cookie->statuses[i].abort_code = 0;
-
success:
/* Turn all the files into inodes and save the first one - which is the
* one we actually want.
*/
- if (cookie->statuses[0].abort_code != 0)
- inode = ERR_PTR(afs_abort_to_error(cookie->statuses[0].abort_code));
+ scb = &cookie->statuses[0];
+ if (scb->status.abort_code != 0)
+ inode = ERR_PTR(afs_abort_to_error(scb->status.abort_code));
for (i = 0; i < cookie->nr_fids; i++) {
- struct inode *ti;
+ struct afs_status_cb *scb = &cookie->statuses[i];
+
+ if (!scb->have_status && !scb->have_error)
+ continue;
+
+ if (cookie->inodes[i]) {
+ afs_vnode_commit_status(&fc, AFS_FS_I(cookie->inodes[i]),
+ scb->cb_break, NULL, scb);
+ continue;
+ }
- if (cookie->statuses[i].abort_code != 0)
+ if (scb->status.abort_code != 0)
continue;
- ti = afs_iget(dir->i_sb, key, &cookie->fids[i],
- &cookie->statuses[i],
- &cookie->callbacks[i],
- cbi, dvnode);
+ iget_data.fid = cookie->fids[i];
+ ti = afs_iget(dir->i_sb, key, &iget_data, scb, cbi, dvnode);
+ if (!IS_ERR(ti))
+ afs_cache_permit(AFS_FS_I(ti), key,
+ 0 /* Assume vnode->cb_break is 0 */ +
+ iget_data.cb_v_break,
+ scb);
if (i == 0) {
inode = ti;
} else {
out_c:
afs_put_cb_interest(afs_v2net(dvnode), cbi);
- kfree(cookie->callbacks);
+ if (cookie->inodes) {
+ for (i = 0; i < cookie->nr_fids; i++)
+ iput(cookie->inodes[i]);
+ kfree(cookie->inodes);
+ }
out_s:
- kfree(cookie->statuses);
+ kvfree(cookie->statuses);
out:
kfree(cookie);
return inode;
*/
static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
struct dentry *new_dentry,
- struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb)
+ struct afs_iget_data *new_data,
+ struct afs_status_cb *new_scb)
{
struct afs_vnode *vnode;
struct inode *inode;
return;
inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
- newfid, newstatus, newcb, fc->cbi, fc->vnode);
+ new_data, new_scb, fc->cbi, fc->vnode);
if (IS_ERR(inode)) {
/* ENOMEM or EINTR at a really inconvenient time - just abandon
* the new directory on the server.
vnode = AFS_FS_I(inode);
set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- afs_vnode_commit_status(fc, vnode, 0);
+ if (fc->ac.error == 0)
+ afs_cache_permit(vnode, fc->key, vnode->cb_break, new_scb);
d_instantiate(new_dentry, inode);
}
+static void afs_prep_for_new_inode(struct afs_fs_cursor *fc,
+ struct afs_iget_data *iget_data)
+{
+ iget_data->volume = fc->vnode->volume;
+ iget_data->cb_v_break = fc->vnode->volume->cb_v_break;
+ iget_data->cb_s_break = fc->cbi->server->cb_s_break;
+}
+
/*
* create a directory on an AFS filesystem
*/
static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
- struct afs_file_status newstatus;
+ struct afs_iget_data iget_data;
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
- struct afs_callback newcb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
- struct afs_fid newfid;
struct key *key;
- u64 data_version = dvnode->status.data_version;
int ret;
mode |= S_IFDIR;
_enter("{%llx:%llu},{%pd},%ho",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
- &newfid, &newstatus, &newcb);
+ afs_prep_for_new_inode(&fc, &iget_data);
+ afs_fs_create(&fc, dentry->d_name.name, mode,
+ &scb[0], &iget_data.fid, &scb[1]);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
- afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
+ afs_check_for_remote_deletion(&fc, dvnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
if (ret == 0 &&
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+ afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create);
key_put(key);
+ kfree(scb);
_leave(" = 0");
return 0;
error_key:
key_put(key);
+error_scb:
+ kfree(scb);
error:
d_drop(dentry);
_leave(" = %d", ret);
*/
static int afs_rmdir(struct inode *dir, struct dentry *dentry)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
- u64 data_version = dvnode->status.data_version;
int ret;
_enter("{%llx:%llu},{%pd}",
dvnode->fid.vid, dvnode->fid.vnode, dentry);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_remove(&fc, vnode, dentry->d_name.name, true,
- data_version);
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, true, scb);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
if (ret == 0) {
afs_dir_remove_subdir(dentry);
error_key:
key_put(key);
error:
+ kfree(scb);
return ret;
}
* However, if we didn't have a callback promise outstanding, or it was
* outstanding on a different server, then it won't break it either...
*/
-int afs_dir_remove_link(struct dentry *dentry, struct key *key,
- unsigned long d_version_before,
- unsigned long d_version_after)
+static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry,
+ struct key *key)
{
- bool dir_valid;
int ret = 0;
- /* There were no intervening changes on the server if the version
- * number we got back was incremented by exactly 1.
- */
- dir_valid = (d_version_after == d_version_before + 1);
-
if (d_really_is_positive(dentry)) {
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
/* Already done */
- } else if (dir_valid) {
+ } else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+ write_seqlock(&vnode->cb_lock);
drop_nlink(&vnode->vfs_inode);
if (vnode->vfs_inode.i_nlink == 0) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ __afs_break_callback(vnode);
}
+ write_sequnlock(&vnode->cb_lock);
ret = 0;
} else {
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ afs_break_callback(vnode);
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
kdebug("AFS_VNODE_DELETED");
static int afs_unlink(struct inode *dir, struct dentry *dentry)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
- unsigned long d_version = (unsigned long)dentry->d_fsdata;
bool need_rehash = false;
- u64 data_version = dvnode->status.data_version;
int ret;
_enter("{%llx:%llu},{%pd}",
if (dentry->d_name.len >= AFSNAMEMAX)
return -ENAMETOOLONG;
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
/* Try to make sure we have a callback promise on the victim. */
spin_unlock(&dentry->d_lock);
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+ afs_dataversion_t data_version_2 = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+ fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
!test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
- data_version);
+ &scb[0], &scb[1]);
if (fc.ac.error != -ECONNABORTED ||
fc.ac.abort_code != RXGEN_OPCODE)
continue;
set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
}
- afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
- data_version);
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, false, &scb[0]);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
+ &data_version_2, &scb[1]);
ret = afs_end_vnode_operation(&fc);
- if (ret == 0)
- ret = afs_dir_remove_link(
- dentry, key, d_version,
- (unsigned long)dvnode->status.data_version);
+ if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
+ ret = afs_dir_remove_link(dvnode, dentry, key);
if (ret == 0 &&
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_edit_dir_remove(dvnode, &dentry->d_name,
error_key:
key_put(key);
+error_scb:
+ kfree(scb);
error:
_leave(" = %d", ret);
return ret;
static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
+ struct afs_iget_data iget_data;
struct afs_fs_cursor fc;
- struct afs_file_status newstatus;
- struct afs_callback newcb;
+ struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
- struct afs_fid newfid;
struct key *key;
- u64 data_version = dvnode->status.data_version;
int ret;
mode |= S_IFREG;
goto error;
}
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error_scb;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
- &newfid, &newstatus, &newcb);
+ afs_prep_for_new_inode(&fc, &iget_data);
+ afs_fs_create(&fc, dentry->d_name.name, mode,
+ &scb[0], &iget_data.fid, &scb[1]);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
- afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
+ afs_check_for_remote_deletion(&fc, dvnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
}
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+ afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create);
+ kfree(scb);
key_put(key);
_leave(" = 0");
return 0;
+error_scb:
+ kfree(scb);
error_key:
key_put(key);
error:
struct dentry *dentry)
{
struct afs_fs_cursor fc;
- struct afs_vnode *dvnode, *vnode;
+ struct afs_status_cb *scb;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
struct key *key;
- u64 data_version;
int ret;
- vnode = AFS_FS_I(d_inode(from));
- dvnode = AFS_FS_I(dir);
- data_version = dvnode->status.data_version;
-
_enter("{%llx:%llu},{%llx:%llu},{%pd}",
vnode->fid.vid, vnode->fid.vnode,
dvnode->fid.vid, dvnode->fid.vnode,
if (dentry->d_name.len >= AFSNAMEMAX)
goto error;
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
goto error_key;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
- afs_fs_link(&fc, vnode, dentry->d_name.name, data_version);
+ afs_fs_link(&fc, vnode, dentry->d_name.name,
+ &scb[0], &scb[1]);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break_2);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
+ NULL, &scb[1]);
ihold(&vnode->vfs_inode);
d_instantiate(dentry, &vnode->vfs_inode);
afs_edit_dir_for_link);
key_put(key);
+ kfree(scb);
_leave(" = 0");
return 0;
error_key:
key_put(key);
+error_scb:
+ kfree(scb);
error:
d_drop(dentry);
_leave(" = %d", ret);
static int afs_symlink(struct inode *dir, struct dentry *dentry,
const char *content)
{
+ struct afs_iget_data iget_data;
struct afs_fs_cursor fc;
- struct afs_file_status newstatus;
+ struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
- struct afs_fid newfid;
struct key *key;
- u64 data_version = dvnode->status.data_version;
int ret;
_enter("{%llx:%llu},{%pd},%s",
if (strlen(content) >= AFSPATHMAX)
goto error;
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_symlink(&fc, dentry->d_name.name,
- content, data_version,
- &newfid, &newstatus);
+ afs_prep_for_new_inode(&fc, &iget_data);
+ afs_fs_symlink(&fc, dentry->d_name.name, content,
+ &scb[0], &iget_data.fid, &scb[1]);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
- afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL);
+ afs_check_for_remote_deletion(&fc, dvnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
}
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+ afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_symlink);
key_put(key);
+ kfree(scb);
_leave(" = 0");
return 0;
error_key:
key_put(key);
+error_scb:
+ kfree(scb);
error:
d_drop(dentry);
_leave(" = %d", ret);
unsigned int flags)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
struct dentry *tmp = NULL, *rehash = NULL;
struct inode *new_inode;
struct key *key;
- u64 orig_data_version, new_data_version;
bool new_negative = d_is_negative(new_dentry);
int ret;
vnode = AFS_FS_I(d_inode(old_dentry));
orig_dvnode = AFS_FS_I(old_dir);
new_dvnode = AFS_FS_I(new_dir);
- orig_data_version = orig_dvnode->status.data_version;
- new_data_version = new_dvnode->status.data_version;
_enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
new_dvnode->fid.vid, new_dvnode->fid.vnode,
new_dentry);
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(orig_dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
/* For non-directories, check whether the target is busy and if so,
new_dentry = tmp;
rehash = NULL;
new_negative = true;
- orig_data_version = orig_dvnode->status.data_version;
- new_data_version = new_dvnode->status.data_version;
}
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
+ afs_dataversion_t orig_data_version;
+ afs_dataversion_t new_data_version;
+ struct afs_status_cb *new_scb = &scb[1];
+
+ orig_data_version = orig_dvnode->status.data_version + 1;
+
if (orig_dvnode != new_dvnode) {
if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
goto error_rehash;
}
+ new_data_version = new_dvnode->status.data_version;
+ } else {
+ new_data_version = orig_data_version;
+ new_scb = &scb[0];
}
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode);
fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
afs_fs_rename(&fc, old_dentry->d_name.name,
new_dvnode, new_dentry->d_name.name,
- orig_data_version, new_data_version);
+ &scb[0], new_scb);
}
- afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break);
- afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2);
- if (orig_dvnode != new_dvnode)
+ afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break,
+ &orig_data_version, &scb[0]);
+ if (new_dvnode != orig_dvnode) {
+ afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2,
+ &new_data_version, &scb[1]);
mutex_unlock(&new_dvnode->io_lock);
+ }
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_rehash;
if (tmp)
dput(tmp);
key_put(key);
+error_scb:
+ kfree(scb);
error:
_leave(" = %d", ret);
return ret;
struct key *key)
{
struct afs_fs_cursor fc;
- u64 dir_data_version = dvnode->status.data_version;
+ struct afs_status_cb *scb;
int ret = -ERESTARTSYS;
_enter("%pd,%pd", old, new);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
trace_afs_silly_rename(vnode, false);
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t dir_data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_rename(&fc, old->d_name.name,
dvnode, new->d_name.name,
- dir_data_version, dir_data_version);
+ scb, scb);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &dir_data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
fsnotify_nameremove(old, 0);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
struct dentry *dentry, struct key *key)
{
struct afs_fs_cursor fc;
- u64 dir_data_version = dvnode->status.data_version;
+ struct afs_status_cb *scb;
int ret = -ERESTARTSYS;
_enter("");
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
trace_afs_silly_rename(vnode, true);
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, false)) {
+ afs_dataversion_t dir_data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
!test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
- dir_data_version);
+ &scb[0], &scb[1]);
if (fc.ac.error != -ECONNABORTED ||
fc.ac.abort_code != RXGEN_OPCODE)
continue;
set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
}
- afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
- dir_data_version);
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, false, &scb[0]);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &dir_data_version, &scb[0]);
ret = afs_end_vnode_operation(&fc);
if (ret == 0) {
drop_nlink(&vnode->vfs_inode);
afs_edit_dir_for_unlink);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
return 0;
}
- ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL);
+ ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false);
if (ret == -ENODATA)
ret = -EDESTADDRREQ;
return ret;
struct afs_net *net = afs_sb2net(sb);
int ret;
- if (mutex_lock_interruptible(&net->proc_cells_lock) < 0)
- return -ERESTARTSYS;
+ mutex_lock(&net->proc_cells_lock);
net->dynroot_sb = sb;
hlist_for_each_entry(cell, &net->proc_cells, proc_link) {
{
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_file *af = file->private_data;
+ int ret = 0;
_enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
if ((file->f_mode & FMODE_WRITE))
- return vfs_fsync(file, 0);
+ ret = vfs_fsync(file, 0);
file->private_data = NULL;
if (af->wb)
key_put(af->key);
kfree(af);
afs_prune_wb_keys(vnode);
- _leave(" = 0");
- return 0;
+ _leave(" = %d", ret);
+ return ret;
}
/*
int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
int ret;
_enter("%s{%llx:%llu.%u},%x,,,",
vnode->fid.unique,
key_serial(key));
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_fetch_data(&fc, desc);
+ afs_fs_fetch_data(&fc, scb, desc);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
&afs_v2net(vnode)->n_fetch_bytes);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
/*
* Make pages available as they're filled.
*/
-static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
+static void afs_readpages_page_done(struct afs_read *req)
{
#ifdef CONFIG_AFS_FSCACHE
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_vnode *vnode = req->vnode;
#endif
struct page *page = req->pages[req->index];
return -ENOMEM;
refcount_set(&req->usage, 1);
+ req->vnode = vnode;
req->page_done = afs_readpages_page_done;
req->pos = first->index;
req->pos <<= PAGE_SHIFT;
{
_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
- if (vnode->lock_state != AFS_VNODE_LOCK_WAITING_FOR_CB)
- return;
-
spin_lock(&vnode->lock);
if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
afs_next_locker(vnode, 0);
*/
void afs_lock_op_done(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_vnode *vnode = call->lvnode;
if (call->error == 0) {
spin_lock(&vnode->lock);
static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
afs_lock_type_t type)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
int ret;
vnode->fid.unique,
key_serial(key), type);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_set_lock(&fc, type);
+ afs_fs_set_lock(&fc, type, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb);
ret = afs_end_vnode_operation(&fc);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
*/
static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
int ret;
vnode->fid.unique,
key_serial(key));
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, false)) {
while (afs_select_current_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_extend_lock(&fc);
+ afs_fs_extend_lock(&fc, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb);
ret = afs_end_vnode_operation(&fc);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
*/
static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
int ret;
vnode->fid.unique,
key_serial(key));
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, false)) {
while (afs_select_current_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_release_lock(&fc);
+ afs_fs_release_lock(&fc, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb);
ret = afs_end_vnode_operation(&fc);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
posix_test_lock(file, fl);
if (fl->fl_type == F_UNLCK) {
/* no local locks; consult the server */
- ret = afs_fetch_status(vnode, key, false);
+ ret = afs_fetch_status(vnode, key, false, NULL);
if (ret < 0)
goto error;
void afs_fileserver_probe_result(struct afs_call *call)
{
struct afs_addr_list *alist = call->alist;
- struct afs_server *server = call->reply[0];
- unsigned int server_index = (long)call->reply[1];
+ struct afs_server *server = call->server;
+ unsigned int server_index = call->server_index;
unsigned int index = call->addr_ix;
unsigned int rtt = UINT_MAX;
bool have_result = false;
pr_notice("0x50: %08x\n", ntohl(x[0]));
}
-/*
- * Update the core inode struct from a returned status record.
- */
-void afs_update_inode_from_status(struct afs_vnode *vnode,
- struct afs_file_status *status,
- const afs_dataversion_t *expected_version,
- u8 flags)
-{
- struct timespec64 t;
- umode_t mode;
-
- t = status->mtime_client;
- vnode->vfs_inode.i_ctime = t;
- vnode->vfs_inode.i_mtime = t;
- vnode->vfs_inode.i_atime = t;
-
- if (flags & (AFS_VNODE_META_CHANGED | AFS_VNODE_NOT_YET_SET)) {
- vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner);
- vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group);
- set_nlink(&vnode->vfs_inode, status->nlink);
-
- mode = vnode->vfs_inode.i_mode;
- mode &= ~S_IALLUGO;
- mode |= status->mode;
- barrier();
- vnode->vfs_inode.i_mode = mode;
- }
-
- if (!(flags & AFS_VNODE_NOT_YET_SET)) {
- if (expected_version &&
- *expected_version != status->data_version) {
- _debug("vnode modified %llx on {%llx:%llu} [exp %llx]",
- (unsigned long long) status->data_version,
- vnode->fid.vid, vnode->fid.vnode,
- (unsigned long long) *expected_version);
- vnode->invalid_before = status->data_version;
- if (vnode->status.type == AFS_FTYPE_DIR) {
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- afs_stat_v(vnode, n_inval);
- } else {
- set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
- }
- } else if (vnode->status.type == AFS_FTYPE_DIR) {
- /* Expected directory change is handled elsewhere so
- * that we can locally edit the directory and save on a
- * download.
- */
- if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- flags &= ~AFS_VNODE_DATA_CHANGED;
- }
- }
-
- if (flags & (AFS_VNODE_DATA_CHANGED | AFS_VNODE_NOT_YET_SET)) {
- inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
- i_size_write(&vnode->vfs_inode, status->size);
- }
-}
-
/*
* decode an AFSFetchStatus block
*/
-static int xdr_decode_AFSFetchStatus(struct afs_call *call,
- const __be32 **_bp,
- struct afs_file_status *status,
- struct afs_vnode *vnode,
- const afs_dataversion_t *expected_version,
- struct afs_read *read_req)
+static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+ struct afs_file_status *status = &scb->status;
bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
u64 data_version, size;
u32 type, abort_code;
- u8 flags = 0;
abort_code = ntohl(xdr->abort_code);
* case.
*/
status->abort_code = abort_code;
+ scb->have_error = true;
return 0;
}
case AFS_FTYPE_FILE:
case AFS_FTYPE_DIR:
case AFS_FTYPE_SYMLINK:
- if (type != status->type &&
- vnode &&
- !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
- pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- status->type, type);
- goto bad;
- }
status->type = type;
break;
default:
goto bad;
}
-#define EXTRACT_M(FIELD) \
- do { \
- u32 x = ntohl(xdr->FIELD); \
- if (status->FIELD != x) { \
- flags |= AFS_VNODE_META_CHANGED; \
- status->FIELD = x; \
- } \
- } while (0)
-
- EXTRACT_M(nlink);
- EXTRACT_M(author);
- EXTRACT_M(owner);
- EXTRACT_M(caller_access); /* call ticket dependent */
- EXTRACT_M(anon_access);
- EXTRACT_M(mode);
- EXTRACT_M(group);
+ status->nlink = ntohl(xdr->nlink);
+ status->author = ntohl(xdr->author);
+ status->owner = ntohl(xdr->owner);
+ status->caller_access = ntohl(xdr->caller_access); /* Ticket dependent */
+ status->anon_access = ntohl(xdr->anon_access);
+ status->mode = ntohl(xdr->mode) & S_IALLUGO;
+ status->group = ntohl(xdr->group);
+ status->lock_count = ntohl(xdr->lock_count);
status->mtime_client.tv_sec = ntohl(xdr->mtime_client);
status->mtime_client.tv_nsec = 0;
status->mtime_server.tv_sec = ntohl(xdr->mtime_server);
status->mtime_server.tv_nsec = 0;
- status->lock_count = ntohl(xdr->lock_count);
size = (u64)ntohl(xdr->size_lo);
size |= (u64)ntohl(xdr->size_hi) << 32;
data_version = (u64)ntohl(xdr->data_version_lo);
data_version |= (u64)ntohl(xdr->data_version_hi) << 32;
- if (data_version != status->data_version) {
- status->data_version = data_version;
- flags |= AFS_VNODE_DATA_CHANGED;
- }
-
- if (read_req) {
- read_req->data_version = data_version;
- read_req->file_size = size;
- }
+ status->data_version = data_version;
+ scb->have_status = true;
*_bp = (const void *)*_bp + sizeof(*xdr);
-
- if (vnode) {
- if (test_bit(AFS_VNODE_UNSET, &vnode->flags))
- flags |= AFS_VNODE_NOT_YET_SET;
- afs_update_inode_from_status(vnode, status, expected_version,
- flags);
- }
-
return 0;
bad:
return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
}
-/*
- * Decode the file status. We need to lock the target vnode if we're going to
- * update its status so that stat() sees the attributes update atomically.
- */
-static int afs_decode_status(struct afs_call *call,
- const __be32 **_bp,
- struct afs_file_status *status,
- struct afs_vnode *vnode,
- const afs_dataversion_t *expected_version,
- struct afs_read *read_req)
+static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
{
- int ret;
-
- if (!vnode)
- return xdr_decode_AFSFetchStatus(call, _bp, status, vnode,
- expected_version, read_req);
-
- write_seqlock(&vnode->cb_lock);
- ret = xdr_decode_AFSFetchStatus(call, _bp, status, vnode,
- expected_version, read_req);
- write_sequnlock(&vnode->cb_lock);
- return ret;
+ return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry;
}
-/*
- * decode an AFSCallBack block
- */
-static void xdr_decode_AFSCallBack(struct afs_call *call,
- struct afs_vnode *vnode,
- const __be32 **_bp)
+static void xdr_decode_AFSCallBack(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
- struct afs_cb_interest *old, *cbi = call->cbi;
+ struct afs_callback *cb = &scb->callback;
const __be32 *bp = *_bp;
- u32 cb_expiry;
-
- write_seqlock(&vnode->cb_lock);
-
- if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
- vnode->cb_version = ntohl(*bp++);
- cb_expiry = ntohl(*bp++);
- vnode->cb_type = ntohl(*bp++);
- vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds();
- old = vnode->cb_interest;
- if (old != call->cbi) {
- vnode->cb_interest = cbi;
- cbi = old;
- }
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- } else {
- bp += 3;
- }
- write_sequnlock(&vnode->cb_lock);
- call->cbi = cbi;
- *_bp = bp;
-}
-
-static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
-{
- return ktime_add_ns(call->reply_time, expiry * NSEC_PER_SEC);
-}
-
-static void xdr_decode_AFSCallBack_raw(struct afs_call *call,
- const __be32 **_bp,
- struct afs_callback *cb)
-{
- const __be32 *bp = *_bp;
-
- cb->version = ntohl(*bp++);
+ bp++; /* version */
cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++));
- cb->type = ntohl(*bp++);
+ bp++; /* type */
+ scb->have_cb = true;
*_bp = bp;
}
*/
static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
if (ret < 0)
return ret;
- _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSCallBack(call, vnode, &bp);
- xdr_decode_AFSVolSync(&bp, call->reply[1]);
+ xdr_decode_AFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
/*
* fetch the status information for a file
*/
-int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync,
- bool new_inode)
+int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb,
+ struct afs_volsync *volsync)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_fetch_file_status(fc, volsync, new_inode);
+ return yfs_fs_fetch_file_status(fc, scb, volsync);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
}
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = volsync;
- call->expected_version = new_inode ? 1 : vnode->status.data_version;
- call->want_reply_time = true;
+ call->out_scb = scb;
+ call->out_volsync = volsync;
/* marshall the parameters */
bp = call->request;
bp[2] = htonl(vnode->fid.vnode);
bp[3] = htonl(vnode->fid.unique);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_fetch_data(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
- struct afs_read *req = call->reply[2];
+ struct afs_read *req = call->read_request;
const __be32 *bp;
unsigned int size;
int ret;
if (req->offset == PAGE_SIZE) {
req->offset = 0;
if (req->page_done)
- req->page_done(call, req);
+ req->page_done(req);
req->index++;
if (req->remain > 0)
goto begin_page;
return ret;
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &vnode->status.data_version, req);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSCallBack(call, vnode, &bp);
- xdr_decode_AFSVolSync(&bp, call->reply[1]);
+ xdr_decode_AFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
+
+ req->data_version = call->out_scb->status.data_version;
+ req->file_size = call->out_scb->status.size;
call->unmarshall++;
zero_user_segment(req->pages[req->index],
req->offset, PAGE_SIZE);
if (req->page_done)
- req->page_done(call, req);
+ req->page_done(req);
req->offset = 0;
}
static void afs_fetch_data_destructor(struct afs_call *call)
{
- struct afs_read *req = call->reply[2];
+ struct afs_read *req = call->read_request;
afs_put_read(req);
afs_flat_call_destructor(call);
/*
* fetch data from a very large file
*/
-static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
+static int afs_fs_fetch_data64(struct afs_fs_cursor *fc,
+ struct afs_status_cb *scb,
+ struct afs_read *req)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = NULL; /* volsync */
- call->reply[2] = req;
- call->expected_version = vnode->status.data_version;
- call->want_reply_time = true;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
+ call->read_request = req;
/* marshall the parameters */
bp = call->request;
bp[7] = htonl(lower_32_bits(req->len));
refcount_inc(&req->usage);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
* fetch data from a file
*/
-int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
+int afs_fs_fetch_data(struct afs_fs_cursor *fc,
+ struct afs_status_cb *scb,
+ struct afs_read *req)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_fetch_data(fc, req);
+ return yfs_fs_fetch_data(fc, scb, req);
if (upper_32_bits(req->pos) ||
upper_32_bits(req->len) ||
upper_32_bits(req->pos + req->len))
- return afs_fs_fetch_data64(fc, req);
+ return afs_fs_fetch_data64(fc, scb, req);
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = NULL; /* volsync */
- call->reply[2] = req;
- call->expected_version = vnode->status.data_version;
- call->want_reply_time = true;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
+ call->read_request = req;
/* marshall the parameters */
bp = call->request;
bp[5] = htonl(lower_32_bits(req->len));
refcount_inc(&req->usage);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_create_vnode(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
- _enter("{%u}", call->unmarshall);
-
ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFid(&bp, call->reply[1]);
- ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ xdr_decode_AFSFid(&bp, call->out_fid);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]);
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
int afs_fs_create(struct afs_fs_cursor *fc,
const char *name,
umode_t mode,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb)
+ struct afs_status_cb *new_scb)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, padsz;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){
if (S_ISDIR(mode))
- return yfs_fs_make_dir(fc, name, mode, current_data_version,
- newfid, newstatus, newcb);
+ return yfs_fs_make_dir(fc, name, mode, dvnode_scb,
+ newfid, new_scb);
else
- return yfs_fs_create_file(fc, name, mode, current_data_version,
- newfid, newstatus, newcb);
+ return yfs_fs_create_file(fc, name, mode, dvnode_scb,
+ newfid, new_scb);
}
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->reply[3] = newcb;
- call->expected_version = current_data_version + 1;
- call->want_reply_time = true;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = new_scb;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(S_ISDIR(mode) ? FSMAKEDIR : FSCREATEFILE);
- *bp++ = htonl(vnode->fid.vid);
- *bp++ = htonl(vnode->fid.vnode);
- *bp++ = htonl(vnode->fid.unique);
+ *bp++ = htonl(dvnode->fid.vid);
+ *bp++ = htonl(dvnode->fid.vnode);
+ *bp++ = htonl(dvnode->fid.unique);
*bp++ = htonl(namesz);
memcpy(bp, name, namesz);
bp = (void *) bp + namesz;
bp = (void *) bp + padsz;
}
*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
- *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
+ *bp++ = htonl(dvnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
*bp++ = 0; /* segment size */
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call1(call, &vnode->fid, name);
+ trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
- * Deliver reply data to any operation that returns file status and volume
+ * Deliver reply data to any operation that returns directory status and volume
* sync.
*/
-static int afs_deliver_fs_status_and_vol(struct afs_call *call)
+static int afs_deliver_fs_dir_status_and_vol(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
- _enter("{%u}", call->unmarshall);
-
ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
static const struct afs_call_type afs_RXFSRemoveFile = {
.name = "FS.RemoveFile",
.op = afs_FS_RemoveFile,
- .deliver = afs_deliver_fs_status_and_vol,
+ .deliver = afs_deliver_fs_dir_status_and_vol,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type afs_RXFSRemoveDir = {
.name = "FS.RemoveDir",
.op = afs_FS_RemoveDir,
- .deliver = afs_deliver_fs_status_and_vol,
+ .deliver = afs_deliver_fs_dir_status_and_vol,
.destructor = afs_flat_call_destructor,
};
* remove a file or directory
*/
int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, bool isdir, u64 current_data_version)
+ const char *name, bool isdir, struct afs_status_cb *dvnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_remove(fc, vnode, name, isdir, current_data_version);
+ return yfs_fs_remove(fc, vnode, name, isdir, dvnode_scb);
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_link(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
const __be32 *bp;
int ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = afs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
* make a hard link
*/
int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, u64 current_data_version)
+ const char *name,
+ struct afs_status_cb *dvnode_scb,
+ struct afs_status_cb *vnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_link(fc, vnode, name, current_data_version);
+ return yfs_fs_link(fc, vnode, name, dvnode_scb, vnode_scb);
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_scb = vnode_scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &vnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_symlink(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFid(&bp, call->reply[1]);
- ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ xdr_decode_AFSFid(&bp, call->out_fid);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
int afs_fs_symlink(struct afs_fs_cursor *fc,
const char *name,
const char *contents,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus)
+ struct afs_status_cb *new_scb)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, padsz, c_namesz, c_padsz;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_symlink(fc, name, contents, current_data_version,
- newfid, newstatus);
+ return yfs_fs_symlink(fc, name, contents, dvnode_scb,
+ newfid, new_scb);
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = new_scb;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(FSSYMLINK);
- *bp++ = htonl(vnode->fid.vid);
- *bp++ = htonl(vnode->fid.vnode);
- *bp++ = htonl(vnode->fid.unique);
+ *bp++ = htonl(dvnode->fid.vid);
+ *bp++ = htonl(dvnode->fid.vnode);
+ *bp++ = htonl(dvnode->fid.unique);
*bp++ = htonl(namesz);
memcpy(bp, name, namesz);
bp = (void *) bp + namesz;
bp = (void *) bp + c_padsz;
}
*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
- *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
+ *bp++ = htonl(dvnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(S_IRWXUGO); /* unix mode */
*bp++ = 0; /* segment size */
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call1(call, &vnode->fid, name);
+ trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_rename(struct afs_call *call)
{
- struct afs_vnode *orig_dvnode = call->reply[0], *new_dvnode = call->reply[1];
const __be32 *bp;
int ret;
- _enter("{%u}", call->unmarshall);
-
ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- if (new_dvnode != orig_dvnode) {
- ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
- &call->expected_version_2, NULL);
+ if (call->out_dir_scb != call->out_scb) {
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
}
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
};
/*
- * create a symbolic link
+ * Rename/move a file or directory.
*/
int afs_fs_rename(struct afs_fs_cursor *fc,
const char *orig_name,
struct afs_vnode *new_dvnode,
const char *new_name,
- u64 current_orig_data_version,
- u64 current_new_data_version)
+ struct afs_status_cb *orig_dvnode_scb,
+ struct afs_status_cb *new_dvnode_scb)
{
struct afs_vnode *orig_dvnode = fc->vnode;
struct afs_call *call;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
return yfs_fs_rename(fc, orig_name,
new_dvnode, new_name,
- current_orig_data_version,
- current_new_data_version);
+ orig_dvnode_scb,
+ new_dvnode_scb);
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = orig_dvnode;
- call->reply[1] = new_dvnode;
- call->expected_version = current_orig_data_version + 1;
- call->expected_version_2 = current_new_data_version + 1;
+ call->out_dir_scb = orig_dvnode_scb;
+ call->out_scb = new_dvnode_scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_store_data(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
-
- afs_pages_written_back(vnode, call);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
struct address_space *mapping,
pgoff_t first, pgoff_t last,
unsigned offset, unsigned to,
- loff_t size, loff_t pos, loff_t i_size)
+ loff_t size, loff_t pos, loff_t i_size,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
call->key = fc->key;
call->mapping = mapping;
- call->reply[0] = vnode;
call->first = first;
call->last = last;
call->first_offset = offset;
call->last_to = to;
call->send_pages = true;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl((u32) i_size);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
pgoff_t first, pgoff_t last,
- unsigned offset, unsigned to)
+ unsigned offset, unsigned to,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_store_data(fc, mapping, first, last, offset, to);
+ return yfs_fs_store_data(fc, mapping, first, last, offset, to, scb);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
return afs_fs_store_data64(fc, mapping, first, last, offset, to,
- size, pos, i_size);
+ size, pos, i_size, scb);
call = afs_alloc_flat_call(net, &afs_RXFSStoreData,
(4 + 6 + 3) * 4,
call->key = fc->key;
call->mapping = mapping;
- call->reply[0] = vnode;
call->first = first;
call->last = last;
call->first_offset = offset;
call->last_to = to;
call->send_pages = true;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_store_status(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
* set the attributes on a very large file, using FS.StoreData rather than
* FS.StoreStatus so as to alter the file size also
*/
-static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
+static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
* set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
* so as to alter the file size also
*/
-static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
+static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
ASSERT(attr->ia_valid & ATTR_SIZE);
if (attr->ia_size >> 32)
- return afs_fs_setattr_size64(fc, attr);
+ return afs_fs_setattr_size64(fc, attr, scb);
call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status,
(4 + 6 + 3) * 4,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
* set the attributes on a file, using FS.StoreData if there's a change in file
* size, and FS.StoreStatus otherwise
*/
-int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
+int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_setattr(fc, attr);
+ return yfs_fs_setattr(fc, attr, scb);
if (attr->ia_valid & ATTR_SIZE)
- return afs_fs_setattr_size(fc, attr);
+ return afs_fs_setattr_size(fc, attr, scb);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
return ret;
bp = call->buffer;
- xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
+ xdr_decode_AFSFetchVolumeStatus(&bp, call->out_volstatus);
call->unmarshall++;
afs_extract_to_tmp(call);
return afs_protocol_error(call, -EBADMSG,
afs_eproto_volname_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the volume name */
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("volname '%s'", p);
afs_extract_to_tmp(call);
return afs_protocol_error(call, -EBADMSG,
afs_eproto_offline_msg_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the offline message */
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("offline '%s'", p);
return afs_protocol_error(call, -EBADMSG,
afs_eproto_motd_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the message of the day */
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("motd '%s'", p);
return 0;
}
-/*
- * destroy an FS.GetVolumeStatus call
- */
-static void afs_get_volume_status_call_destructor(struct afs_call *call)
-{
- kfree(call->reply[2]);
- call->reply[2] = NULL;
- afs_flat_call_destructor(call);
-}
-
/*
* FS.GetVolumeStatus operation type
*/
.name = "FS.GetVolumeStatus",
.op = afs_FS_GetVolumeStatus,
.deliver = afs_deliver_fs_get_volume_status,
- .destructor = afs_get_volume_status_call_destructor,
+ .destructor = afs_flat_call_destructor,
};
/*
struct afs_call *call;
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
- void *tmpbuf;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
return yfs_fs_get_volume_status(fc, vs);
_enter("");
- tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
- if (!tmpbuf)
- return -ENOMEM;
-
- call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
- if (!call) {
- kfree(tmpbuf);
+ call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4,
+ max(12 * 4, AFSOPAQUEMAX + 1));
+ if (!call)
return -ENOMEM;
- }
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = vs;
- call->reply[2] = tmpbuf;
+ call->out_volstatus = vs;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
/*
* Set a lock on a file
*/
-int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
+int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_set_lock(fc, type);
+ return yfs_fs_set_lock(fc, type, scb);
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->want_reply_time = true;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_calli(call, &vnode->fid, type);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
* extend a lock on a file
*/
-int afs_fs_extend_lock(struct afs_fs_cursor *fc)
+int afs_fs_extend_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_extend_lock(fc);
+ return yfs_fs_extend_lock(fc, scb);
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->want_reply_time = true;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
* release a lock on a file
*/
-int afs_fs_release_lock(struct afs_fs_cursor *fc)
+int afs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_release_lock(fc);
+ return yfs_fs_release_lock(fc, scb);
_enter("");
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
return 0;
}
-static void afs_destroy_fs_get_capabilities(struct afs_call *call)
-{
- struct afs_server *server = call->reply[0];
-
- afs_put_server(call->net, server);
- afs_flat_call_destructor(call);
-}
-
/*
* FS.GetCapabilities operation type
*/
.op = afs_FS_GetCapabilities,
.deliver = afs_deliver_fs_get_capabilities,
.done = afs_fileserver_probe_result,
- .destructor = afs_destroy_fs_get_capabilities,
+ .destructor = afs_flat_call_destructor,
};
/*
return ERR_PTR(-ENOMEM);
call->key = key;
- call->reply[0] = afs_get_server(server);
- call->reply[1] = (void *)(long)server_index;
+ call->server = afs_get_server(server);
+ call->server_index = server_index;
call->upgrade = true;
- call->want_reply_time = true;
call->async = true;
+ call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
/* marshall the parameters */
bp = call->request;
*/
static int afs_deliver_fs_fetch_status(struct afs_call *call)
{
- struct afs_file_status *status = call->reply[1];
- struct afs_callback *callback = call->reply[2];
- struct afs_volsync *volsync = call->reply[3];
- struct afs_fid *fid = call->reply[0];
const __be32 *bp;
int ret;
if (ret < 0)
return ret;
- _enter("{%llx:%llu}", fid->vid, fid->vnode);
-
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, status, NULL,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSCallBack_raw(call, &bp, callback);
- xdr_decode_AFSVolSync(&bp, volsync);
+ xdr_decode_AFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
int afs_fs_fetch_status(struct afs_fs_cursor *fc,
struct afs_net *net,
struct afs_fid *fid,
- struct afs_file_status *status,
- struct afs_callback *callback,
+ struct afs_status_cb *scb,
struct afs_volsync *volsync)
{
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync);
+ return yfs_fs_fetch_status(fc, net, fid, scb, volsync);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), fid->vid, fid->vnode);
}
call->key = fc->key;
- call->reply[0] = fid;
- call->reply[1] = status;
- call->reply[2] = callback;
- call->reply[3] = volsync;
- call->expected_version = 1; /* vnode->status.data_version */
- call->want_reply_time = true;
+ call->out_fid = fid;
+ call->out_scb = scb;
+ call->out_volsync = volsync;
/* marshall the parameters */
bp = call->request;
bp[2] = htonl(fid->vnode);
bp[3] = htonl(fid->unique);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
{
- struct afs_file_status *statuses;
- struct afs_callback *callbacks;
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_status_cb *scb;
const __be32 *bp;
u32 tmp;
int ret;
return ret;
bp = call->buffer;
- statuses = call->reply[1];
- ret = afs_decode_status(call, &bp, &statuses[call->count],
- call->count == 0 ? vnode : NULL,
- NULL, NULL);
+ scb = &call->out_scb[call->count];
+ ret = xdr_decode_AFSFetchStatus(&bp, call, scb);
if (ret < 0)
return ret;
_debug("unmarshall CB array");
bp = call->buffer;
- callbacks = call->reply[2];
- callbacks[call->count].version = ntohl(bp[0]);
- callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1]));
- callbacks[call->count].type = ntohl(bp[2]);
- statuses = call->reply[1];
- if (call->count == 0 && vnode && statuses[0].abort_code == 0)
- xdr_decode_AFSCallBack(call, vnode, &bp);
+ scb = &call->out_scb[call->count];
+ xdr_decode_AFSCallBack(&bp, call, scb);
call->count++;
if (call->count < call->count2)
goto more_cbs;
return ret;
bp = call->buffer;
- xdr_decode_AFSVolSync(&bp, call->reply[3]);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
struct afs_net *net,
struct afs_fid *fids,
- struct afs_file_status *statuses,
- struct afs_callback *callbacks,
+ struct afs_status_cb *statuses,
unsigned int nr_fids,
struct afs_volsync *volsync)
{
int i;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks,
+ return yfs_fs_inline_bulk_status(fc, net, fids, statuses,
nr_fids, volsync);
_enter(",%x,{%llx:%llu},%u",
}
call->key = fc->key;
- call->reply[0] = NULL; /* vnode for fid[0] */
- call->reply[1] = statuses;
- call->reply[2] = callbacks;
- call->reply[3] = volsync;
+ call->out_scb = statuses;
+ call->out_volsync = volsync;
call->count2 = nr_fids;
- call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(fids[i].unique);
}
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &fids[0]);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int afs_deliver_fs_fetch_acl(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[1];
struct afs_acl *acl;
const __be32 *bp;
unsigned int size;
acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
if (!acl)
return -ENOMEM;
- call->reply[0] = acl;
+ call->ret_acl = acl;
acl->size = call->count2;
afs_extract_begin(call, acl->data, size);
call->unmarshall++;
return ret;
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &vnode->status.data_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSVolSync(&bp, call->reply[2]);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
static void afs_destroy_fs_fetch_acl(struct afs_call *call)
{
- kfree(call->reply[0]);
+ kfree(call->ret_acl);
afs_flat_call_destructor(call);
}
/*
* Fetch the ACL for a file.
*/
-struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
+struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
}
call->key = fc->key;
- call->reply[0] = NULL;
- call->reply[1] = vnode;
- call->reply[2] = NULL; /* volsync */
- call->ret_reply0 = true;
+ call->ret_acl = NULL;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
/* marshall the parameters */
bp = call->request;
bp[2] = htonl(vnode->fid.vnode);
bp[3] = htonl(vnode->fid.unique);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
afs_make_call(&fc->ac, call, GFP_KERNEL);
return (struct afs_acl *)afs_wait_for_call_to_complete(call, &fc->ac);
}
+/*
+ * Deliver reply data to any operation that returns file status and volume
+ * sync.
+ */
+static int afs_deliver_fs_file_status_and_vol(struct afs_call *call)
+{
+ const __be32 *bp;
+ int ret;
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
+ if (ret < 0)
+ return ret;
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
/*
* FS.StoreACL operation type
*/
static const struct afs_call_type afs_RXFSStoreACL = {
.name = "FS.StoreACL",
.op = afs_FS_StoreACL,
- .deliver = afs_deliver_fs_status_and_vol,
+ .deliver = afs_deliver_fs_file_status_and_vol,
.destructor = afs_flat_call_destructor,
};
/*
* Fetch the ACL for a file.
*/
-int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl)
+int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
}
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[2] = NULL; /* volsync */
+ call->out_scb = scb;
+ call->out_volsync = NULL;
/* marshall the parameters */
bp = call->request;
#include <linux/namei.h>
#include <linux/iversion.h>
#include "internal.h"
+#include "afs_fs.h"
static const struct inode_operations afs_symlink_inode_operations = {
.get_link = page_get_link,
* Initialise an inode from the vnode status.
*/
static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
- struct afs_vnode *parent_vnode)
+ struct afs_cb_interest *cbi,
+ struct afs_vnode *parent_vnode,
+ struct afs_status_cb *scb)
{
+ struct afs_cb_interest *old_cbi = NULL;
+ struct afs_file_status *status = &scb->status;
struct inode *inode = AFS_VNODE_TO_I(vnode);
+ struct timespec64 t;
_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
- vnode->status.type,
- vnode->status.nlink,
- (unsigned long long) vnode->status.size,
- vnode->status.data_version,
- vnode->status.mode);
+ status->type,
+ status->nlink,
+ (unsigned long long) status->size,
+ status->data_version,
+ status->mode);
- read_seqlock_excl(&vnode->cb_lock);
+ write_seqlock(&vnode->cb_lock);
- afs_update_inode_from_status(vnode, &vnode->status, NULL,
- AFS_VNODE_NOT_YET_SET);
+ vnode->status = *status;
- switch (vnode->status.type) {
+ t = status->mtime_client;
+ inode->i_ctime = t;
+ inode->i_mtime = t;
+ inode->i_atime = t;
+ inode->i_uid = make_kuid(&init_user_ns, status->owner);
+ inode->i_gid = make_kgid(&init_user_ns, status->group);
+ set_nlink(&vnode->vfs_inode, status->nlink);
+
+ switch (status->type) {
case AFS_FTYPE_FILE:
- inode->i_mode = S_IFREG | vnode->status.mode;
+ inode->i_mode = S_IFREG | status->mode;
inode->i_op = &afs_file_inode_operations;
inode->i_fop = &afs_file_operations;
inode->i_mapping->a_ops = &afs_fs_aops;
break;
case AFS_FTYPE_DIR:
- inode->i_mode = S_IFDIR | vnode->status.mode;
+ inode->i_mode = S_IFDIR | status->mode;
inode->i_op = &afs_dir_inode_operations;
inode->i_fop = &afs_dir_file_operations;
inode->i_mapping->a_ops = &afs_dir_aops;
break;
case AFS_FTYPE_SYMLINK:
/* Symlinks with a mode of 0644 are actually mountpoints. */
- if ((vnode->status.mode & 0777) == 0644) {
+ if ((status->mode & 0777) == 0644) {
inode->i_flags |= S_AUTOMOUNT;
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
inode->i_fop = &afs_mntpt_file_operations;
inode->i_mapping->a_ops = &afs_fs_aops;
} else {
- inode->i_mode = S_IFLNK | vnode->status.mode;
+ inode->i_mode = S_IFLNK | status->mode;
inode->i_op = &afs_symlink_inode_operations;
inode->i_mapping->a_ops = &afs_fs_aops;
}
break;
default:
dump_vnode(vnode, parent_vnode);
- read_sequnlock_excl(&vnode->cb_lock);
+ write_sequnlock(&vnode->cb_lock);
return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
}
* for consistency with other AFS clients.
*/
inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1;
- vnode->invalid_before = vnode->status.data_version;
+ i_size_write(&vnode->vfs_inode, status->size);
+
+ vnode->invalid_before = status->data_version;
+ inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
+
+ if (!scb->have_cb) {
+ /* it's a symlink we just created (the fileserver
+ * didn't give us a callback) */
+ vnode->cb_expires_at = ktime_get_real_seconds();
+ } else {
+ vnode->cb_expires_at = scb->callback.expires_at;
+ old_cbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ if (cbi != old_cbi)
+ rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(cbi));
+ else
+ old_cbi = NULL;
+ set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ }
- read_sequnlock_excl(&vnode->cb_lock);
+ write_sequnlock(&vnode->cb_lock);
+ afs_put_cb_interest(afs_v2net(vnode), old_cbi);
return 0;
}
+/*
+ * Update the core inode struct from a returned status record.
+ */
+static void afs_apply_status(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode,
+ struct afs_status_cb *scb,
+ const afs_dataversion_t *expected_version)
+{
+ struct afs_file_status *status = &scb->status;
+ struct timespec64 t;
+ umode_t mode;
+ bool data_changed = false;
+
+ BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags));
+
+ if (status->type != vnode->status.type) {
+ pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ status->type, vnode->status.type);
+ afs_protocol_error(NULL, -EBADMSG, afs_eproto_bad_status);
+ return;
+ }
+
+ if (status->nlink != vnode->status.nlink)
+ set_nlink(&vnode->vfs_inode, status->nlink);
+
+ if (status->owner != vnode->status.owner)
+ vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner);
+
+ if (status->group != vnode->status.group)
+ vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group);
+
+ if (status->mode != vnode->status.mode) {
+ mode = vnode->vfs_inode.i_mode;
+ mode &= ~S_IALLUGO;
+ mode |= status->mode;
+ WRITE_ONCE(vnode->vfs_inode.i_mode, mode);
+ }
+
+ t = status->mtime_client;
+ vnode->vfs_inode.i_ctime = t;
+ vnode->vfs_inode.i_mtime = t;
+ vnode->vfs_inode.i_atime = t;
+
+ if (vnode->status.data_version != status->data_version)
+ data_changed = true;
+
+ vnode->status = *status;
+
+ if (expected_version &&
+ *expected_version != status->data_version) {
+ kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s",
+ (unsigned long long) status->data_version,
+ vnode->fid.vid, vnode->fid.vnode,
+ (unsigned long long) *expected_version,
+ fc->type ? fc->type->name : "???");
+ vnode->invalid_before = status->data_version;
+ if (vnode->status.type == AFS_FTYPE_DIR) {
+ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ afs_stat_v(vnode, n_inval);
+ } else {
+ set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+ }
+ } else if (vnode->status.type == AFS_FTYPE_DIR) {
+ /* Expected directory change is handled elsewhere so
+ * that we can locally edit the directory and save on a
+ * download.
+ */
+ if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ data_changed = false;
+ }
+
+ if (data_changed) {
+ inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
+ i_size_write(&vnode->vfs_inode, status->size);
+ }
+}
+
+/*
+ * Apply a callback to a vnode.
+ */
+static void afs_apply_callback(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode,
+ struct afs_status_cb *scb,
+ unsigned int cb_break)
+{
+ struct afs_cb_interest *old;
+ struct afs_callback *cb = &scb->callback;
+
+ if (!afs_cb_is_broken(cb_break, vnode, fc->cbi)) {
+ vnode->cb_expires_at = cb->expires_at;
+ old = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ if (old != fc->cbi) {
+ rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(fc->cbi));
+ afs_put_cb_interest(afs_v2net(vnode), old);
+ }
+ set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ }
+}
+
+/*
+ * Apply the received status and callback to an inode all in the same critical
+ * section to avoid races with afs_validate().
+ */
+void afs_vnode_commit_status(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode,
+ unsigned int cb_break,
+ const afs_dataversion_t *expected_version,
+ struct afs_status_cb *scb)
+{
+ if (fc->ac.error != 0)
+ return;
+
+ write_seqlock(&vnode->cb_lock);
+
+ if (scb->have_error) {
+ if (scb->status.abort_code == VNOVNODE) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ clear_nlink(&vnode->vfs_inode);
+ __afs_break_callback(vnode);
+ }
+ } else {
+ if (scb->have_status)
+ afs_apply_status(fc, vnode, scb, expected_version);
+ if (scb->have_cb)
+ afs_apply_callback(fc, vnode, scb, cb_break);
+ }
+
+ write_sequnlock(&vnode->cb_lock);
+
+ if (fc->ac.error == 0 && scb->have_status)
+ afs_cache_permit(vnode, fc->key, cb_break, scb);
+}
+
/*
* Fetch file status from the volume.
*/
-int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new,
+ afs_access_t *_caller_access)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
int ret;
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
vnode->flags);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_fetch_file_status(&fc, NULL, new_inode);
+ afs_fs_fetch_file_status(&fc, scb, NULL);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ if (fc.error) {
+ /* Do nothing. */
+ } else if (is_new) {
+ ret = afs_inode_init_from_status(vnode, key, fc.cbi,
+ NULL, scb);
+ fc.error = ret;
+ if (ret == 0)
+ afs_cache_permit(vnode, key, fc.cb_break, scb);
+ } else {
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
+ }
+ afs_check_for_remote_deletion(&fc, vnode);
ret = afs_end_vnode_operation(&fc);
}
+ if (ret == 0 && _caller_access)
+ *_caller_access = scb->status.caller_access;
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
*/
int afs_iget5_test(struct inode *inode, void *opaque)
{
- struct afs_iget_data *data = opaque;
+ struct afs_iget_data *iget_data = opaque;
struct afs_vnode *vnode = AFS_FS_I(inode);
- return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0;
+ return memcmp(&vnode->fid, &iget_data->fid, sizeof(iget_data->fid)) == 0;
}
/*
*/
static int afs_iget5_set(struct inode *inode, void *opaque)
{
- struct afs_iget_data *data = opaque;
+ struct afs_iget_data *iget_data = opaque;
struct afs_vnode *vnode = AFS_FS_I(inode);
- vnode->fid = data->fid;
- vnode->volume = data->volume;
+ vnode->fid = iget_data->fid;
+ vnode->volume = iget_data->volume;
+ vnode->cb_v_break = iget_data->cb_v_break;
+ vnode->cb_s_break = iget_data->cb_s_break;
/* YFS supports 96-bit vnode IDs, but Linux only supports
* 64-bit inode numbers.
*/
- inode->i_ino = data->fid.vnode;
- inode->i_generation = data->fid.unique;
+ inode->i_ino = iget_data->fid.vnode;
+ inode->i_generation = iget_data->fid.unique;
return 0;
}
*/
struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
{
- struct afs_iget_data data;
struct afs_super_info *as;
struct afs_vnode *vnode;
struct inode *inode;
static atomic_t afs_autocell_ino;
+ struct afs_iget_data iget_data = {
+ .cb_v_break = 0,
+ .cb_s_break = 0,
+ };
+
_enter("");
as = sb->s_fs_info;
if (as->volume) {
- data.volume = as->volume;
- data.fid.vid = as->volume->vid;
+ iget_data.volume = as->volume;
+ iget_data.fid.vid = as->volume->vid;
}
if (root) {
- data.fid.vnode = 1;
- data.fid.unique = 1;
+ iget_data.fid.vnode = 1;
+ iget_data.fid.unique = 1;
} else {
- data.fid.vnode = atomic_inc_return(&afs_autocell_ino);
- data.fid.unique = 0;
+ iget_data.fid.vnode = atomic_inc_return(&afs_autocell_ino);
+ iget_data.fid.unique = 0;
}
- inode = iget5_locked(sb, data.fid.vnode,
+ inode = iget5_locked(sb, iget_data.fid.vnode,
afs_iget5_pseudo_dir_test, afs_iget5_set,
- &data);
+ &iget_data);
if (!inode) {
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
}
_debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
- inode, inode->i_ino, data.fid.vid, data.fid.vnode,
- data.fid.unique);
+ inode, inode->i_ino, iget_data.fid.vid, iget_data.fid.vnode,
+ iget_data.fid.unique);
vnode = AFS_FS_I(inode);
* inode retrieval
*/
struct inode *afs_iget(struct super_block *sb, struct key *key,
- struct afs_fid *fid, struct afs_file_status *status,
- struct afs_callback *cb, struct afs_cb_interest *cbi,
+ struct afs_iget_data *iget_data,
+ struct afs_status_cb *scb,
+ struct afs_cb_interest *cbi,
struct afs_vnode *parent_vnode)
{
- struct afs_iget_data data = { .fid = *fid };
struct afs_super_info *as;
struct afs_vnode *vnode;
+ struct afs_fid *fid = &iget_data->fid;
struct inode *inode;
int ret;
_enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique);
as = sb->s_fs_info;
- data.volume = as->volume;
+ iget_data->volume = as->volume;
inode = iget5_locked(sb, fid->vnode, afs_iget5_test, afs_iget5_set,
- &data);
+ iget_data);
if (!inode) {
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
return inode;
}
- if (!status) {
+ if (!scb) {
/* it's a remotely extant inode */
- ret = afs_fetch_status(vnode, key, true);
+ ret = afs_fetch_status(vnode, key, true, NULL);
if (ret < 0)
goto bad_inode;
} else {
- /* it's an inode we just created */
- memcpy(&vnode->status, status, sizeof(vnode->status));
-
- if (!cb) {
- /* it's a symlink we just created (the fileserver
- * didn't give us a callback) */
- vnode->cb_version = 0;
- vnode->cb_type = 0;
- vnode->cb_expires_at = ktime_get();
- } else {
- vnode->cb_version = cb->version;
- vnode->cb_type = cb->type;
- vnode->cb_expires_at = cb->expires_at;
- vnode->cb_interest = afs_get_cb_interest(cbi);
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- }
-
- vnode->cb_expires_at += ktime_get_real_seconds();
+ ret = afs_inode_init_from_status(vnode, key, cbi, parent_vnode,
+ scb);
+ if (ret < 0)
+ goto bad_inode;
}
- ret = afs_inode_init_from_status(vnode, key, parent_vnode);
- if (ret < 0)
- goto bad_inode;
-
afs_get_inode_cache(vnode);
/* success */
clear_bit(AFS_VNODE_UNSET, &vnode->flags);
inode->i_flags |= S_NOATIME;
unlock_new_inode(inode);
- _leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type);
+ _leave(" = %p", inode);
return inode;
/* failure */
invalidate_inode_pages2(vnode->vfs_inode.i_mapping);
}
+/*
+ * Check the validity of a vnode/inode.
+ */
+bool afs_check_validity(struct afs_vnode *vnode)
+{
+ struct afs_cb_interest *cbi;
+ struct afs_server *server;
+ struct afs_volume *volume = vnode->volume;
+ time64_t now = ktime_get_real_seconds();
+ bool valid, need_clear = false;
+ unsigned int cb_break, cb_s_break, cb_v_break;
+ int seq = 0;
+
+ do {
+ read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+ cb_v_break = READ_ONCE(volume->cb_v_break);
+ cb_break = vnode->cb_break;
+
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ cbi = rcu_dereference(vnode->cb_interest);
+ server = rcu_dereference(cbi->server);
+ cb_s_break = READ_ONCE(server->cb_s_break);
+
+ if (vnode->cb_s_break != cb_s_break ||
+ vnode->cb_v_break != cb_v_break) {
+ vnode->cb_s_break = cb_s_break;
+ vnode->cb_v_break = cb_v_break;
+ need_clear = true;
+ valid = false;
+ } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+ need_clear = true;
+ valid = false;
+ } else if (vnode->cb_expires_at - 10 <= now) {
+ need_clear = true;
+ valid = false;
+ } else {
+ valid = true;
+ }
+ } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ valid = true;
+ } else {
+ vnode->cb_v_break = cb_v_break;
+ valid = false;
+ }
+
+ } while (need_seqretry(&vnode->cb_lock, seq));
+
+ done_seqretry(&vnode->cb_lock, seq);
+
+ if (need_clear) {
+ write_seqlock(&vnode->cb_lock);
+ if (cb_break == vnode->cb_break)
+ __afs_break_callback(vnode);
+ write_sequnlock(&vnode->cb_lock);
+ valid = false;
+ }
+
+ return valid;
+}
+
/*
* validate a vnode/inode
* - there are several things we need to check
*/
int afs_validate(struct afs_vnode *vnode, struct key *key)
{
- time64_t now = ktime_get_real_seconds();
bool valid;
int ret;
vnode->fid.vid, vnode->fid.vnode, vnode->flags,
key_serial(key));
- /* Quickly check the callback state. Ideally, we'd use read_seqbegin
- * here, but we have no way to pass the net namespace to the RCU
- * cleanup for the server record.
- */
- read_seqlock_excl(&vnode->cb_lock);
-
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
- vnode->cb_v_break != vnode->volume->cb_v_break) {
- vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
- vnode->cb_v_break = vnode->volume->cb_v_break;
- valid = false;
- } else if (vnode->status.type == AFS_FTYPE_DIR &&
- (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) ||
- vnode->cb_expires_at - 10 <= now)) {
- valid = false;
- } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) ||
- vnode->cb_expires_at - 10 <= now) {
- valid = false;
- } else {
- valid = true;
- }
- } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- valid = true;
- } else {
- vnode->cb_v_break = vnode->volume->cb_v_break;
- valid = false;
- }
-
- read_sequnlock_excl(&vnode->cb_lock);
+ rcu_read_lock();
+ valid = afs_check_validity(vnode);
+ rcu_read_unlock();
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
clear_nlink(&vnode->vfs_inode);
* access */
if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
_debug("not promised");
- ret = afs_fetch_status(vnode, key, false);
+ ret = afs_fetch_status(vnode, key, false, NULL);
if (ret < 0) {
if (ret == -ENOENT) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
*/
void afs_evict_inode(struct inode *inode)
{
+ struct afs_cb_interest *cbi;
struct afs_vnode *vnode;
vnode = AFS_FS_I(inode);
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- if (vnode->cb_interest) {
- afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
- vnode->cb_interest = NULL;
+ write_seqlock(&vnode->cb_lock);
+ cbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ if (cbi) {
+ afs_put_cb_interest(afs_i2net(inode), cbi);
+ rcu_assign_pointer(vnode->cb_interest, NULL);
}
+ write_sequnlock(&vnode->cb_lock);
while (!list_empty(&vnode->wb_keys)) {
struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
}
#endif
+ afs_prune_wb_keys(vnode);
afs_put_permits(rcu_access_pointer(vnode->permit_cache));
key_put(vnode->silly_key);
vnode->silly_key = NULL;
int afs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
- int ret;
+ int ret = -ENOMEM;
_enter("{%llx:%llu},{n=%pd},%x",
vnode->fid.vid, vnode->fid.vnode, dentry,
return 0;
}
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
/* flush any dirty data outstanding on a regular file */
if (S_ISREG(vnode->vfs_inode.i_mode))
filemap_write_and_wait(vnode->vfs_inode.i_mapping);
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, false)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
+ if (attr->ia_valid & ATTR_SIZE)
+ data_version++;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_setattr(&fc, attr);
+ afs_fs_setattr(&fc, attr, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
if (!(attr->ia_valid & ATTR_FILE))
key_put(key);
+error_scb:
+ kfree(scb);
error:
_leave(" = %d", ret);
return ret;
struct afs_iget_data {
struct afs_fid fid;
struct afs_volume *volume; /* volume on which resides */
+ unsigned int cb_v_break; /* Pre-fetch volume break count */
+ unsigned int cb_s_break; /* Pre-fetch server break count */
};
enum afs_call_state {
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct key *key; /* security for this call */
struct afs_net *net; /* The network namespace */
- struct afs_server *cm_server; /* Server affected by incoming CM call */
+ union {
+ struct afs_server *server;
+ struct afs_vlserver *vlserver;
+ };
struct afs_cb_interest *cbi; /* Callback interest for server used */
+ struct afs_vnode *lvnode; /* vnode being locked */
void *request; /* request data (first part) */
struct address_space *mapping; /* Pages being written from */
struct iov_iter iter; /* Buffer iterator */
struct bio_vec bvec[1];
};
void *buffer; /* reply receive buffer */
- void *reply[4]; /* Where to put the reply */
+ union {
+ long ret0; /* Value to reply with instead of 0 */
+ struct afs_addr_list *ret_alist;
+ struct afs_vldb_entry *ret_vldb;
+ struct afs_acl *ret_acl;
+ };
+ struct afs_fid *out_fid;
+ struct afs_status_cb *out_dir_scb;
+ struct afs_status_cb *out_scb;
+ struct yfs_acl *out_yacl;
+ struct afs_volsync *out_volsync;
+ struct afs_volume_status *out_volstatus;
+ struct afs_read *read_request;
+ unsigned int server_index;
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
atomic_t usage;
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
u32 epoch;
+ unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
- unsigned int cb_break; /* cb_break + cb_s_break before the call */
union {
unsigned last_to; /* amount of mapping[last] */
unsigned count2; /* count used in unmarshalling */
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
- bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
- bool want_reply_time; /* T if want reply_time */
+ bool have_reply_time; /* T if have got reply_time */
+ bool intr; /* T if interruptible */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
u32 operation_ID; /* operation ID for an incoming call */
} __attribute__((packed));
__be64 tmp64;
};
- afs_dataversion_t expected_version; /* Updated version expected from store */
- afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */
ktime_t reply_time; /* Time of first reply packet */
};
unsigned int index; /* Which page we're reading into */
unsigned int nr_pages;
unsigned int offset; /* offset into current page */
- void (*page_done)(struct afs_call *, struct afs_read *);
+ struct afs_vnode *vnode;
+ void (*page_done)(struct afs_read *);
struct page **pages;
struct page *array[];
};
time64_t last_inactive; /* Time of last drop of usage count */
atomic_t usage;
unsigned long flags;
-#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */
-#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */
-#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */
-#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */
-#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */
+#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */
+#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */
enum afs_cell_state state;
short error;
+ enum dns_record_source dns_source:8; /* Latest source of data from lookup */
+ enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */
+ unsigned int dns_lookup_count; /* Counter of DNS lookups */
/* Active fileserver interaction state. */
struct list_head proc_volumes; /* procfs volume list */
struct afs_vol_interest {
struct hlist_node srv_link; /* Link in server->cb_volumes */
struct hlist_head cb_interests; /* List of callback interests on the server */
- afs_volid_t vid; /* Volume ID to match */
+ union {
+ struct rcu_head rcu;
+ afs_volid_t vid; /* Volume ID to match */
+ };
unsigned int usage;
};
struct afs_vol_interest *vol_interest;
struct afs_server *server; /* Server on which this interest resides */
struct super_block *sb; /* Superblock on which inodes reside */
- afs_volid_t vid; /* Volume ID to match */
+ union {
+ struct rcu_head rcu;
+ afs_volid_t vid; /* Volume ID to match */
+ };
refcount_t usage;
};
afs_lock_type_t lock_type : 8;
/* outstanding callback notification on this file */
- struct afs_cb_interest *cb_interest; /* Server on which this resides */
+ struct afs_cb_interest __rcu *cb_interest; /* Server on which this resides */
unsigned int cb_s_break; /* Mass break counter on ->server */
unsigned int cb_v_break; /* Mass break counter on ->volume */
unsigned int cb_break; /* Break counter on vnode */
seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
time64_t cb_expires_at; /* time at which callback expires */
- unsigned cb_version; /* callback version */
- afs_callback_type_t cb_type; /* type of callback */
};
static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
* Cursor for iterating over a set of fileservers.
*/
struct afs_fs_cursor {
+ const struct afs_call_type *type; /* Type of call done */
struct afs_addr_cursor ac;
struct afs_vnode *vnode;
struct afs_server_list *server_list; /* Current server list (pins ref) */
#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */
#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+#define AFS_FS_CURSOR_INTR 0x0040 /* Set if op is interruptible */
unsigned short nr_iterations; /* Number of server iterations */
};
extern const struct dentry_operations afs_fs_dentry_operations;
extern void afs_d_release(struct dentry *);
-extern int afs_dir_remove_link(struct dentry *, struct key *, unsigned long, unsigned long);
/*
* dir_edit.c
/*
* fsclient.c
*/
-#define AFS_VNODE_NOT_YET_SET 0x01
-#define AFS_VNODE_META_CHANGED 0x02
-#define AFS_VNODE_DATA_CHANGED 0x04
-extern void afs_update_inode_from_status(struct afs_vnode *, struct afs_file_status *,
- const afs_dataversion_t *, u8);
-
-extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
+extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_status_cb *,
+ struct afs_volsync *);
extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
-extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
-extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64,
- struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
-extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
-extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
- struct afs_fid *, struct afs_file_status *);
+extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_status_cb *, struct afs_read *);
+extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t,
+ struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *);
+extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool,
+ struct afs_status_cb *);
+extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
+extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
+ struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *);
extern int afs_fs_rename(struct afs_fs_cursor *, const char *,
- struct afs_vnode *, const char *, u64, u64);
+ struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
- pgoff_t, pgoff_t, unsigned, unsigned);
-extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+ pgoff_t, pgoff_t, unsigned, unsigned, struct afs_status_cb *);
+extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *, struct afs_status_cb *);
extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
-extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
-extern int afs_fs_extend_lock(struct afs_fs_cursor *);
-extern int afs_fs_release_lock(struct afs_fs_cursor *);
+extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t, struct afs_status_cb *);
+extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct afs_status_cb *);
+extern int afs_fs_release_lock(struct afs_fs_cursor *, struct afs_status_cb *);
extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
struct afs_addr_cursor *, struct key *);
extern struct afs_call *afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
struct afs_addr_cursor *, struct key *,
unsigned int);
extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, unsigned int,
- struct afs_volsync *);
+ struct afs_fid *, struct afs_status_cb *,
+ unsigned int, struct afs_volsync *);
extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, struct afs_volsync *);
+ struct afs_fid *, struct afs_status_cb *,
+ struct afs_volsync *);
struct afs_acl {
u32 size;
u8 data[];
};
-extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *);
-extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *);
+extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *, struct afs_status_cb *);
+extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *,
+ struct afs_status_cb *);
/*
* fs_probe.c
/*
* inode.c
*/
-extern int afs_fetch_status(struct afs_vnode *, struct key *, bool);
+extern void afs_vnode_commit_status(struct afs_fs_cursor *,
+ struct afs_vnode *,
+ unsigned int,
+ const afs_dataversion_t *,
+ struct afs_status_cb *);
+extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_iget5_test(struct inode *, void *);
extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
extern struct inode *afs_iget(struct super_block *, struct key *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *,
+ struct afs_iget_data *, struct afs_status_cb *,
struct afs_cb_interest *,
struct afs_vnode *);
extern void afs_zap_data(struct afs_vnode *);
+extern bool afs_check_validity(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int afs_setattr(struct dentry *, struct iattr *);
* rotate.c
*/
extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *,
- struct key *);
+ struct key *, bool);
extern bool afs_select_fileserver(struct afs_fs_cursor *);
extern bool afs_select_current_fileserver(struct afs_fs_cursor *);
extern int afs_end_vnode_operation(struct afs_fs_cursor *);
extern int afs_extract_data(struct afs_call *, bool);
extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
+static inline void afs_set_fc_call(struct afs_call *call, struct afs_fs_cursor *fc)
+{
+ call->intr = fc->flags & AFS_FS_CURSOR_INTR;
+ fc->type = call->type;
+}
+
static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
{
call->kvec[0].iov_base = buf;
*/
extern void afs_put_permits(struct afs_permits *);
extern void afs_clear_permits(struct afs_vnode *);
-extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
+extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int,
+ struct afs_status_cb *);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
struct page *page, void *fsdata);
extern int afs_writepage(struct page *, struct writeback_control *);
extern int afs_writepages(struct address_space *, struct writeback_control *);
-extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
/*
* yfsclient.c
*/
-extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
-extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
-extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64,
- struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64,
- struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
-extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
-extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
-extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
- struct afs_fid *, struct afs_file_status *);
-extern int yfs_fs_rename(struct afs_fs_cursor *, const char *,
- struct afs_vnode *, const char *, u64, u64);
+extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_status_cb *,
+ struct afs_volsync *);
+extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_status_cb *, struct afs_read *);
+extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, struct afs_status_cb *,
+ struct afs_fid *, struct afs_status_cb *);
+extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, struct afs_status_cb *,
+ struct afs_fid *, struct afs_status_cb *);
+extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
+extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool,
+ struct afs_status_cb *);
+extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
+extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
+ struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *);
+extern int yfs_fs_rename(struct afs_fs_cursor *, const char *, struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
- pgoff_t, pgoff_t, unsigned, unsigned);
-extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+ pgoff_t, pgoff_t, unsigned, unsigned, struct afs_status_cb *);
+extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *, struct afs_status_cb *);
extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
-extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
-extern int yfs_fs_extend_lock(struct afs_fs_cursor *);
-extern int yfs_fs_release_lock(struct afs_fs_cursor *);
+extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t, struct afs_status_cb *);
+extern int yfs_fs_extend_lock(struct afs_fs_cursor *, struct afs_status_cb *);
+extern int yfs_fs_release_lock(struct afs_fs_cursor *, struct afs_status_cb *);
extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, struct afs_volsync *);
+ struct afs_fid *, struct afs_status_cb *,
+ struct afs_volsync *);
extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, unsigned int,
- struct afs_volsync *);
+ struct afs_fid *, struct afs_status_cb *,
+ unsigned int, struct afs_volsync *);
struct yfs_acl {
struct afs_acl *acl; /* Dir/file/symlink ACL */
};
extern void yfs_free_opaque_acl(struct yfs_acl *);
-extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, unsigned int);
-extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *);
+extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, struct yfs_acl *,
+ struct afs_status_cb *);
+extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *,
+ struct afs_status_cb *);
/*
* Miscellaneous inline functions.
return &vnode->vfs_inode;
}
-static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc,
- struct afs_vnode *vnode,
- unsigned int cb_break)
-{
- if (fc->ac.error == 0)
- afs_cache_permit(vnode, fc->key, cb_break);
-}
-
static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
struct afs_vnode *vnode)
{
seq_printf(m, "%3u %6lld %2u %s\n",
atomic_read(&cell->usage),
cell->dns_expiry - ktime_get_real_seconds(),
- vllist ? vllist->nr_servers : 0,
+ vllist->nr_servers,
cell->name);
return 0;
}
if (v == SEQ_START_TOKEN) {
seq_printf(m, "# source %s, status %s\n",
- dns_record_sources[vllist->source],
- dns_lookup_statuses[vllist->status]);
+ dns_record_sources[vllist ? vllist->source : 0],
+ dns_lookup_statuses[vllist ? vllist->status : 0]);
return 0;
}
if (pos == 0)
return SEQ_START_TOKEN;
- if (!vllist || pos - 1 >= vllist->nr_servers)
+ if (pos - 1 >= vllist->nr_servers)
return NULL;
return &vllist->servers[pos - 1];
* them here also using the io_lock.
*/
bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- struct key *key)
+ struct key *key, bool intr)
{
memset(fc, 0, sizeof(*fc));
fc->vnode = vnode;
fc->ac.error = SHRT_MAX;
fc->error = -EDESTADDRREQ;
- if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
- fc->error = -EINTR;
- fc->flags |= AFS_FS_CURSOR_STOP;
- return false;
+ if (intr) {
+ fc->flags |= AFS_FS_CURSOR_INTR;
+ if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+ fc->error = -EINTR;
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ return false;
+ }
+ } else {
+ mutex_lock(&vnode->io_lock);
}
if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
fc->untried = (1UL << fc->server_list->nr_servers) - 1;
fc->index = READ_ONCE(fc->server_list->preferred);
- cbi = vnode->cb_interest;
+ cbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->io_lock));
if (cbi) {
/* See if the vnode's preferred record is still available */
for (i = 0; i < fc->server_list->nr_servers; i++) {
/* Note that the callback promise is effectively broken */
write_seqlock(&vnode->cb_lock);
- ASSERTCMP(cbi, ==, vnode->cb_interest);
- vnode->cb_interest = NULL;
+ ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest));
+ rcu_assign_pointer(vnode->cb_interest, NULL);
if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
vnode->cb_break++;
write_sequnlock(&vnode->cb_lock);
*/
static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
{
- msleep_interruptible(1000);
- if (signal_pending(current)) {
- fc->error = -ERESTARTSYS;
- return false;
+ if (fc->flags & AFS_FS_CURSOR_INTR) {
+ msleep_interruptible(1000);
+ if (signal_pending(current)) {
+ fc->error = -ERESTARTSYS;
+ return false;
+ }
+ } else {
+ msleep(1000);
}
return true;
if (error < 0)
goto failed_set_error;
- fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+ fc->cbi = afs_get_cb_interest(
+ rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->io_lock)));
read_lock(&server->fs_lock);
alist = rcu_dereference_protected(server->addresses,
s->probe.abort_code);
}
+ error = e.error;
+
failed_set_error:
fc->error = error;
failed:
bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
{
struct afs_vnode *vnode = fc->vnode;
- struct afs_cb_interest *cbi = vnode->cb_interest;
+ struct afs_cb_interest *cbi;
struct afs_addr_list *alist;
int error = fc->ac.error;
_enter("");
+ cbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->io_lock));
+
switch (error) {
case SHRT_MAX:
if (!cbi) {
return false;
}
- fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+ fc->cbi = afs_get_cb_interest(cbi);
read_lock(&cbi->server->fs_lock);
alist = rcu_dereference_protected(cbi->server->addresses,
if (call->type->destructor)
call->type->destructor(call);
- afs_put_server(call->net, call->cm_server);
+ afs_put_server(call->net, call->server);
afs_put_cb_interest(call->net, call->cbi);
afs_put_addrlist(call->alist);
kfree(call->request);
afs_wake_up_async_call :
afs_wake_up_call_waiter),
call->upgrade,
+ call->intr,
call->debug_id);
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
call->rxcall = rxcall;
+ if (call->max_lifespan)
+ rxrpc_kernel_set_max_life(call->net->socket, rxcall,
+ call->max_lifespan);
+
/* send the request */
iov[0].iov_base = call->request;
iov[0].iov_len = call->request_size;
return;
}
- if (call->want_reply_time &&
+ if (!call->have_reply_time &&
rxrpc_kernel_get_reply_time(call->net->socket,
call->rxcall,
&call->reply_time))
- call->want_reply_time = false;
+ call->have_reply_time = true;
ret = call->type->deliver(call);
state = READ_ONCE(call->state);
break;
}
- if (timeout == 0 &&
+ if (call->intr && timeout == 0 &&
life == last_life && signal_pending(current)) {
if (stalled)
break;
ret = ac->error;
switch (ret) {
case 0:
- if (call->ret_reply0) {
- ret = (long)call->reply[0];
- call->reply[0] = NULL;
- }
+ ret = call->ret0;
+ call->ret0 = 0;
+
/* Fall through */
case -ECONNABORTED:
ac->responded = true;
permits = rcu_dereference_protected(vnode->permit_cache,
lockdep_is_held(&vnode->lock));
RCU_INIT_POINTER(vnode->permit_cache, NULL);
- vnode->cb_break++;
spin_unlock(&vnode->lock);
- if (permits)
- afs_put_permits(permits);
+ afs_put_permits(permits);
}
/*
* as the ACL *may* have changed.
*/
void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
- unsigned int cb_break)
+ unsigned int cb_break, struct afs_status_cb *scb)
{
struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL;
- afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);
+ afs_access_t caller_access = scb->status.caller_access;
size_t size = 0;
bool changed = false;
int i, j;
}
if (afs_cb_is_broken(cb_break, vnode,
- vnode->cb_interest)) {
+ rcu_dereference(vnode->cb_interest))) {
changed = true;
break;
}
}
}
- if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest))
+ if (afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)))
goto someone_else_changed_it;
/* We need a ref on any permits list we want to copy as we'll have to
kfree(new);
+ rcu_read_lock();
spin_lock(&vnode->lock);
zap = rcu_access_pointer(vnode->permit_cache);
- if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) &&
+ if (!afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)) &&
zap == permits)
rcu_assign_pointer(vnode->permit_cache, replacement);
else
zap = replacement;
spin_unlock(&vnode->lock);
+ rcu_read_unlock();
afs_put_permits(zap);
out_put:
afs_put_permits(permits);
*/
_debug("no valid permit");
- ret = afs_fetch_status(vnode, key, false);
+ ret = afs_fetch_status(vnode, key, false, _access);
if (ret < 0) {
*_access = 0;
_leave(" = %d", ret);
return ret;
}
- *_access = vnode->status.caller_access;
}
_leave(" = 0 [access %x]", *_access);
alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
&server->uuid);
if (IS_ERR(alist)) {
- fc->ac.error = PTR_ERR(alist);
- _leave(" = f [%d]", fc->ac.error);
+ if ((PTR_ERR(alist) == -ERESTARTSYS ||
+ PTR_ERR(alist) == -EINTR) &&
+ !(fc->flags & AFS_FS_CURSOR_INTR) &&
+ server->addresses) {
+ _leave(" = t [intr]");
+ return true;
+ }
+ fc->error = PTR_ERR(alist);
+ _leave(" = f [%d]", fc->error);
return false;
}
ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
TASK_INTERRUPTIBLE);
if (ret == -ERESTARTSYS) {
- fc->ac.error = ret;
+ if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
+ _leave(" = t [intr]");
+ return true;
+ }
+ fc->error = ret;
_leave(" = f [intr]");
return false;
}
static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
{
struct afs_super_info *as = AFS_FS_S(sb);
- struct afs_fid fid;
+ struct afs_iget_data iget_data;
struct inode *inode = NULL;
int ret;
} else {
sprintf(sb->s_id, "%llu", as->volume->vid);
afs_activate_volume(as->volume);
- fid.vid = as->volume->vid;
- fid.vnode = 1;
- fid.vnode_hi = 0;
- fid.unique = 1;
- inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL, NULL);
+ iget_data.fid.vid = as->volume->vid;
+ iget_data.fid.vnode = 1;
+ iget_data.fid.vnode_hi = 0;
+ iget_data.fid.unique = 1;
+ iget_data.cb_v_break = as->volume->cb_v_break;
+ iget_data.cb_s_break = 0;
+ inode = afs_iget(sb, ctx->key, &iget_data, NULL, NULL, NULL);
}
if (IS_ERR(inode))
vnode->volume = NULL;
vnode->lock_key = NULL;
vnode->permit_cache = NULL;
- vnode->cb_interest = NULL;
+ RCU_INIT_POINTER(vnode->cb_interest, NULL);
#ifdef CONFIG_AFS_FSCACHE
vnode->cache = NULL;
#endif
vnode->flags = 1 << AFS_VNODE_UNSET;
- vnode->cb_type = 0;
vnode->lock_state = AFS_VNODE_LOCK_NONE;
init_rwsem(&vnode->rmdir_lock);
_debug("DESTROY INODE %p", inode);
- ASSERTCMP(vnode->cb_interest, ==, NULL);
+ ASSERTCMP(rcu_access_pointer(vnode->cb_interest), ==, NULL);
atomic_dec(&afs_count_active_inodes);
}
return PTR_ERR(key);
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
ret = afs_end_vnode_operation(&fc);
}
if (bs.status > NR__dns_lookup_status)
bs.status = NR__dns_lookup_status;
+ /* See if we can update an old server record */
server = NULL;
- if (previous) {
- /* See if we can update an old server record */
- for (i = 0; i < previous->nr_servers; i++) {
- struct afs_vlserver *p = previous->servers[i].server;
-
- if (p->name_len == bs.name_len &&
- p->port == bs.port &&
- strncasecmp(b, p->name, bs.name_len) == 0) {
- server = afs_get_vlserver(p);
- break;
- }
+ for (i = 0; i < previous->nr_servers; i++) {
+ struct afs_vlserver *p = previous->servers[i].server;
+
+ if (p->name_len == bs.name_len &&
+ p->port == bs.port &&
+ strncasecmp(b, p->name, bs.name_len) == 0) {
+ server = afs_get_vlserver(p);
+ break;
}
}
void afs_vlserver_probe_result(struct afs_call *call)
{
struct afs_addr_list *alist = call->alist;
- struct afs_vlserver *server = call->reply[0];
- unsigned int server_index = (long)call->reply[1];
+ struct afs_vlserver *server = call->vlserver;
+ unsigned int server_index = call->server_index;
unsigned int index = call->addr_ix;
unsigned int rtt = UINT_MAX;
bool have_result = false;
static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
{
struct afs_cell *cell = vc->cell;
+ unsigned int dns_lookup_count;
+
+ if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
+ cell->dns_expiry <= ktime_get_real_seconds()) {
+ dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
+ set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
+ queue_work(afs_wq, &cell->manager);
+
+ if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
+ if (wait_var_event_interruptible(
+ &cell->dns_lookup_count,
+ smp_load_acquire(&cell->dns_lookup_count)
+ != dns_lookup_count) < 0) {
+ vc->error = -ERESTARTSYS;
+ return false;
+ }
+ }
- if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
- TASK_INTERRUPTIBLE)) {
- vc->error = -ERESTARTSYS;
- return false;
+ /* Status load is ordered after lookup counter load */
+ if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
+ vc->error = -EDESTADDRREQ;
+ return false;
+ }
}
read_lock(&cell->vl_servers_lock);
rcu_dereference_protected(cell->vl_servers,
lockdep_is_held(&cell->vl_servers_lock)));
read_unlock(&cell->vl_servers_lock);
- if (!vc->server_list || !vc->server_list->nr_servers)
+ if (!vc->server_list->nr_servers)
return false;
vc->untried = (1UL << vc->server_list->nr_servers) - 1;
/* unmarshall the reply once we've received all of it */
uvldb = call->buffer;
- entry = call->reply[0];
+ entry = call->ret_vldb;
nr_servers = ntohl(uvldb->nServers);
if (nr_servers > AFS_NMAXNSERVERS)
static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
{
- kfree(call->reply[0]);
+ kfree(call->ret_vldb);
afs_flat_call_destructor(call);
}
}
call->key = vc->key;
- call->reply[0] = entry;
- call->ret_reply0 = true;
+ call->ret_vldb = entry;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
/* Marshall the parameters */
bp = call->request;
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
- call->reply[0] = alist;
+ call->ret_alist = alist;
call->count = count;
call->count2 = nentries;
call->unmarshall++;
if (ret < 0)
return ret;
- alist = call->reply[0];
+ alist = call->ret_alist;
bp = call->buffer;
count = min(call->count, 4U);
for (i = 0; i < count; i++)
static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
{
- afs_put_server(call->net, (struct afs_server *)call->reply[0]);
- kfree(call->reply[1]);
+ afs_put_addrlist(call->ret_alist);
return afs_flat_call_destructor(call);
}
return ERR_PTR(-ENOMEM);
call->key = vc->key;
- call->reply[0] = NULL;
- call->ret_reply0 = true;
+ call->ret_alist = NULL;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
/* Marshall the parameters */
bp = call->request;
static void afs_destroy_vl_get_capabilities(struct afs_call *call)
{
- struct afs_vlserver *server = call->reply[0];
-
- afs_put_vlserver(call->net, server);
+ afs_put_vlserver(call->net, call->vlserver);
afs_flat_call_destructor(call);
}
return ERR_PTR(-ENOMEM);
call->key = key;
- call->reply[0] = afs_get_vlserver(server);
- call->reply[1] = (void *)(long)server_index;
+ call->vlserver = afs_get_vlserver(server);
+ call->server_index = server_index;
call->upgrade = true;
- call->want_reply_time = true;
call->async = true;
+ call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
/* marshall the parameters */
bp = call->request;
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
- call->reply[0] = alist;
+ call->ret_alist = alist;
if (call->count == 0)
goto extract_volendpoints;
if (ret < 0)
return ret;
- alist = call->reply[0];
+ alist = call->ret_alist;
bp = call->buffer;
switch (call->count2) {
case YFS_ENDPOINT_IPV4:
break;
}
- alist = call->reply[0];
_leave(" = 0 [done]");
return 0;
}
return ERR_PTR(-ENOMEM);
call->key = vc->key;
- call->reply[0] = NULL;
- call->ret_reply0 = true;
+ call->ret_alist = NULL;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
/* Marshall the parameters */
bp = call->request;
_leave("");
}
+/*
+ * completion of write to server
+ */
+static void afs_pages_written_back(struct afs_vnode *vnode,
+ pgoff_t first, pgoff_t last)
+{
+ struct pagevec pv;
+ unsigned long priv;
+ unsigned count, loop;
+
+ _enter("{%llx:%llu},{%lx-%lx}",
+ vnode->fid.vid, vnode->fid.vnode, first, last);
+
+ pagevec_init(&pv);
+
+ do {
+ _debug("done %lx-%lx", first, last);
+
+ count = last - first + 1;
+ if (count > PAGEVEC_SIZE)
+ count = PAGEVEC_SIZE;
+ pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
+ first, count, pv.pages);
+ ASSERTCMP(pv.nr, ==, count);
+
+ for (loop = 0; loop < count; loop++) {
+ priv = page_private(pv.pages[loop]);
+ trace_afs_page_dirty(vnode, tracepoint_string("clear"),
+ pv.pages[loop]->index, priv);
+ set_page_private(pv.pages[loop], 0);
+ end_page_writeback(pv.pages[loop]);
+ }
+ first += count;
+ __pagevec_release(&pv);
+ } while (first <= last);
+
+ afs_prune_wb_keys(vnode);
+ _leave("");
+}
+
/*
* write to a file
*/
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_wb_key *wbk = NULL;
struct list_head *p;
int ret = -ENOKEY, ret2;
vnode->fid.unique,
first, last, offset, to);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ return -ENOMEM;
+
spin_lock(&vnode->wb_lock);
p = vnode->wb_keys.next;
spin_unlock(&vnode->wb_lock);
afs_put_wb_key(wbk);
+ kfree(scb);
_leave(" = %d [no keys]", ret);
return ret;
_debug("USE WB KEY %u", key_serial(wbk->key));
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, wbk->key, false)) {
+ afs_dataversion_t data_version = vnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_store_data(&fc, mapping, first, last, offset, to);
+ afs_fs_store_data(&fc, mapping, first, last, offset, to, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
+ if (fc.ac.error == 0)
+ afs_pages_written_back(vnode, first, last);
ret = afs_end_vnode_operation(&fc);
}
}
afs_put_wb_key(wbk);
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
return ret;
}
-/*
- * completion of write to server
- */
-void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
-{
- struct pagevec pv;
- unsigned long priv;
- unsigned count, loop;
- pgoff_t first = call->first, last = call->last;
-
- _enter("{%llx:%llu},{%lx-%lx}",
- vnode->fid.vid, vnode->fid.vnode, first, last);
-
- pagevec_init(&pv);
-
- do {
- _debug("done %lx-%lx", first, last);
-
- count = last - first + 1;
- if (count > PAGEVEC_SIZE)
- count = PAGEVEC_SIZE;
- pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
- first, count, pv.pages);
- ASSERTCMP(pv.nr, ==, count);
-
- for (loop = 0; loop < count; loop++) {
- priv = page_private(pv.pages[loop]);
- trace_afs_page_dirty(vnode, tracepoint_string("clear"),
- pv.pages[loop]->index, priv);
- set_page_private(pv.pages[loop], 0);
- end_page_writeback(pv.pages[loop]);
- }
- first += count;
- __pagevec_release(&pv);
- } while (first <= last);
-
- afs_prune_wb_keys(vnode);
- _leave("");
-}
-
/*
* write to an AFS file
*/
void *buffer, size_t size)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_acl *acl = NULL;
struct key *key;
- int ret;
+ int ret = -ENOMEM;
+
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ goto error;
key = afs_request_key(vnode->volume->cell);
- if (IS_ERR(key))
- return PTR_ERR(key);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_scb;
+ }
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- acl = afs_fs_fetch_acl(&fc);
+ acl = afs_fs_fetch_acl(&fc, scb);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
if (ret == 0) {
ret = acl->size;
if (size > 0) {
- ret = -ERANGE;
- if (acl->size > size)
- return -ERANGE;
- memcpy(buffer, acl->data, acl->size);
- ret = acl->size;
+ if (acl->size <= size)
+ memcpy(buffer, acl->data, acl->size);
+ else
+ ret = -ERANGE;
}
kfree(acl);
}
key_put(key);
+error_scb:
+ kfree(scb);
+error:
return ret;
}
const void *buffer, size_t size, int flags)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_acl *acl = NULL;
struct key *key;
- int ret;
+ int ret = -ENOMEM;
if (flags == XATTR_CREATE)
return -EINVAL;
- key = afs_request_key(vnode->volume->cell);
- if (IS_ERR(key))
- return PTR_ERR(key);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ goto error;
acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
- if (!acl) {
- key_put(key);
- return -ENOMEM;
+ if (!acl)
+ goto error_scb;
+
+ key = afs_request_key(vnode->volume->cell);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_acl;
}
acl->size = size;
memcpy(acl->data, buffer, size);
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_store_acl(&fc, acl);
+ afs_fs_store_acl(&fc, acl, scb);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
- kfree(acl);
key_put(key);
+error_acl:
+ kfree(acl);
+error_scb:
+ kfree(scb);
+error:
return ret;
}
void *buffer, size_t size)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(inode);
struct yfs_acl *yacl = NULL;
struct key *key;
- unsigned int flags = 0;
char buf[16], *data;
- int which = 0, dsize, ret;
+ int which = 0, dsize, ret = -ENOMEM;
if (strcmp(name, "acl") == 0)
which = 0;
else
return -EOPNOTSUPP;
+ yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL);
+ if (!yacl)
+ goto error;
+
if (which == 0)
- flags |= YFS_ACL_WANT_ACL;
+ yacl->flags |= YFS_ACL_WANT_ACL;
else if (which == 3)
- flags |= YFS_ACL_WANT_VOL_ACL;
+ yacl->flags |= YFS_ACL_WANT_VOL_ACL;
+
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ goto error_yacl;
key = afs_request_key(vnode->volume->cell);
- if (IS_ERR(key))
- return PTR_ERR(key);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_scb;
+ }
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- yacl = yfs_fs_fetch_opaque_acl(&fc, flags);
+ yfs_fs_fetch_opaque_acl(&fc, yacl, scb);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
- if (ret == 0) {
- switch (which) {
- case 0:
- data = yacl->acl->data;
- dsize = yacl->acl->size;
- break;
- case 1:
- data = buf;
- dsize = snprintf(buf, sizeof(buf), "%u",
- yacl->inherit_flag);
- break;
- case 2:
- data = buf;
- dsize = snprintf(buf, sizeof(buf), "%u",
- yacl->num_cleaned);
- break;
- case 3:
- data = yacl->vol_acl->data;
- dsize = yacl->vol_acl->size;
- break;
- default:
- ret = -EOPNOTSUPP;
- goto out;
- }
+ if (ret < 0)
+ goto error_key;
+
+ switch (which) {
+ case 0:
+ data = yacl->acl->data;
+ dsize = yacl->acl->size;
+ break;
+ case 1:
+ data = buf;
+ dsize = snprintf(buf, sizeof(buf), "%u", yacl->inherit_flag);
+ break;
+ case 2:
+ data = buf;
+ dsize = snprintf(buf, sizeof(buf), "%u", yacl->num_cleaned);
+ break;
+ case 3:
+ data = yacl->vol_acl->data;
+ dsize = yacl->vol_acl->size;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ goto error_key;
+ }
- ret = dsize;
- if (size > 0) {
- if (dsize > size) {
- ret = -ERANGE;
- goto out;
- }
- memcpy(buffer, data, dsize);
+ ret = dsize;
+ if (size > 0) {
+ if (dsize > size) {
+ ret = -ERANGE;
+ goto error_key;
}
+ memcpy(buffer, data, dsize);
}
-out:
- yfs_free_opaque_acl(yacl);
+error_key:
key_put(key);
+error_scb:
+ kfree(scb);
+error_yacl:
+ yfs_free_opaque_acl(yacl);
+error:
return ret;
}
const void *buffer, size_t size, int flags)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_acl *acl = NULL;
struct key *key;
- int ret;
+ int ret = -ENOMEM;
if (flags == XATTR_CREATE ||
strcmp(name, "acl") != 0)
return -EINVAL;
- key = afs_request_key(vnode->volume->cell);
- if (IS_ERR(key))
- return PTR_ERR(key);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ goto error;
acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
- if (!acl) {
- key_put(key);
- return -ENOMEM;
- }
+ if (!acl)
+ goto error_scb;
acl->size = size;
memcpy(acl->data, buffer, size);
+ key = afs_request_key(vnode->volume->cell);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_acl;
+ }
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- yfs_fs_store_opaque_acl2(&fc, acl);
+ yfs_fs_store_opaque_acl2(&fc, acl, scb);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
+error_acl:
kfree(acl);
key_put(key);
+error_scb:
+ kfree(scb);
+error:
return ret;
}
/*
* Decode a YFSFetchStatus block
*/
-static int xdr_decode_YFSFetchStatus(struct afs_call *call,
- const __be32 **_bp,
- struct afs_file_status *status,
- struct afs_vnode *vnode,
- const afs_dataversion_t *expected_version,
- struct afs_read *read_req)
+static int xdr_decode_YFSFetchStatus(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
+ struct afs_file_status *status = &scb->status;
u32 type;
- u8 flags = 0;
status->abort_code = ntohl(xdr->abort_code);
if (status->abort_code != 0) {
- if (vnode && status->abort_code == VNOVNODE) {
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ if (status->abort_code == VNOVNODE)
status->nlink = 0;
- __afs_break_callback(vnode);
- }
+ scb->have_error = true;
return 0;
}
case AFS_FTYPE_FILE:
case AFS_FTYPE_DIR:
case AFS_FTYPE_SYMLINK:
- if (type != status->type &&
- vnode &&
- !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
- pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- status->type, type);
- goto bad;
- }
status->type = type;
break;
default:
goto bad;
}
-#define EXTRACT_M4(FIELD) \
- do { \
- u32 x = ntohl(xdr->FIELD); \
- if (status->FIELD != x) { \
- flags |= AFS_VNODE_META_CHANGED; \
- status->FIELD = x; \
- } \
- } while (0)
-
-#define EXTRACT_M8(FIELD) \
- do { \
- u64 x = xdr_to_u64(xdr->FIELD); \
- if (status->FIELD != x) { \
- flags |= AFS_VNODE_META_CHANGED; \
- status->FIELD = x; \
- } \
- } while (0)
-
-#define EXTRACT_D8(FIELD) \
- do { \
- u64 x = xdr_to_u64(xdr->FIELD); \
- if (status->FIELD != x) { \
- flags |= AFS_VNODE_DATA_CHANGED; \
- status->FIELD = x; \
- } \
- } while (0)
-
- EXTRACT_M4(nlink);
- EXTRACT_D8(size);
- EXTRACT_D8(data_version);
- EXTRACT_M8(author);
- EXTRACT_M8(owner);
- EXTRACT_M8(group);
- EXTRACT_M4(mode);
- EXTRACT_M4(caller_access); /* call ticket dependent */
- EXTRACT_M4(anon_access);
-
- status->mtime_client = xdr_to_time(xdr->mtime_client);
- status->mtime_server = xdr_to_time(xdr->mtime_server);
- status->lock_count = ntohl(xdr->lock_count);
-
- if (read_req) {
- read_req->data_version = status->data_version;
- read_req->file_size = status->size;
- }
+ status->nlink = ntohl(xdr->nlink);
+ status->author = xdr_to_u64(xdr->author);
+ status->owner = xdr_to_u64(xdr->owner);
+ status->caller_access = ntohl(xdr->caller_access); /* Ticket dependent */
+ status->anon_access = ntohl(xdr->anon_access);
+ status->mode = ntohl(xdr->mode) & S_IALLUGO;
+ status->group = xdr_to_u64(xdr->group);
+ status->lock_count = ntohl(xdr->lock_count);
+
+ status->mtime_client = xdr_to_time(xdr->mtime_client);
+ status->mtime_server = xdr_to_time(xdr->mtime_server);
+ status->size = xdr_to_u64(xdr->size);
+ status->data_version = xdr_to_u64(xdr->data_version);
+ scb->have_status = true;
*_bp += xdr_size(xdr);
-
- if (vnode) {
- if (test_bit(AFS_VNODE_UNSET, &vnode->flags))
- flags |= AFS_VNODE_NOT_YET_SET;
- afs_update_inode_from_status(vnode, status, expected_version,
- flags);
- }
-
return 0;
bad:
return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
}
-/*
- * Decode the file status. We need to lock the target vnode if we're going to
- * update its status so that stat() sees the attributes update atomically.
- */
-static int yfs_decode_status(struct afs_call *call,
- const __be32 **_bp,
- struct afs_file_status *status,
- struct afs_vnode *vnode,
- const afs_dataversion_t *expected_version,
- struct afs_read *read_req)
-{
- int ret;
-
- if (!vnode)
- return xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
- expected_version, read_req);
-
- write_seqlock(&vnode->cb_lock);
- ret = xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
- expected_version, read_req);
- write_sequnlock(&vnode->cb_lock);
- return ret;
-}
-
/*
* Decode a YFSCallBack block
*/
-static void xdr_decode_YFSCallBack(struct afs_call *call,
- struct afs_vnode *vnode,
- const __be32 **_bp)
-{
- struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp;
- struct afs_cb_interest *old, *cbi = call->cbi;
- u64 cb_expiry;
-
- write_seqlock(&vnode->cb_lock);
-
- if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
- cb_expiry = xdr_to_u64(xdr->expiration_time);
- do_div(cb_expiry, 10 * 1000 * 1000);
- vnode->cb_version = ntohl(xdr->version);
- vnode->cb_type = ntohl(xdr->type);
- vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds();
- old = vnode->cb_interest;
- if (old != call->cbi) {
- vnode->cb_interest = cbi;
- cbi = old;
- }
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- }
-
- write_sequnlock(&vnode->cb_lock);
- call->cbi = cbi;
- *_bp += xdr_size(xdr);
-}
-
-static void xdr_decode_YFSCallBack_raw(const __be32 **_bp,
- struct afs_callback *cb)
+static void xdr_decode_YFSCallBack(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
struct yfs_xdr_YFSCallBack *x = (void *)*_bp;
- u64 cb_expiry;
-
- cb_expiry = xdr_to_u64(x->expiration_time);
- do_div(cb_expiry, 10 * 1000 * 1000);
- cb->version = ntohl(x->version);
- cb->type = ntohl(x->type);
- cb->expires_at = cb_expiry + ktime_get_real_seconds();
+ struct afs_callback *cb = &scb->callback;
+ ktime_t cb_expiry;
+ cb_expiry = call->reply_time;
+ cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100);
+ cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC);
+ scb->have_cb = true;
*_bp += xdr_size(x);
}
}
/*
- * deliver reply data to an FS.FetchStatus
+ * Deliver a reply that's a status, callback and volsync.
*/
-static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call)
+static int yfs_deliver_fs_status_cb_and_volsync(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
if (ret < 0)
return ret;
- _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSCallBack(call, vnode, &bp);
- xdr_decode_YFSVolSync(&bp, call->reply[1]);
+ xdr_decode_YFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Deliver reply data to operations that just return a file status and a volume
+ * sync record.
+ */
+static int yfs_deliver_status_and_volsync(struct afs_call *call)
+{
+ const __be32 *bp;
+ int ret;
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
+ if (ret < 0)
+ return ret;
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = {
.name = "YFS.FetchStatus(vnode)",
.op = yfs_FS_FetchStatus,
- .deliver = yfs_deliver_fs_fetch_status_vnode,
+ .deliver = yfs_deliver_fs_status_cb_and_volsync,
.destructor = afs_flat_call_destructor,
};
/*
* Fetch the status information for a file.
*/
-int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync,
- bool new_inode)
+int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb,
+ struct afs_volsync *volsync)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
}
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = volsync;
- call->expected_version = new_inode ? 1 : vnode->status.data_version;
+ call->out_scb = scb;
+ call->out_volsync = volsync;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_YFSFid(bp, &vnode->fid);
yfs_check_req(call, bp);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
- struct afs_read *req = call->reply[2];
+ struct afs_read *req = call->read_request;
const __be32 *bp;
unsigned int size;
int ret;
if (req->offset == PAGE_SIZE) {
req->offset = 0;
if (req->page_done)
- req->page_done(call, req);
+ req->page_done(req);
req->index++;
if (req->remain > 0)
goto begin_page;
return ret;
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &vnode->status.data_version, req);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSCallBack(call, vnode, &bp);
- xdr_decode_YFSVolSync(&bp, call->reply[1]);
+ xdr_decode_YFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
+
+ req->data_version = call->out_scb->status.data_version;
+ req->file_size = call->out_scb->status.size;
call->unmarshall++;
zero_user_segment(req->pages[req->index],
req->offset, PAGE_SIZE);
if (req->page_done)
- req->page_done(call, req);
+ req->page_done(req);
req->offset = 0;
}
static void yfs_fetch_data_destructor(struct afs_call *call)
{
- struct afs_read *req = call->reply[2];
-
- afs_put_read(req);
+ afs_put_read(call->read_request);
afs_flat_call_destructor(call);
}
/*
* Fetch data from a file.
*/
-int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
+int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_status_cb *scb,
+ struct afs_read *req)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = NULL; /* volsync */
- call->reply[2] = req;
- call->expected_version = vnode->status.data_version;
- call->want_reply_time = true;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
+ call->read_request = req;
/* marshall the parameters */
bp = call->request;
yfs_check_req(call, bp);
refcount_inc(&req->usage);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_create_vnode(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_YFSFid(&bp, call->reply[1]);
- ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ xdr_decode_YFSFid(&bp, call->out_fid);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSCallBack_raw(&bp, call->reply[3]);
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
int yfs_fs_create_file(struct afs_fs_cursor *fc,
const char *name,
umode_t mode,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb)
+ struct afs_status_cb *new_scb)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, rplsz;
__be32 *bp;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->reply[3] = newcb;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = new_scb;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_u32(bp, YFSCREATEFILE);
bp = xdr_encode_u32(bp, 0); /* RPC flags */
- bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ bp = xdr_encode_YFSFid(bp, &dvnode->fid);
bp = xdr_encode_string(bp, name, namesz);
bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
yfs_check_req(call, bp);
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call1(call, &vnode->fid, name);
+ trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
int yfs_fs_make_dir(struct afs_fs_cursor *fc,
const char *name,
umode_t mode,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb)
+ struct afs_status_cb *new_scb)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, rplsz;
__be32 *bp;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->reply[3] = newcb;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = new_scb;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_u32(bp, YFSMAKEDIR);
bp = xdr_encode_u32(bp, 0); /* RPC flags */
- bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ bp = xdr_encode_YFSFid(bp, &dvnode->fid);
bp = xdr_encode_string(bp, name, namesz);
bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
yfs_check_req(call, bp);
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call1(call, &vnode->fid, name);
+ trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_remove_file2(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply[0];
- struct afs_vnode *vnode = call->reply[1];
struct afs_fid fid;
const __be32 *bp;
int ret;
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
xdr_decode_YFSFid(&bp, &fid);
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
/* Was deleted if vnode->status.abort_code == VNOVNODE. */
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
return 0;
}
* Remove a file and retrieve new file status.
*/
int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, u64 current_data_version)
+ const char *name, struct afs_status_cb *dvnode_scb,
+ struct afs_status_cb *vnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_scb = vnode_scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_remove(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply[0];
const __be32 *bp;
int ret;
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
return 0;
}
* remove a file or directory
*/
int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, bool isdir, u64 current_data_version)
+ const char *name, bool isdir,
+ struct afs_status_cb *dvnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_link(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
const __be32 *bp;
int ret;
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
}
* Make a hard link.
*/
int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, u64 current_data_version)
+ const char *name,
+ struct afs_status_cb *dvnode_scb,
+ struct afs_status_cb *vnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_scb = vnode_scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &vnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_symlink(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_YFSFid(&bp, call->reply[1]);
- ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ xdr_decode_YFSFid(&bp, call->out_fid);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
int yfs_fs_symlink(struct afs_fs_cursor *fc,
const char *name,
const char *contents,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus)
+ struct afs_status_cb *vnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = vnode_scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_rename(struct afs_call *call)
{
- struct afs_vnode *orig_dvnode = call->reply[0];
- struct afs_vnode *new_dvnode = call->reply[1];
const __be32 *bp;
int ret;
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- if (new_dvnode != orig_dvnode) {
- ret = yfs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
- &call->expected_version_2, NULL);
+ if (call->out_dir_scb != call->out_scb) {
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
}
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
}
const char *orig_name,
struct afs_vnode *new_dvnode,
const char *new_name,
- u64 current_orig_data_version,
- u64 current_new_data_version)
+ struct afs_status_cb *orig_dvnode_scb,
+ struct afs_status_cb *new_dvnode_scb)
{
struct afs_vnode *orig_dvnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = orig_dvnode;
- call->reply[1] = new_dvnode;
- call->expected_version = current_orig_data_version + 1;
- call->expected_version_2 = current_new_data_version + 1;
+ call->out_dir_scb = orig_dvnode_scb;
+ call->out_scb = new_dvnode_scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
-/*
- * Deliver reply data to a YFS.StoreData64 operation.
- */
-static int yfs_deliver_fs_store_data(struct afs_call *call)
-{
- struct afs_vnode *vnode = call->reply[0];
- const __be32 *bp;
- int ret;
-
- _enter("");
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
- if (ret < 0)
- return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
-
- afs_pages_written_back(vnode, call);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
/*
* YFS.StoreData64 operation type.
*/
static const struct afs_call_type yfs_RXYFSStoreData64 = {
.name = "YFS.StoreData64",
.op = yfs_FS_StoreData64,
- .deliver = yfs_deliver_fs_store_data,
+ .deliver = yfs_deliver_status_and_volsync,
.destructor = afs_flat_call_destructor,
};
*/
int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
pgoff_t first, pgoff_t last,
- unsigned offset, unsigned to)
+ unsigned offset, unsigned to,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
call->key = fc->key;
call->mapping = mapping;
- call->reply[0] = vnode;
call->first = first;
call->last = last;
call->first_offset = offset;
call->last_to = to;
call->send_pages = true;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
-/*
- * deliver reply data to an FS.StoreStatus
- */
-static int yfs_deliver_fs_store_status(struct afs_call *call)
-{
- struct afs_vnode *vnode = call->reply[0];
- const __be32 *bp;
- int ret;
-
- _enter("");
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
- if (ret < 0)
- return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
/*
* YFS.StoreStatus operation type
*/
static const struct afs_call_type yfs_RXYFSStoreStatus = {
.name = "YFS.StoreStatus",
.op = yfs_FS_StoreStatus,
- .deliver = yfs_deliver_fs_store_status,
+ .deliver = yfs_deliver_status_and_volsync,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = {
.name = "YFS.StoreData64",
.op = yfs_FS_StoreData64,
- .deliver = yfs_deliver_fs_store_status,
+ .deliver = yfs_deliver_status_and_volsync,
.destructor = afs_flat_call_destructor,
};
* Set the attributes on a file, using YFS.StoreData64 rather than
* YFS.StoreStatus so as to alter the file size also.
*/
-static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
+static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
* Set the attributes on a file, using YFS.StoreData64 if there's a change in
* file size, and YFS.StoreStatus otherwise.
*/
-int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
+int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
__be32 *bp;
if (attr->ia_valid & ATTR_SIZE)
- return yfs_fs_setattr_size(fc, attr);
+ return yfs_fs_setattr_size(fc, attr, scb);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
return ret;
bp = call->buffer;
- xdr_decode_YFSFetchVolumeStatus(&bp, call->reply[1]);
+ xdr_decode_YFSFetchVolumeStatus(&bp, call->out_volstatus);
call->unmarshall++;
afs_extract_to_tmp(call);
return afs_protocol_error(call, -EBADMSG,
afs_eproto_volname_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the volume name */
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("volname '%s'", p);
afs_extract_to_tmp(call);
return afs_protocol_error(call, -EBADMSG,
afs_eproto_offline_msg_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the offline message */
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("offline '%s'", p);
return afs_protocol_error(call, -EBADMSG,
afs_eproto_motd_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the message of the day */
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("motd '%s'", p);
return 0;
}
-/*
- * Destroy a YFS.GetVolumeStatus call.
- */
-static void yfs_get_volume_status_call_destructor(struct afs_call *call)
-{
- kfree(call->reply[2]);
- call->reply[2] = NULL;
- afs_flat_call_destructor(call);
-}
-
/*
* YFS.GetVolumeStatus operation type
*/
.name = "YFS.GetVolumeStatus",
.op = yfs_FS_GetVolumeStatus,
.deliver = yfs_deliver_fs_get_volume_status,
- .destructor = yfs_get_volume_status_call_destructor,
+ .destructor = afs_flat_call_destructor,
};
/*
struct afs_call *call;
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
- void *tmpbuf;
_enter("");
- tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
- if (!tmpbuf)
- return -ENOMEM;
-
call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus,
sizeof(__be32) * 2 +
sizeof(struct yfs_xdr_u64),
- sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
- sizeof(__be32));
- if (!call) {
- kfree(tmpbuf);
+ max_t(size_t,
+ sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
+ sizeof(__be32),
+ AFSOPAQUEMAX + 1));
+ if (!call)
return -ENOMEM;
- }
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = vs;
- call->reply[2] = tmpbuf;
+ call->out_volstatus = vs;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
-/*
- * Deliver reply data to operations that just return a file status and a volume
- * sync record.
- */
-static int yfs_deliver_status_and_volsync(struct afs_call *call)
-{
- struct afs_vnode *vnode = call->reply[0];
- const __be32 *bp;
- int ret;
-
- _enter("{%u}", call->unmarshall);
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
- if (ret < 0)
- return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
/*
* YFS.SetLock operation type
*/
/*
* Set a lock on a file
*/
-int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
+int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->want_reply_time = true;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_calli(call, &vnode->fid, type);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
* extend a lock on a file
*/
-int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
+int yfs_fs_extend_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->want_reply_time = true;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
* release a lock on a file
*/
-int yfs_fs_release_lock(struct afs_fs_cursor *fc)
+int yfs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
-/*
- * Deliver reply data to an FS.FetchStatus with no vnode.
- */
-static int yfs_deliver_fs_fetch_status(struct afs_call *call)
-{
- struct afs_file_status *status = call->reply[1];
- struct afs_callback *callback = call->reply[2];
- struct afs_volsync *volsync = call->reply[3];
- struct afs_vnode *vnode = call->reply[0];
- const __be32 *bp;
- int ret;
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- ret = yfs_decode_status(call, &bp, status, vnode,
- &call->expected_version, NULL);
- if (ret < 0)
- return ret;
- xdr_decode_YFSCallBack_raw(&bp, callback);
- xdr_decode_YFSVolSync(&bp, volsync);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
/*
* YFS.FetchStatus operation type
*/
static const struct afs_call_type yfs_RXYFSFetchStatus = {
.name = "YFS.FetchStatus",
.op = yfs_FS_FetchStatus,
- .deliver = yfs_deliver_fs_fetch_status,
+ .deliver = yfs_deliver_fs_status_cb_and_volsync,
.destructor = afs_flat_call_destructor,
};
int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
struct afs_net *net,
struct afs_fid *fid,
- struct afs_file_status *status,
- struct afs_callback *callback,
+ struct afs_status_cb *scb,
struct afs_volsync *volsync)
{
struct afs_call *call;
}
call->key = fc->key;
- call->reply[0] = NULL; /* vnode for fid[0] */
- call->reply[1] = status;
- call->reply[2] = callback;
- call->reply[3] = volsync;
- call->expected_version = 1; /* vnode->status.data_version */
+ call->out_scb = scb;
+ call->out_volsync = volsync;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_YFSFid(bp, fid);
yfs_check_req(call, bp);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
{
- struct afs_file_status *statuses;
- struct afs_callback *callbacks;
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_status_cb *scb;
const __be32 *bp;
u32 tmp;
int ret;
return ret;
bp = call->buffer;
- statuses = call->reply[1];
- ret = yfs_decode_status(call, &bp, &statuses[call->count],
- call->count == 0 ? vnode : NULL,
- NULL, NULL);
+ scb = &call->out_scb[call->count];
+ ret = xdr_decode_YFSFetchStatus(&bp, call, scb);
if (ret < 0)
return ret;
_debug("unmarshall CB array");
bp = call->buffer;
- callbacks = call->reply[2];
- xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]);
- statuses = call->reply[1];
- if (call->count == 0 && vnode && statuses[0].abort_code == 0) {
- bp = call->buffer;
- xdr_decode_YFSCallBack(call, vnode, &bp);
- }
+ scb = &call->out_scb[call->count];
+ xdr_decode_YFSCallBack(&bp, call, scb);
call->count++;
if (call->count < call->count2)
goto more_cbs;
return ret;
bp = call->buffer;
- xdr_decode_YFSVolSync(&bp, call->reply[3]);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
struct afs_net *net,
struct afs_fid *fids,
- struct afs_file_status *statuses,
- struct afs_callback *callbacks,
+ struct afs_status_cb *statuses,
unsigned int nr_fids,
struct afs_volsync *volsync)
{
}
call->key = fc->key;
- call->reply[0] = NULL; /* vnode for fid[0] */
- call->reply[1] = statuses;
- call->reply[2] = callbacks;
- call->reply[3] = volsync;
+ call->out_scb = statuses;
+ call->out_volsync = volsync;
call->count2 = nr_fids;
/* marshall the parameters */
bp = xdr_encode_YFSFid(bp, &fids[i]);
yfs_check_req(call, bp);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &fids[0]);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
*/
static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
{
- struct afs_volsync *volsync = call->reply[2];
- struct afs_vnode *vnode = call->reply[1];
- struct yfs_acl *yacl = call->reply[0];
+ struct yfs_acl *yacl = call->out_yacl;
struct afs_acl *acl;
const __be32 *bp;
unsigned int size;
bp = call->buffer;
yacl->inherit_flag = ntohl(*bp++);
yacl->num_cleaned = ntohl(*bp++);
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSVolSync(&bp, volsync);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
}
}
-static void yfs_destroy_fs_fetch_opaque_acl(struct afs_call *call)
-{
- yfs_free_opaque_acl(call->reply[0]);
- afs_flat_call_destructor(call);
-}
-
/*
* YFS.FetchOpaqueACL operation type
*/
.name = "YFS.FetchOpaqueACL",
.op = yfs_FS_FetchOpaqueACL,
.deliver = yfs_deliver_fs_fetch_opaque_acl,
- .destructor = yfs_destroy_fs_fetch_opaque_acl,
+ .destructor = afs_flat_call_destructor,
};
/*
* Fetch the YFS advanced ACLs for a file.
*/
struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc,
- unsigned int flags)
+ struct yfs_acl *yacl,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
- struct yfs_acl *yacl;
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
sizeof(__be32) * 2 +
sizeof(struct yfs_xdr_YFSFetchStatus) +
sizeof(struct yfs_xdr_YFSVolSync));
- if (!call)
- goto nomem;
-
- yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL);
- if (!yacl)
- goto nomem_call;
+ if (!call) {
+ fc->ac.error = -ENOMEM;
+ return ERR_PTR(-ENOMEM);
+ }
- yacl->flags = flags;
call->key = fc->key;
- call->reply[0] = yacl;
- call->reply[1] = vnode;
- call->reply[2] = NULL; /* volsync */
- call->ret_reply0 = true;
+ call->out_yacl = yacl;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_YFSFid(bp, &vnode->fid);
yfs_check_req(call, bp);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
afs_make_call(&fc->ac, call, GFP_KERNEL);
return (struct yfs_acl *)afs_wait_for_call_to_complete(call, &fc->ac);
-
-nomem_call:
- afs_put_call(call);
-nomem:
- fc->ac.error = -ENOMEM;
- return ERR_PTR(-ENOMEM);
}
/*
/*
* Fetch the YFS ACL for a file.
*/
-int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl)
+int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
}
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[2] = NULL; /* volsync */
+ call->out_scb = scb;
+ call->out_volsync = NULL;
/* marshall the parameters */
bp = call->request;
int have = ci->i_snap_caps;
if ((have & mask) == mask) {
- dout("__ceph_caps_issued_mask %p snap issued %s"
- " (mask %s)\n", &ci->vfs_inode,
+ dout("__ceph_caps_issued_mask ino 0x%lx snap issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino,
ceph_cap_string(have),
ceph_cap_string(mask));
return 1;
if (!__cap_is_valid(cap))
continue;
if ((cap->issued & mask) == mask) {
- dout("__ceph_caps_issued_mask %p cap %p issued %s"
- " (mask %s)\n", &ci->vfs_inode, cap,
+ dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino, cap,
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch)
/* does a combination of caps satisfy mask? */
have |= cap->issued;
if ((have & mask) == mask) {
- dout("__ceph_caps_issued_mask %p combo issued %s"
- " (mask %s)\n", &ci->vfs_inode,
+ dout("__ceph_caps_issued_mask ino 0x%lx combo issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino,
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch) {
if (datasync)
goto out;
- inode_lock(inode);
-
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
ret = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
}
- inode_unlock(inode);
out:
dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
return ret;
* to (when applicable), and check against max_size here as well.
* Note that caller is responsible for ensuring max_size increases are
* requested from the MDS.
+ *
+ * Returns 0 if caps were not able to be acquired (yet), a 1 if they were,
+ * or a negative error code.
+ *
+ * FIXME: how does a 0 return differ from -EAGAIN?
*/
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
- loff_t endoff, bool nonblock, int *got, int *err)
+ loff_t endoff, bool nonblock, int *got)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
if ((file_wanted & need) != need) {
dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
ceph_cap_string(need), ceph_cap_string(file_wanted));
- *err = -EBADF;
- ret = 1;
+ ret = -EBADF;
goto out_unlock;
}
if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
inode, endoff, ci->i_max_size);
- if (endoff > ci->i_requested_max_size) {
- *err = -EAGAIN;
- ret = 1;
- }
+ if (endoff > ci->i_requested_max_size)
+ ret = -EAGAIN;
goto out_unlock;
}
/*
* task isn't in TASK_RUNNING state
*/
if (nonblock) {
- *err = -EAGAIN;
- ret = 1;
+ ret = -EAGAIN;
goto out_unlock;
}
if (session_readonly) {
dout("get_cap_refs %p needed %s but mds%d readonly\n",
inode, ceph_cap_string(need), ci->i_auth_cap->mds);
- *err = -EROFS;
- ret = 1;
+ ret = -EROFS;
goto out_unlock;
}
if (READ_ONCE(mdsc->fsc->mount_state) ==
CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
- *err = -EIO;
- ret = 1;
+ ret = -EIO;
goto out_unlock;
}
mds_wanted = __ceph_caps_mds_wanted(ci, false);
if (need & ~(mds_wanted & need)) {
dout("get_cap_refs %p caps were dropped"
" (session killed?)\n", inode);
- *err = -ESTALE;
- ret = 1;
+ ret = -ESTALE;
goto out_unlock;
}
if (!(file_wanted & ~mds_wanted))
int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
bool nonblock, int *got)
{
- int ret, err = 0;
+ int ret;
BUG_ON(need & ~CEPH_CAP_FILE_RD);
BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
if (ret < 0)
return ret;
- ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err);
- if (ret) {
- if (err == -EAGAIN) {
- ret = 0;
- } else if (err < 0) {
- ret = err;
- }
- }
- return ret;
+ ret = try_get_cap_refs(ci, need, want, 0, nonblock, got);
+ return ret == -EAGAIN ? 0 : ret;
}
/*
int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
loff_t endoff, int *got, struct page **pinned_page)
{
- int _got, ret, err = 0;
+ int _got, ret;
ret = ceph_pool_perm_check(ci, need);
if (ret < 0)
if (endoff > 0)
check_max_size(&ci->vfs_inode, endoff);
- err = 0;
_got = 0;
ret = try_get_cap_refs(ci, need, want, endoff,
- false, &_got, &err);
- if (ret) {
- if (err == -EAGAIN)
- continue;
- if (err < 0)
- ret = err;
- } else {
+ false, &_got);
+ if (ret == -EAGAIN) {
+ continue;
+ } else if (!ret) {
+ int err;
+
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&ci->i_cap_wq, &wait);
- while (!try_get_cap_refs(ci, need, want, endoff,
- true, &_got, &err)) {
+ while (!(err = try_get_cap_refs(ci, need, want, endoff,
+ true, &_got))) {
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
remove_wait_queue(&ci->i_cap_wq, &wait);
-
if (err == -EAGAIN)
continue;
- if (err < 0)
- ret = err;
}
- if (ret < 0) {
- if (err == -ESTALE) {
- /* session was killed, try renew caps */
- ret = ceph_renew_caps(&ci->vfs_inode);
- if (ret == 0)
- continue;
- }
+ if (ret == -ESTALE) {
+ /* session was killed, try renew caps */
+ ret = ceph_renew_caps(&ci->vfs_inode);
+ if (ret == 0)
+ continue;
return ret;
}
}
/*
- * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
+ * For a soon-to-be unlinked file, drop the LINK caps. If it
* looks like the link count will hit 0, drop any other caps (other
* than PIN) we don't specifically want (due to the file still being
* open).
struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
int state = mdsmap->m_info[i].state;
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
- ceph_pr_addr(&addr->in_addr),
+ ceph_pr_addr(addr),
ceph_mds_state_name(state));
}
return 0;
req->r_dentry,
path ? path : "");
spin_unlock(&req->r_dentry->d_lock);
- kfree(path);
+ ceph_mdsc_free_path(path, pathlen);
} else if (req->r_path1) {
seq_printf(s, " #%llx/%s", req->r_ino1.ino,
req->r_path1);
req->r_old_dentry,
path ? path : "");
spin_unlock(&req->r_old_dentry->d_lock);
- kfree(path);
+ ceph_mdsc_free_path(path, pathlen);
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
if (req->r_ino2.ino)
seq_printf(s, " #%llx/%s", req->r_ino2.ino,
return 0;
}
+static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
+{
+ struct seq_file *s = p;
+
+ seq_printf(s, "0x%-17lx%-17s%-17s\n", inode->i_ino,
+ ceph_cap_string(cap->issued),
+ ceph_cap_string(cap->implemented));
+ return 0;
+}
+
static int caps_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
- int total, avail, used, reserved, min;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ int total, avail, used, reserved, min, i;
ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
seq_printf(s, "total\t\t%d\n"
"avail\t\t%d\n"
"used\t\t%d\n"
"reserved\t%d\n"
- "min\t%d\n",
+ "min\t\t%d\n\n",
total, avail, used, reserved, min);
+ seq_printf(s, "ino issued implemented\n");
+ seq_printf(s, "-----------------------------------------------\n");
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *session;
+
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue;
+ mutex_unlock(&mdsc->mutex);
+ mutex_lock(&session->s_mutex);
+ ceph_iterate_session_caps(session, caps_show_cb, s);
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ mutex_lock(&mdsc->mutex);
+ }
+ mutex_unlock(&mdsc->mutex);
+
return 0;
}
u64 ino, parent_ino;
} __attribute__ ((packed));
+/*
+ * fh for snapped inode
+ */
+struct ceph_nfs_snapfh {
+ u64 ino;
+ u64 snapid;
+ u64 parent_ino;
+ u32 hash;
+} __attribute__ ((packed));
+
+static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
+ struct inode *parent_inode)
+{
+ const static int snap_handle_length =
+ sizeof(struct ceph_nfs_snapfh) >> 2;
+ struct ceph_nfs_snapfh *sfh = (void *)rawfh;
+ u64 snapid = ceph_snap(inode);
+ int ret;
+ bool no_parent = true;
+
+ if (*max_len < snap_handle_length) {
+ *max_len = snap_handle_length;
+ ret = FILEID_INVALID;
+ goto out;
+ }
+
+ ret = -EINVAL;
+ if (snapid != CEPH_SNAPDIR) {
+ struct inode *dir;
+ struct dentry *dentry = d_find_alias(inode);
+ if (!dentry)
+ goto out;
+
+ rcu_read_lock();
+ dir = d_inode_rcu(dentry->d_parent);
+ if (ceph_snap(dir) != CEPH_SNAPDIR) {
+ sfh->parent_ino = ceph_ino(dir);
+ sfh->hash = ceph_dentry_hash(dir, dentry);
+ no_parent = false;
+ }
+ rcu_read_unlock();
+ dput(dentry);
+ }
+
+ if (no_parent) {
+ if (!S_ISDIR(inode->i_mode))
+ goto out;
+ sfh->parent_ino = sfh->ino;
+ sfh->hash = 0;
+ }
+ sfh->ino = ceph_ino(inode);
+ sfh->snapid = snapid;
+
+ *max_len = snap_handle_length;
+ ret = FILEID_BTRFS_WITH_PARENT;
+out:
+ dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
+ return ret;
+}
+
static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
struct inode *parent_inode)
{
+ const static int handle_length =
+ sizeof(struct ceph_nfs_fh) >> 2;
+ const static int connected_handle_length =
+ sizeof(struct ceph_nfs_confh) >> 2;
int type;
- struct ceph_nfs_fh *fh = (void *)rawfh;
- struct ceph_nfs_confh *cfh = (void *)rawfh;
- int connected_handle_length = sizeof(*cfh)/4;
- int handle_length = sizeof(*fh)/4;
- /* don't re-export snaps */
if (ceph_snap(inode) != CEPH_NOSNAP)
- return -EINVAL;
+ return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
if (parent_inode && (*max_len < connected_handle_length)) {
*max_len = connected_handle_length;
}
if (parent_inode) {
+ struct ceph_nfs_confh *cfh = (void *)rawfh;
dout("encode_fh %llx with parent %llx\n",
ceph_ino(inode), ceph_ino(parent_inode));
cfh->ino = ceph_ino(inode);
*max_len = connected_handle_length;
type = FILEID_INO32_GEN_PARENT;
} else {
+ struct ceph_nfs_fh *fh = (void *)rawfh;
dout("encode_fh %llx\n", ceph_ino(inode));
fh->ino = ceph_ino(inode);
*max_len = handle_length;
return type;
}
-static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
+static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
mask = CEPH_STAT_CAP_INODE;
if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
mask |= CEPH_CAP_XATTR_SHARED;
- req->r_args.getattr.mask = cpu_to_le32(mask);
+ req->r_args.lookupino.mask = cpu_to_le32(mask);
req->r_ino1 = vino;
req->r_num_caps = 1;
ihold(inode);
ceph_mdsc_put_request(req);
if (!inode)
- return ERR_PTR(-ESTALE);
- if (inode->i_nlink == 0) {
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
+ return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
}
+ return inode;
+}
+
+struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
+{
+ struct inode *inode = __lookup_inode(sb, ino);
+ if (IS_ERR(inode))
+ return inode;
+ if (inode->i_nlink == 0) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+ return inode;
+}
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
+{
+ struct inode *inode = __lookup_inode(sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+ if (inode->i_nlink == 0) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
return d_obtain_alias(inode);
}
+static struct dentry *__snapfh_to_dentry(struct super_block *sb,
+ struct ceph_nfs_snapfh *sfh,
+ bool want_parent)
+{
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+ struct ceph_mds_request *req;
+ struct inode *inode;
+ struct ceph_vino vino;
+ int mask;
+ int err;
+ bool unlinked = false;
+
+ if (want_parent) {
+ vino.ino = sfh->parent_ino;
+ if (sfh->snapid == CEPH_SNAPDIR)
+ vino.snap = CEPH_NOSNAP;
+ else if (sfh->ino == sfh->parent_ino)
+ vino.snap = CEPH_SNAPDIR;
+ else
+ vino.snap = sfh->snapid;
+ } else {
+ vino.ino = sfh->ino;
+ vino.snap = sfh->snapid;
+ }
+ inode = ceph_find_inode(sb, vino);
+ if (inode)
+ return d_obtain_alias(inode);
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+ USE_ANY_MDS);
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ mask = CEPH_STAT_CAP_INODE;
+ if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
+ mask |= CEPH_CAP_XATTR_SHARED;
+ req->r_args.lookupino.mask = cpu_to_le32(mask);
+ if (vino.snap < CEPH_NOSNAP) {
+ req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
+ if (!want_parent && sfh->ino != sfh->parent_ino) {
+ req->r_args.lookupino.parent =
+ cpu_to_le64(sfh->parent_ino);
+ req->r_args.lookupino.hash =
+ cpu_to_le32(sfh->hash);
+ }
+ }
+
+ req->r_ino1 = vino;
+ req->r_num_caps = 1;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode = req->r_target_inode;
+ if (inode) {
+ if (vino.snap == CEPH_SNAPDIR) {
+ if (inode->i_nlink == 0)
+ unlinked = true;
+ inode = ceph_get_snapdir(inode);
+ } else if (ceph_snap(inode) == vino.snap) {
+ ihold(inode);
+ } else {
+ /* mds does not support lookup snapped inode */
+ err = -EOPNOTSUPP;
+ inode = NULL;
+ }
+ }
+ ceph_mdsc_put_request(req);
+
+ if (want_parent) {
+ dout("snapfh_to_parent %llx.%llx\n err=%d\n",
+ vino.ino, vino.snap, err);
+ } else {
+ dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
+ vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
+ }
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+ /* see comments in ceph_get_parent() */
+ return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
+}
+
/*
* convert regular fh to dentry
*/
{
struct ceph_nfs_fh *fh = (void *)fid->raw;
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+ struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
+ return __snapfh_to_dentry(sb, sfh, false);
+ }
+
if (fh_type != FILEID_INO32_GEN &&
fh_type != FILEID_INO32_GEN_PARENT)
return NULL;
static struct dentry *ceph_get_parent(struct dentry *child)
{
- /* don't re-export snaps */
- if (ceph_snap(d_inode(child)) != CEPH_NOSNAP)
- return ERR_PTR(-EINVAL);
-
- dout("get_parent %p ino %llx.%llx\n",
- child, ceph_vinop(d_inode(child)));
- return __get_parent(child->d_sb, child, 0);
+ struct inode *inode = d_inode(child);
+ struct dentry *dn;
+
+ if (ceph_snap(inode) != CEPH_NOSNAP) {
+ struct inode* dir;
+ bool unlinked = false;
+ /* do not support non-directory */
+ if (!d_is_dir(child)) {
+ dn = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
+ if (IS_ERR(dir)) {
+ dn = ERR_CAST(dir);
+ goto out;
+ }
+ /* There can be multiple paths to access snapped inode.
+ * For simplicity, treat snapdir of head inode as parent */
+ if (ceph_snap(inode) != CEPH_SNAPDIR) {
+ struct inode *snapdir = ceph_get_snapdir(dir);
+ if (dir->i_nlink == 0)
+ unlinked = true;
+ iput(dir);
+ if (IS_ERR(snapdir)) {
+ dn = ERR_CAST(snapdir);
+ goto out;
+ }
+ dir = snapdir;
+ }
+ /* If directory has already been deleted, futher get_parent
+ * will fail. Do not mark snapdir dentry as disconnected,
+ * this prevent exportfs from doing futher get_parent. */
+ if (unlinked)
+ dn = d_obtain_root(dir);
+ else
+ dn = d_obtain_alias(dir);
+ } else {
+ dn = __get_parent(child->d_sb, child, 0);
+ }
+out:
+ dout("get_parent %p ino %llx.%llx err=%ld\n",
+ child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0));
+ return dn;
}
/*
struct ceph_nfs_confh *cfh = (void *)fid->raw;
struct dentry *dentry;
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+ struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
+ return __snapfh_to_dentry(sb, sfh, true);
+ }
+
if (fh_type != FILEID_INO32_GEN_PARENT)
return NULL;
if (fh_len < sizeof(*cfh) / 4)
return dentry;
}
+static int __get_snap_name(struct dentry *parent, char *name,
+ struct dentry *child)
+{
+ struct inode *inode = d_inode(child);
+ struct inode *dir = d_inode(parent);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_mds_request *req = NULL;
+ char *last_name = NULL;
+ unsigned next_offset = 2;
+ int err = -EINVAL;
+
+ if (ceph_ino(inode) != ceph_ino(dir))
+ goto out;
+ if (ceph_snap(inode) == CEPH_SNAPDIR) {
+ if (ceph_snap(dir) == CEPH_NOSNAP) {
+ strcpy(name, fsc->mount_options->snapdir_name);
+ err = 0;
+ }
+ goto out;
+ }
+ if (ceph_snap(dir) != CEPH_SNAPDIR)
+ goto out;
+
+ while (1) {
+ struct ceph_mds_reply_info_parsed *rinfo;
+ struct ceph_mds_reply_dir_entry *rde;
+ int i;
+
+ req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
+ USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ req = NULL;
+ goto out;
+ }
+ err = ceph_alloc_readdir_reply_buffer(req, inode);
+ if (err)
+ goto out;
+
+ req->r_direct_mode = USE_AUTH_MDS;
+ req->r_readdir_offset = next_offset;
+ req->r_args.readdir.flags =
+ cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
+ if (last_name) {
+ req->r_path2 = last_name;
+ last_name = NULL;
+ }
+
+ req->r_inode = dir;
+ ihold(dir);
+ req->r_dentry = dget(parent);
+
+ inode_lock(dir);
+ err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
+ inode_unlock(dir);
+
+ if (err < 0)
+ goto out;
+
+ rinfo = &req->r_reply_info;
+ for (i = 0; i < rinfo->dir_nr; i++) {
+ rde = rinfo->dir_entries + i;
+ BUG_ON(!rde->inode.in);
+ if (ceph_snap(inode) ==
+ le64_to_cpu(rde->inode.in->snapid)) {
+ memcpy(name, rde->name, rde->name_len);
+ name[rde->name_len] = '\0';
+ err = 0;
+ goto out;
+ }
+ }
+
+ if (rinfo->dir_end)
+ break;
+
+ BUG_ON(rinfo->dir_nr <= 0);
+ rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
+ next_offset += rinfo->dir_nr;
+ last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
+ if (!last_name) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ceph_mdsc_put_request(req);
+ req = NULL;
+ }
+ err = -ENOENT;
+out:
+ if (req)
+ ceph_mdsc_put_request(req);
+ kfree(last_name);
+ dout("get_snap_name %p ino %llx.%llx err=%d\n",
+ child, ceph_vinop(inode), err);
+ return err;
+}
+
static int ceph_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
struct ceph_mds_client *mdsc;
struct ceph_mds_request *req;
+ struct inode *inode = d_inode(child);
int err;
- mdsc = ceph_inode_to_client(d_inode(child))->mdsc;
+ if (ceph_snap(inode) != CEPH_NOSNAP)
+ return __get_snap_name(parent, name, child);
+
+ mdsc = ceph_inode_to_client(inode)->mdsc;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
USE_ANY_MDS);
if (IS_ERR(req))
inode_lock(d_inode(parent));
- req->r_inode = d_inode(child);
- ihold(d_inode(child));
+ req->r_inode = inode;
+ ihold(inode);
req->r_ino2 = ceph_vino(d_inode(parent));
req->r_parent = d_inode(parent);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
memcpy(name, rinfo->dname, rinfo->dname_len);
name[rinfo->dname_len] = 0;
dout("get_name %p ino %llx.%llx name %s\n",
- child, ceph_vinop(d_inode(child)), name);
+ child, ceph_vinop(inode), name);
} else {
dout("get_name %p ino %llx.%llx err %d\n",
- child, ceph_vinop(d_inode(child)), err);
+ child, ceph_vinop(inode), err);
}
ceph_mdsc_put_request(req);
dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n",
(write ? "write" : "read"), file, pos, (unsigned)count,
- snapc, snapc->seq);
+ snapc, snapc ? snapc->seq : 0);
ret = filemap_write_and_wait_range(inode->i_mapping,
pos, pos + count - 1);
return err;
}
+/* Craft a mask of needed caps given a set of requested statx attrs. */
+static int statx_to_caps(u32 want)
+{
+ int mask = 0;
+
+ if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME))
+ mask |= CEPH_CAP_AUTH_SHARED;
+
+ if (want & (STATX_NLINK|STATX_CTIME))
+ mask |= CEPH_CAP_LINK_SHARED;
+
+ if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
+ STATX_BLOCKS))
+ mask |= CEPH_CAP_FILE_SHARED;
+
+ if (want & (STATX_CTIME))
+ mask |= CEPH_CAP_XATTR_SHARED;
+
+ return mask;
+}
+
/*
- * Get all attributes. Hopefully somedata we'll have a statlite()
- * and can limit the fields we require to be accurate.
+ * Get all the attributes. If we have sufficient caps for the requested attrs,
+ * then we can avoid talking to the MDS at all.
*/
int ceph_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
- int err;
+ int err = 0;
- err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false);
- if (!err) {
- generic_fillattr(inode, stat);
- stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
- if (ceph_snap(inode) == CEPH_NOSNAP)
- stat->dev = inode->i_sb->s_dev;
+ /* Skip the getattr altogether if we're asked not to sync */
+ if (!(flags & AT_STATX_DONT_SYNC)) {
+ err = ceph_do_getattr(inode, statx_to_caps(request_mask),
+ flags & AT_STATX_FORCE_SYNC);
+ if (err)
+ return err;
+ }
+
+ generic_fillattr(inode, stat);
+ stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
+ if (ceph_snap(inode) == CEPH_NOSNAP)
+ stat->dev = inode->i_sb->s_dev;
+ else
+ stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
+
+ if (S_ISDIR(inode->i_mode)) {
+ if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
+ RBYTES))
+ stat->size = ci->i_rbytes;
else
- stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
-
- if (S_ISDIR(inode->i_mode)) {
- if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
- RBYTES))
- stat->size = ci->i_rbytes;
- else
- stat->size = ci->i_files + ci->i_subdirs;
- stat->blocks = 0;
- stat->blksize = 65536;
- /*
- * Some applications rely on the number of st_nlink
- * value on directories to be either 0 (if unlinked)
- * or 2 + number of subdirectories.
- */
- if (stat->nlink == 1)
- /* '.' + '..' + subdirs */
- stat->nlink = 1 + 1 + ci->i_subdirs;
- }
+ stat->size = ci->i_files + ci->i_subdirs;
+ stat->blocks = 0;
+ stat->blksize = 65536;
+ /*
+ * Some applications rely on the number of st_nlink
+ * value on directories to be either 0 (if unlinked)
+ * or 2 + number of subdirectories.
+ */
+ if (stat->nlink == 1)
+ /* '.' + '..' + subdirs */
+ stat->nlink = 1 + 1 + ci->i_subdirs;
}
+
+ /* Mask off any higher bits (e.g. btime) until we have support */
+ stat->result_mask = request_mask & STATX_BASIC_STATS;
return err;
}
spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
err = -EIO;
- } else if (op == CEPH_MDS_OP_SETFILELOCK) {
- /*
- * increasing i_filelock_ref closes race window between
- * handling request reply and adding file_lock struct to
- * inode. Otherwise, i_auth_cap may get trimmed in the
- * window. Caller function will decrease the counter.
- */
- fl->fl_ops = &ceph_fl_lock_ops;
- atomic_inc(&ci->i_filelock_ref);
}
spin_unlock(&ci->i_ceph_lock);
if (err < 0) {
spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
err = -EIO;
- } else {
- /* see comment in ceph_lock */
- fl->fl_ops = &ceph_fl_lock_ops;
- atomic_inc(&ci->i_filelock_ref);
}
spin_unlock(&ci->i_ceph_lock);
if (err < 0) {
struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
int mds)
{
- struct ceph_mds_session *session;
-
if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
return NULL;
- session = mdsc->sessions[mds];
- dout("lookup_mds_session %p %d\n", session,
- refcount_read(&session->s_ref));
- get_session(session);
- return session;
+ return get_session(mdsc->sessions[mds]);
}
static bool __have_session(struct ceph_mds_client *mdsc, int mds)
*
* Caller must hold session s_mutex.
*/
-static int iterate_session_caps(struct ceph_mds_session *session,
- int (*cb)(struct inode *, struct ceph_cap *,
- void *), void *arg)
+int ceph_iterate_session_caps(struct ceph_mds_session *session,
+ int (*cb)(struct inode *, struct ceph_cap *,
+ void *), void *arg)
{
struct list_head *p;
struct ceph_cap *cap;
LIST_HEAD(dispose);
dout("remove_session_caps on %p\n", session);
- iterate_session_caps(session, remove_session_caps_cb, fsc);
+ ceph_iterate_session_caps(session, remove_session_caps_cb, fsc);
wake_up_all(&fsc->mdsc->cap_flushing_wq);
static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
{
dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
- iterate_session_caps(session, wake_up_session_cb,
- (void *)(unsigned long)ev);
+ ceph_iterate_session_caps(session, wake_up_session_cb,
+ (void *)(unsigned long)ev);
}
/*
session->s_mds, session->s_nr_caps, max_caps, trim_caps);
if (trim_caps > 0) {
session->s_trim_caps = trim_caps;
- iterate_session_caps(session, trim_caps_cb, session);
+ ceph_iterate_session_caps(session, trim_caps_cb, session);
dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
session->s_mds, session->s_nr_caps, max_caps,
trim_caps - session->s_trim_caps);
num_cap_releases--;
head = msg->front.iov_base;
- le32_add_cpu(&head->num, 1);
+ put_unaligned_le32(get_unaligned_le32(&head->num) + 1,
+ &head->num);
item = msg->front.iov_base + msg->front.iov_len;
item->ino = cpu_to_le64(cap->cap_ino);
item->cap_id = cpu_to_le64(cap->cap_id);
* Encode hidden .snap dirs as a double /, i.e.
* foo/.snap/bar -> foo//bar
*/
-char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
+char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
int stop_on_nosnap)
{
struct dentry *temp;
char *path;
- int len, pos;
+ int pos;
unsigned seq;
+ u64 base;
if (!dentry)
return ERR_PTR(-EINVAL);
-retry:
- len = 0;
- seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
- for (temp = dentry; !IS_ROOT(temp);) {
- struct inode *inode = d_inode(temp);
- if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
- len++; /* slash only */
- else if (stop_on_nosnap && inode &&
- ceph_snap(inode) == CEPH_NOSNAP)
- break;
- else
- len += 1 + temp->d_name.len;
- temp = temp->d_parent;
- }
- rcu_read_unlock();
- if (len)
- len--; /* no leading '/' */
-
- path = kmalloc(len+1, GFP_NOFS);
+ path = __getname();
if (!path)
return ERR_PTR(-ENOMEM);
- pos = len;
- path[pos] = 0; /* trailing null */
+retry:
+ pos = PATH_MAX - 1;
+ path[pos] = '\0';
+
+ seq = read_seqbegin(&rename_lock);
rcu_read_lock();
- for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) {
+ temp = dentry;
+ for (;;) {
struct inode *inode;
spin_lock(&temp->d_lock);
spin_unlock(&temp->d_lock);
break;
}
- strncpy(path + pos, temp->d_name.name,
- temp->d_name.len);
+ memcpy(path + pos, temp->d_name.name, temp->d_name.len);
}
spin_unlock(&temp->d_lock);
- if (pos)
- path[--pos] = '/';
temp = temp->d_parent;
+
+ /* Are we at the root? */
+ if (IS_ROOT(temp))
+ break;
+
+ /* Are we out of buffer? */
+ if (--pos < 0)
+ break;
+
+ path[pos] = '/';
}
+ base = ceph_ino(d_inode(temp));
rcu_read_unlock();
- if (pos != 0 || read_seqretry(&rename_lock, seq)) {
+ if (pos < 0 || read_seqretry(&rename_lock, seq)) {
pr_err("build_path did not end path lookup where "
- "expected, namelen is %d, pos is %d\n", len, pos);
+ "expected, pos is %d\n", pos);
/* presumably this is only possible if racing with a
rename of one of the parent directories (we can not
lock the dentries above us to prevent this, but
retrying should be harmless) */
- kfree(path);
goto retry;
}
- *base = ceph_ino(d_inode(temp));
- *plen = len;
+ *pbase = base;
+ *plen = PATH_MAX - 1 - pos;
dout("build_path on %p %d built %llx '%.*s'\n",
- dentry, d_count(dentry), *base, len, path);
- return path;
-}
-
-/* Duplicate the dentry->d_name.name safely */
-static int clone_dentry_name(struct dentry *dentry, const char **ppath,
- int *ppathlen)
-{
- u32 len;
- char *name;
-
-retry:
- len = READ_ONCE(dentry->d_name.len);
- name = kmalloc(len + 1, GFP_NOFS);
- if (!name)
- return -ENOMEM;
-
- spin_lock(&dentry->d_lock);
- if (dentry->d_name.len != len) {
- spin_unlock(&dentry->d_lock);
- kfree(name);
- goto retry;
- }
- memcpy(name, dentry->d_name.name, len);
- spin_unlock(&dentry->d_lock);
-
- name[len] = '\0';
- *ppath = name;
- *ppathlen = len;
- return 0;
+ dentry, d_count(dentry), base, *plen, path + pos);
+ return path + pos;
}
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
bool *pfreepath, bool parent_locked)
{
- int ret;
char *path;
rcu_read_lock();
if (!dir)
dir = d_inode_rcu(dentry->d_parent);
- if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
+ if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir);
rcu_read_unlock();
- if (parent_locked) {
- *ppath = dentry->d_name.name;
- *ppathlen = dentry->d_name.len;
- } else {
- ret = clone_dentry_name(dentry, ppath, ppathlen);
- if (ret)
- return ret;
- *pfreepath = true;
- }
+ *ppath = dentry->d_name.name;
+ *ppathlen = dentry->d_name.len;
return 0;
}
rcu_read_unlock();
(!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_dentry_drop)
- len += req->r_dentry->d_name.len;
+ len += pathlen1;
if (req->r_old_dentry_drop)
- len += req->r_old_dentry->d_name.len;
+ len += pathlen2;
msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
if (!msg) {
out_free2:
if (freepath2)
- kfree((char *)path2);
+ ceph_mdsc_free_path((char *)path2, pathlen2);
out_free1:
if (freepath1)
- kfree((char *)path1);
+ ceph_mdsc_free_path((char *)path1, pathlen1);
out:
return msg;
}
{
if (req->r_callback)
req->r_callback(mdsc, req);
- else
- complete_all(&req->r_completion);
+ complete_all(&req->r_completion);
}
/*
}
}
-void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
+int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir,
struct ceph_mds_request *req)
-{
- dout("submit_request on %p\n", req);
- mutex_lock(&mdsc->mutex);
- __register_request(mdsc, req, NULL);
- __do_request(mdsc, req);
- mutex_unlock(&mdsc->mutex);
-}
-
-/*
- * Synchrously perform an mds request. Take care of all of the
- * session setup, forwarding, retry details.
- */
-int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
- struct inode *dir,
- struct ceph_mds_request *req)
{
int err;
- dout("do_request on %p\n", req);
-
/* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
if (req->r_inode)
ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
- /* issue */
+ dout("submit_request on %p for inode %p\n", req, dir);
mutex_lock(&mdsc->mutex);
__register_request(mdsc, req, dir);
__do_request(mdsc, req);
+ err = req->r_err;
+ mutex_unlock(&mdsc->mutex);
+ return err;
+}
- if (req->r_err) {
- err = req->r_err;
- goto out;
- }
+static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ int err;
/* wait */
- mutex_unlock(&mdsc->mutex);
dout("do_request waiting\n");
if (!req->r_timeout && req->r_wait_for_completion) {
err = req->r_wait_for_completion(mdsc, req);
err = req->r_err;
}
-out:
mutex_unlock(&mdsc->mutex);
+ return err;
+}
+
+/*
+ * Synchrously perform an mds request. Take care of all of the
+ * session setup, forwarding, retry details.
+ */
+int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
+ struct inode *dir,
+ struct ceph_mds_request *req)
+{
+ int err;
+
+ dout("do_request on %p\n", req);
+
+ /* issue */
+ err = ceph_mdsc_submit_request(mdsc, dir, req);
+ if (!err)
+ err = ceph_mdsc_wait_request(mdsc, req);
dout("do_request %p done, result %d\n", req, err);
return err;
}
ceph_pagelist_encode_string(pagelist, path, pathlen);
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
out_freepath:
- kfree(path);
+ ceph_mdsc_free_path(path, pathlen);
}
out_err:
recon_state.msg_version = 2;
}
/* trsaverse this session's caps */
- err = iterate_session_caps(session, encode_caps_cb, &recon_state);
+ err = ceph_iterate_session_caps(session, encode_caps_cb, &recon_state);
spin_lock(&session->s_cap_lock);
session->s_cap_reconnect = 0;
mdsc->max_sessions = 0;
mdsc->stopping = 0;
atomic64_set(&mdsc->quotarealms_count, 0);
+ mdsc->quotarealms_inodes = RB_ROOT;
+ mutex_init(&mdsc->quotarealms_inodes_mutex);
mdsc->last_snap_seq = 0;
init_rwsem(&mdsc->snap_rwsem);
mdsc->snap_realms = RB_ROOT;
* their inode/dcache refs
*/
ceph_msgr_flush();
+
+ ceph_cleanup_quotarealms_inodes(mdsc);
}
/*
unsigned long last_used;
};
+/*
+ * node for list of quotarealm inodes that are not visible from the filesystem
+ * mountpoint, but required to handle, e.g. quotas.
+ */
+struct ceph_quotarealm_inode {
+ struct rb_node node;
+ u64 ino;
+ unsigned long timeout; /* last time a lookup failed for this inode */
+ struct mutex mutex;
+ struct inode *inode;
+};
+
/*
* mds client state
*/
int stopping; /* true if shutting down */
atomic64_t quotarealms_count; /* # realms with quota */
+ /*
+ * We keep a list of inodes we don't see in the mountpoint but that we
+ * need to track quota realms.
+ */
+ struct rb_root quotarealms_inodes;
+ struct mutex quotarealms_inodes_mutex;
/*
* snap_rwsem will cover cap linkage into snaprealms, and
struct inode *dir);
extern struct ceph_mds_request *
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
-extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
- struct ceph_mds_request *req);
+extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
+ struct inode *dir,
+ struct ceph_mds_request *req);
extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir,
struct ceph_mds_request *req);
struct ceph_mds_session *session);
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
+extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
+ int (*cb)(struct inode *,
+ struct ceph_cap *, void *),
+ void *arg);
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
+static inline void ceph_mdsc_free_path(char *path, int len)
+{
+ if (path)
+ __putname(path - (PATH_MAX - 1 - len));
+}
+
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
int stop_on_nosnap);
dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
i+1, n, global_id, mds, inc,
- ceph_pr_addr(&addr.in_addr),
+ ceph_pr_addr(&addr),
ceph_mds_state_name(state));
if (mds < 0 || state <= 0)
static inline bool ceph_has_realms_with_quotas(struct inode *inode)
{
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
- return atomic64_read(&mdsc->quotarealms_count) > 0;
+ struct super_block *sb = mdsc->fsc->sb;
+
+ if (atomic64_read(&mdsc->quotarealms_count) > 0)
+ return true;
+ /* if root is the real CephFS root, we don't have quota realms */
+ if (sb->s_root->d_inode &&
+ (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
+ return false;
+ /* otherwise, we can't know for sure */
+ return true;
}
void ceph_handle_quota(struct ceph_mds_client *mdsc,
iput(inode);
}
+static struct ceph_quotarealm_inode *
+find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
+{
+ struct ceph_quotarealm_inode *qri = NULL;
+ struct rb_node **node, *parent = NULL;
+
+ mutex_lock(&mdsc->quotarealms_inodes_mutex);
+ node = &(mdsc->quotarealms_inodes.rb_node);
+ while (*node) {
+ parent = *node;
+ qri = container_of(*node, struct ceph_quotarealm_inode, node);
+
+ if (ino < qri->ino)
+ node = &((*node)->rb_left);
+ else if (ino > qri->ino)
+ node = &((*node)->rb_right);
+ else
+ break;
+ }
+ if (!qri || (qri->ino != ino)) {
+ /* Not found, create a new one and insert it */
+ qri = kmalloc(sizeof(*qri), GFP_KERNEL);
+ if (qri) {
+ qri->ino = ino;
+ qri->inode = NULL;
+ qri->timeout = 0;
+ mutex_init(&qri->mutex);
+ rb_link_node(&qri->node, parent, node);
+ rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
+ } else
+ pr_warn("Failed to alloc quotarealms_inode\n");
+ }
+ mutex_unlock(&mdsc->quotarealms_inodes_mutex);
+
+ return qri;
+}
+
+/*
+ * This function will try to lookup a realm inode which isn't visible in the
+ * filesystem mountpoint. A list of these kind of inodes (not visible) is
+ * maintained in the mdsc and freed only when the filesystem is umounted.
+ *
+ * Note that these inodes are kept in this list even if the lookup fails, which
+ * allows to prevent useless lookup requests.
+ */
+static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
+ struct super_block *sb,
+ struct ceph_snap_realm *realm)
+{
+ struct ceph_quotarealm_inode *qri;
+ struct inode *in;
+
+ qri = find_quotarealm_inode(mdsc, realm->ino);
+ if (!qri)
+ return NULL;
+
+ mutex_lock(&qri->mutex);
+ if (qri->inode) {
+ /* A request has already returned the inode */
+ mutex_unlock(&qri->mutex);
+ return qri->inode;
+ }
+ /* Check if this inode lookup has failed recently */
+ if (qri->timeout &&
+ time_before_eq(jiffies, qri->timeout)) {
+ mutex_unlock(&qri->mutex);
+ return NULL;
+ }
+ in = ceph_lookup_inode(sb, realm->ino);
+ if (IS_ERR(in)) {
+ pr_warn("Can't lookup inode %llx (err: %ld)\n",
+ realm->ino, PTR_ERR(in));
+ qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
+ } else {
+ qri->timeout = 0;
+ qri->inode = in;
+ }
+ mutex_unlock(&qri->mutex);
+
+ return in;
+}
+
+void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
+{
+ struct ceph_quotarealm_inode *qri;
+ struct rb_node *node;
+
+ /*
+ * It should now be safe to clean quotarealms_inode tree without holding
+ * mdsc->quotarealms_inodes_mutex...
+ */
+ mutex_lock(&mdsc->quotarealms_inodes_mutex);
+ while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
+ node = rb_first(&mdsc->quotarealms_inodes);
+ qri = rb_entry(node, struct ceph_quotarealm_inode, node);
+ rb_erase(node, &mdsc->quotarealms_inodes);
+ iput(qri->inode);
+ kfree(qri);
+ }
+ mutex_unlock(&mdsc->quotarealms_inodes_mutex);
+}
+
/*
* This function walks through the snaprealm for an inode and returns the
* ceph_snap_realm for the first snaprealm that has quotas set (either max_files
*
* Note that the caller is responsible for calling ceph_put_snap_realm() on the
* returned realm.
+ *
+ * Callers of this function need to hold mdsc->snap_rwsem. However, if there's
+ * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence
+ * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
+ * this function will return -EAGAIN; otherwise, the snaprealms walk-through
+ * will be restarted.
*/
static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
- struct inode *inode)
+ struct inode *inode, bool retry)
{
struct ceph_inode_info *ci = NULL;
struct ceph_snap_realm *realm, *next;
if (ceph_snap(inode) != CEPH_NOSNAP)
return NULL;
+restart:
realm = ceph_inode(inode)->i_snap_realm;
if (realm)
ceph_get_snap_realm(mdsc, realm);
pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
"null i_snap_realm\n", ceph_vinop(inode));
while (realm) {
+ bool has_inode;
+
spin_lock(&realm->inodes_with_caps_lock);
- in = realm->inode ? igrab(realm->inode) : NULL;
+ has_inode = realm->inode;
+ in = has_inode ? igrab(realm->inode) : NULL;
spin_unlock(&realm->inodes_with_caps_lock);
- if (!in)
+ if (has_inode && !in)
break;
+ if (!in) {
+ up_read(&mdsc->snap_rwsem);
+ in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
+ down_read(&mdsc->snap_rwsem);
+ if (IS_ERR_OR_NULL(in))
+ break;
+ ceph_put_snap_realm(mdsc, realm);
+ if (!retry)
+ return ERR_PTR(-EAGAIN);
+ goto restart;
+ }
ci = ceph_inode(in);
has_quota = __ceph_has_any_quota(ci);
struct ceph_snap_realm *old_realm, *new_realm;
bool is_same;
+restart:
+ /*
+ * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
+ * However, get_quota_realm may drop it temporarily. By setting the
+ * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
+ * dropped and we can then restart the whole operation.
+ */
down_read(&mdsc->snap_rwsem);
- old_realm = get_quota_realm(mdsc, old);
- new_realm = get_quota_realm(mdsc, new);
+ old_realm = get_quota_realm(mdsc, old, true);
+ new_realm = get_quota_realm(mdsc, new, false);
+ if (PTR_ERR(new_realm) == -EAGAIN) {
+ up_read(&mdsc->snap_rwsem);
+ if (old_realm)
+ ceph_put_snap_realm(mdsc, old_realm);
+ goto restart;
+ }
is_same = (old_realm == new_realm);
up_read(&mdsc->snap_rwsem);
return false;
down_read(&mdsc->snap_rwsem);
+restart:
realm = ceph_inode(inode)->i_snap_realm;
if (realm)
ceph_get_snap_realm(mdsc, realm);
pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
"null i_snap_realm\n", ceph_vinop(inode));
while (realm) {
+ bool has_inode;
+
spin_lock(&realm->inodes_with_caps_lock);
- in = realm->inode ? igrab(realm->inode) : NULL;
+ has_inode = realm->inode;
+ in = has_inode ? igrab(realm->inode) : NULL;
spin_unlock(&realm->inodes_with_caps_lock);
- if (!in)
+ if (has_inode && !in)
break;
-
+ if (!in) {
+ up_read(&mdsc->snap_rwsem);
+ in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
+ down_read(&mdsc->snap_rwsem);
+ if (IS_ERR_OR_NULL(in))
+ break;
+ ceph_put_snap_realm(mdsc, realm);
+ goto restart;
+ }
ci = ceph_inode(in);
spin_lock(&ci->i_ceph_lock);
if (op == QUOTA_CHECK_MAX_FILES_OP) {
bool is_updated = false;
down_read(&mdsc->snap_rwsem);
- realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root));
+ realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
up_read(&mdsc->snap_rwsem);
if (!realm)
return false;
return;
}
+static int ceph_remount(struct super_block *sb, int *flags, char *data)
+{
+ sync_filesystem(sb);
+ return 0;
+}
+
static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
.drop_inode = ceph_drop_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
+ .remount_fs = ceph_remount,
.show_options = ceph_show_options,
.statfs = ceph_statfs,
.umount_begin = ceph_umount_begin,
/* export.c */
extern const struct export_operations ceph_export_ops;
+struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino);
/* locks.c */
extern __init void ceph_flock_init(void);
loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf);
+extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */
goto name_is_IP_address;
/* Perform the upcall */
- rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
+ rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false);
if (rc < 0)
cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n",
__func__, len, len, hostname);
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
ret = create_default_group(parent_group, group);
- if (!ret) {
- spin_lock(&configfs_dirent_lock);
- configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
- spin_unlock(&configfs_dirent_lock);
- }
+ if (ret)
+ goto err_out;
+
+ spin_lock(&configfs_dirent_lock);
+ configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
+ spin_unlock(&configfs_dirent_lock);
+ inode_unlock(d_inode(parent));
+ return 0;
+err_out:
inode_unlock(d_inode(parent));
+ mutex_lock(&subsys->su_mutex);
+ unlink_group(group);
+ mutex_unlock(&subsys->su_mutex);
return ret;
}
EXPORT_SYMBOL(configfs_register_group);
char *ip_addr = NULL;
int ip_len;
- ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL);
+ ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false);
if (ip_len > 0)
ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
else
+++ /dev/null
-#ifndef __ASM_GENERIC_SEGMENT_H
-#define __ASM_GENERIC_SEGMENT_H
-/*
- * Only here because we have some old header files that expect it...
- *
- * New architectures probably don't want to have their own version.
- */
-
-#endif /* __ASM_GENERIC_SEGMENT_H */
*/
#include <linux/string.h>
-#include <asm/segment.h>
+#ifdef CONFIG_UACCESS_MEMCPY
+static inline __must_check unsigned long
+raw_copy_from_user(void *to, const void __user * from, unsigned long n)
+{
+ if (__builtin_constant_p(n)) {
+ switch(n) {
+ case 1:
+ *(u8 *)to = *(u8 __force *)from;
+ return 0;
+ case 2:
+ *(u16 *)to = *(u16 __force *)from;
+ return 0;
+ case 4:
+ *(u32 *)to = *(u32 __force *)from;
+ return 0;
+#ifdef CONFIG_64BIT
+ case 8:
+ *(u64 *)to = *(u64 __force *)from;
+ return 0;
+#endif
+ }
+ }
+
+ memcpy(to, (const void __force *)from, n);
+ return 0;
+}
+
+static inline __must_check unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (__builtin_constant_p(n)) {
+ switch(n) {
+ case 1:
+ *(u8 __force *)to = *(u8 *)from;
+ return 0;
+ case 2:
+ *(u16 __force *)to = *(u16 *)from;
+ return 0;
+ case 4:
+ *(u32 __force *)to = *(u32 *)from;
+ return 0;
+#ifdef CONFIG_64BIT
+ case 8:
+ *(u64 __force *)to = *(u64 *)from;
+ return 0;
+#endif
+ default:
+ break;
+ }
+ }
+
+ memcpy((void __force *)to, from, n);
+ return 0;
+}
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+#endif /* CONFIG_UACCESS_MEMCPY */
#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
__le64 length; /* num bytes to lock from start */
__u8 wait; /* will caller wait for lock to become available? */
} __attribute__ ((packed)) filelock_change;
+ struct {
+ __le32 mask; /* CEPH_CAP_* */
+ __le64 snapid;
+ __le64 parent;
+ __le32 hash;
+ } __attribute__ ((packed)) lookupino;
} __attribute__ ((packed));
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
};
-extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
+extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
+
extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
int max_count, int *count);
int name_len;
};
+#define __CEPH_OID_INITIALIZER(oid) { .name = (oid).inline_name }
+
+#define CEPH_DEFINE_OID_ONSTACK(oid) \
+ struct ceph_object_id oid = __CEPH_OID_INITIALIZER(oid)
+
static inline void ceph_oid_init(struct ceph_object_id *oid)
{
- oid->name = oid->inline_name;
- oid->name_len = 0;
+ *oid = (struct ceph_object_id) __CEPH_OID_INITIALIZER(*oid);
}
-#define CEPH_OID_INIT_ONSTACK(oid) \
- ({ ceph_oid_init(&oid); oid; })
-#define CEPH_DEFINE_OID_ONSTACK(oid) \
- struct ceph_object_id oid = CEPH_OID_INIT_ONSTACK(oid)
-
static inline bool ceph_oid_empty(const struct ceph_object_id *oid)
{
return oid->name == oid->inline_name && !oid->name_len;
struct request *rq,
union map_info *map_context,
struct request **clone);
-typedef void (*dm_release_clone_request_fn) (struct request *clone);
+typedef void (*dm_release_clone_request_fn) (struct request *clone,
+ union map_info *map_context);
/*
* Returns:
#include <uapi/linux/dns_resolver.h>
extern int dns_query(const char *type, const char *name, size_t namelen,
- const char *options, char **_result, time64_t *_expiry);
+ const char *options, char **_result, time64_t *_expiry,
+ bool invalidate);
#endif /* _LINUX_DNS_RESOLVER_H */
struct hlist_node *prev)
{
n->next = prev->next;
- WRITE_ONCE(prev->next, n);
+ prev->next = n;
n->pprev = &prev->next;
if (n->next)
hlist_bl_set_first(h, n);
}
+static inline void hlist_bl_add_before(struct hlist_bl_node *n,
+ struct hlist_bl_node *next)
+{
+ struct hlist_bl_node **pprev = next->pprev;
+
+ n->pprev = pprev;
+ n->next = next;
+ next->pprev = &n->next;
+
+ /* pprev may be `first`, so be careful not to lose the lock bit */
+ WRITE_ONCE(*pprev,
+ (struct hlist_bl_node *)
+ ((uintptr_t)n | ((uintptr_t)*pprev & LIST_BL_LOCKMASK)));
+}
+
+static inline void hlist_bl_add_behind(struct hlist_bl_node *n,
+ struct hlist_bl_node *prev)
+{
+ n->next = prev->next;
+ n->pprev = &prev->next;
+ prev->next = n;
+
+ if (n->next)
+ n->next->pprev = &n->next;
+}
+
static inline void __hlist_bl_del(struct hlist_bl_node *n)
{
struct hlist_bl_node *next = n->next;
return bytes;
}
-static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c)
+/*
+ * Compute a*b+c, returning SIZE_MAX on overflow. Internal helper for
+ * struct_size() below.
+ */
+static inline __must_check size_t __ab_c_size(size_t a, size_t b, size_t c)
{
size_t bytes;
- if (check_mul_overflow(n, size, &bytes))
+ if (check_mul_overflow(a, b, &bytes))
return SIZE_MAX;
if (check_add_overflow(bytes, c, &bytes))
return SIZE_MAX;
atomic_t allocmiss;
atomic_t freehit;
atomic_t freemiss;
-#ifdef CONFIG_DEBUG_SLAB_LEAK
- atomic_t store_user_clean;
-#endif
/*
* If debugging is enabled, then the allocator can add additional
enum thermal_notify_event);
void thermal_zone_set_trips(struct thermal_zone_device *);
-struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
- const struct thermal_cooling_device_ops *);
+struct thermal_cooling_device *thermal_cooling_device_register(const char *,
+ void *, const struct thermal_cooling_device_ops *);
struct thermal_cooling_device *
-thermal_of_cooling_device_register(struct device_node *np, char *, void *,
+thermal_of_cooling_device_register(struct device_node *np, const char *, void *,
const struct thermal_cooling_device_ops *);
struct thermal_cooling_device *
devm_thermal_of_cooling_device_register(struct device *dev,
__ret; \
})
+#define __wait_var_event_interruptible(var, condition) \
+ ___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0, \
+ schedule())
+
+#define wait_var_event_interruptible(var, condition) \
+({ \
+ int __ret = 0; \
+ might_sleep(); \
+ if (!(condition)) \
+ __ret = __wait_var_event_interruptible(var, condition); \
+ __ret; \
+})
+
/**
* clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
*
struct encoder_config_info *ext_encoders;
/* amplifier information goes here */
struct amp_config_info *amp;
- int num_outputs;
+ unsigned int num_outputs;
/* Order is venc outputs followed by LCD and then external encoders */
struct vpbe_output *outputs;
};
gfp_t,
rxrpc_notify_rx_t,
bool,
+ bool,
unsigned int);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
+void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
+ unsigned long);
#endif /* _NET_RXRPC_H */
{
struct rwsem_waiter *waiter, *tmp;
long oldcount, woken = 0, adjustment = 0;
+ struct list_head wlist;
/*
* Take a peek at the queue head waiter such that we can determine
* of the queue. We know that woken will be at least 1 as we accounted
* for above. Note we increment the 'active part' of the count by the
* number of readers before waking any processes up.
+ *
+ * We have to do wakeup in 2 passes to prevent the possibility that
+ * the reader count may be decremented before it is incremented. It
+ * is because the to-be-woken waiter may not have slept yet. So it
+ * may see waiter->task got cleared, finish its critical section and
+ * do an unlock before the reader count increment.
+ *
+ * 1) Collect the read-waiters in a separate list, count them and
+ * fully increment the reader count in rwsem.
+ * 2) For each waiters in the new list, clear waiter->task and
+ * put them into wake_q to be woken up later.
*/
- list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
- struct task_struct *tsk;
-
+ list_for_each_entry(waiter, &sem->wait_list, list) {
if (waiter->type == RWSEM_WAITING_FOR_WRITE)
break;
woken++;
- tsk = waiter->task;
+ }
+ list_cut_before(&wlist, &sem->wait_list, &waiter->list);
+
+ adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+ lockevent_cond_inc(rwsem_wake_reader, woken);
+ if (list_empty(&sem->wait_list)) {
+ /* hit end of list above */
+ adjustment -= RWSEM_WAITING_BIAS;
+ }
+
+ if (adjustment)
+ atomic_long_add(adjustment, &sem->count);
+
+ /* 2nd pass */
+ list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+ struct task_struct *tsk;
+ tsk = waiter->task;
get_task_struct(tsk);
- list_del(&waiter->list);
+
/*
* Ensure calling get_task_struct() before setting the reader
* waiter to nil such that rwsem_down_read_failed() cannot
*/
wake_q_add_safe(wake_q, tsk);
}
-
- adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
- lockevent_cond_inc(rwsem_wake_reader, woken);
- if (list_empty(&sem->wait_list)) {
- /* hit end of list above */
- adjustment -= RWSEM_WAITING_BIAS;
- }
-
- if (adjustment)
- atomic_long_add(adjustment, &sem->count);
}
/*
time_constant = max(time_constant, 0l);
}
- if (txc->modes & ADJ_TAI && txc->constant > 0)
+ if (txc->modes & ADJ_TAI && txc->constant >= 0)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
config ARCH_HAS_PMEM_API
bool
+# use memcpy to implement user copies for nommu architectures
+config UACCESS_MEMCPY
+ bool
+
config ARCH_HAS_UACCESS_FLUSHCACHE
bool
allocation as well as poisoning memory on free to catch use of freed
memory. This can make kmalloc/kfree-intensive workloads much slower.
-config DEBUG_SLAB_LEAK
- bool "Memory leak debugging"
- depends on DEBUG_SLAB
-
config SLUB_DEBUG_ON
bool "SLUB debugging on by default"
depends on SLUB && SLUB_DEBUG
* The Hamming Weight of a number is the total number of bits set in it.
*/
-#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned int __sw_hweight32(unsigned int w)
{
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
#endif
}
EXPORT_SYMBOL(__sw_hweight32);
-#endif
unsigned int __sw_hweight16(unsigned int w)
{
}
EXPORT_SYMBOL(__sw_hweight8);
-#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned long __sw_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
#endif
}
EXPORT_SYMBOL(__sw_hweight64);
-#endif
#endif
-#ifdef CONFIG_DEBUG_SLAB_LEAK
-
-static inline bool is_store_user_clean(struct kmem_cache *cachep)
-{
- return atomic_read(&cachep->store_user_clean) == 1;
-}
-
-static inline void set_store_user_clean(struct kmem_cache *cachep)
-{
- atomic_set(&cachep->store_user_clean, 1);
-}
-
-static inline void set_store_user_dirty(struct kmem_cache *cachep)
-{
- if (is_store_user_clean(cachep))
- atomic_set(&cachep->store_user_clean, 0);
-}
-
-#else
-static inline void set_store_user_dirty(struct kmem_cache *cachep) {}
-
-#endif
-
/*
* Do not go above this order unless 0 objects fit into the slab or
* overridden on the command line.
objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
page->active++;
-#if DEBUG
- if (cachep->flags & SLAB_STORE_USER)
- set_store_user_dirty(cachep);
-#endif
-
return objp;
}
*dbg_redzone1(cachep, objp) = RED_INACTIVE;
*dbg_redzone2(cachep, objp) = RED_INACTIVE;
}
- if (cachep->flags & SLAB_STORE_USER) {
- set_store_user_dirty(cachep);
+ if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = (void *)caller;
- }
objnr = obj_to_index(cachep, page, objp);
return res;
}
-#ifdef CONFIG_DEBUG_SLAB_LEAK
-
-static inline int add_caller(unsigned long *n, unsigned long v)
-{
- unsigned long *p;
- int l;
- if (!v)
- return 1;
- l = n[1];
- p = n + 2;
- while (l) {
- int i = l/2;
- unsigned long *q = p + 2 * i;
- if (*q == v) {
- q[1]++;
- return 1;
- }
- if (*q > v) {
- l = i;
- } else {
- p = q + 2;
- l -= i + 1;
- }
- }
- if (++n[1] == n[0])
- return 0;
- memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
- p[0] = v;
- p[1] = 1;
- return 1;
-}
-
-static void handle_slab(unsigned long *n, struct kmem_cache *c,
- struct page *page)
-{
- void *p;
- int i, j;
- unsigned long v;
-
- if (n[0] == n[1])
- return;
- for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
- bool active = true;
-
- for (j = page->active; j < c->num; j++) {
- if (get_free_obj(page, j) == i) {
- active = false;
- break;
- }
- }
-
- if (!active)
- continue;
-
- /*
- * probe_kernel_read() is used for DEBUG_PAGEALLOC. page table
- * mapping is established when actual object allocation and
- * we could mistakenly access the unmapped object in the cpu
- * cache.
- */
- if (probe_kernel_read(&v, dbg_userword(c, p), sizeof(v)))
- continue;
-
- if (!add_caller(n, v))
- return;
- }
-}
-
-static void show_symbol(struct seq_file *m, unsigned long address)
-{
-#ifdef CONFIG_KALLSYMS
- unsigned long offset, size;
- char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
-
- if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
- seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
- if (modname[0])
- seq_printf(m, " [%s]", modname);
- return;
- }
-#endif
- seq_printf(m, "%px", (void *)address);
-}
-
-static int leaks_show(struct seq_file *m, void *p)
-{
- struct kmem_cache *cachep = list_entry(p, struct kmem_cache,
- root_caches_node);
- struct page *page;
- struct kmem_cache_node *n;
- const char *name;
- unsigned long *x = m->private;
- int node;
- int i;
-
- if (!(cachep->flags & SLAB_STORE_USER))
- return 0;
- if (!(cachep->flags & SLAB_RED_ZONE))
- return 0;
-
- /*
- * Set store_user_clean and start to grab stored user information
- * for all objects on this cache. If some alloc/free requests comes
- * during the processing, information would be wrong so restart
- * whole processing.
- */
- do {
- drain_cpu_caches(cachep);
- /*
- * drain_cpu_caches() could make kmemleak_object and
- * debug_objects_cache dirty, so reset afterwards.
- */
- set_store_user_clean(cachep);
-
- x[1] = 0;
-
- for_each_kmem_cache_node(cachep, node, n) {
-
- check_irq_on();
- spin_lock_irq(&n->list_lock);
-
- list_for_each_entry(page, &n->slabs_full, slab_list)
- handle_slab(x, cachep, page);
- list_for_each_entry(page, &n->slabs_partial, slab_list)
- handle_slab(x, cachep, page);
- spin_unlock_irq(&n->list_lock);
- }
- } while (!is_store_user_clean(cachep));
-
- name = cachep->name;
- if (x[0] == x[1]) {
- /* Increase the buffer size */
- mutex_unlock(&slab_mutex);
- m->private = kcalloc(x[0] * 4, sizeof(unsigned long),
- GFP_KERNEL);
- if (!m->private) {
- /* Too bad, we are really out */
- m->private = x;
- mutex_lock(&slab_mutex);
- return -ENOMEM;
- }
- *(unsigned long *)m->private = x[0] * 2;
- kfree(x);
- mutex_lock(&slab_mutex);
- /* Now make sure this entry will be retried */
- m->count = m->size;
- return 0;
- }
- for (i = 0; i < x[1]; i++) {
- seq_printf(m, "%s: %lu ", name, x[2*i+3]);
- show_symbol(m, x[2*i+2]);
- seq_putc(m, '\n');
- }
-
- return 0;
-}
-
-static const struct seq_operations slabstats_op = {
- .start = slab_start,
- .next = slab_next,
- .stop = slab_stop,
- .show = leaks_show,
-};
-
-static int slabstats_open(struct inode *inode, struct file *file)
-{
- unsigned long *n;
-
- n = __seq_open_private(file, &slabstats_op, PAGE_SIZE);
- if (!n)
- return -ENOMEM;
-
- *n = PAGE_SIZE / (2 * sizeof(unsigned long));
-
- return 0;
-}
-
-static const struct file_operations proc_slabstats_operations = {
- .open = slabstats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-#endif
-
-static int __init slab_proc_init(void)
-{
-#ifdef CONFIG_DEBUG_SLAB_LEAK
- proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
-#endif
- return 0;
-}
-module_init(slab_proc_init);
-
#ifdef CONFIG_HARDENED_USERCOPY
/*
* Rejects incorrectly sized objects and objects that are to be copied
dout("%s %s%llu cookie %s addr %s\n", __func__,
ENTITY_NAME(locker->id.name), locker->id.cookie,
- ceph_pr_addr(&locker->info.addr.in_addr));
+ ceph_pr_addr(&locker->info.addr));
return 0;
}
seq_printf(s, "\t%s%lld\t%s\n",
ENTITY_NAME(inst->name),
- ceph_pr_addr(&inst->addr.in_addr));
+ ceph_pr_addr(&inst->addr));
}
return 0;
}
char sb[64];
seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
- i, ceph_pr_addr(&addr->in_addr),
+ i, ceph_pr_addr(addr),
((map->osd_weight[i]*100) >> 16),
ceph_osdmap_state_str(sb, sizeof(sb), state),
((ceph_get_primary_affinity(map, i)*100) >> 16));
static struct page *zero_page; /* used in certain error cases */
-const char *ceph_pr_addr(const struct sockaddr_storage *ss)
+const char *ceph_pr_addr(const struct ceph_entity_addr *addr)
{
int i;
char *s;
- struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
- struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
+ struct sockaddr_storage ss = addr->in_addr; /* align */
+ struct sockaddr_in *in4 = (struct sockaddr_in *)&ss;
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss;
i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK;
s = addr_str[i];
- switch (ss->ss_family) {
+ switch (ss.ss_family) {
case AF_INET:
snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
ntohs(in4->sin_port));
default:
snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)",
- ss->ss_family);
+ ss.ss_family);
}
return s;
*/
static int ceph_tcp_connect(struct ceph_connection *con)
{
- struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
+ struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */
struct socket *sock;
unsigned int noio_flag;
int ret;
/* sock_create_kern() allocates with GFP_KERNEL */
noio_flag = memalloc_noio_save();
- ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
+ ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
memalloc_noio_restore(noio_flag);
if (ret)
set_sock_callbacks(sock, con);
- dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+ dout("connect %s\n", ceph_pr_addr(&con->peer_addr));
con_sock_state_connecting(con);
- ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+ ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss),
O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
- ceph_pr_addr(&con->peer_addr.in_addr),
+ ceph_pr_addr(&con->peer_addr),
sock->sk->sk_state);
} else if (ret < 0) {
pr_err("connect %s error %d\n",
- ceph_pr_addr(&con->peer_addr.in_addr), ret);
+ ceph_pr_addr(&con->peer_addr), ret);
sock_release(sock);
return ret;
}
void ceph_con_close(struct ceph_connection *con)
{
mutex_lock(&con->mutex);
- dout("con_close %p peer %s\n", con,
- ceph_pr_addr(&con->peer_addr.in_addr));
+ dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr));
con->state = CON_STATE_CLOSED;
con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */
struct ceph_entity_addr *addr)
{
mutex_lock(&con->mutex);
- dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
+ dout("con_open %p %s\n", con, ceph_pr_addr(addr));
WARN_ON(con->state != CON_STATE_CLOSED);
con->state = CON_STATE_PREOPEN;
{
if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
pr_err("connect to %s got bad banner\n",
- ceph_pr_addr(&con->peer_addr.in_addr));
+ ceph_pr_addr(&con->peer_addr));
con->error_msg = "protocol error, bad banner";
return -1;
}
return 0;
}
-static bool addr_is_blank(struct sockaddr_storage *ss)
+static bool addr_is_blank(struct ceph_entity_addr *addr)
{
- struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr;
- struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr;
+ struct sockaddr_storage ss = addr->in_addr; /* align */
+ struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr;
+ struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr;
- switch (ss->ss_family) {
+ switch (ss.ss_family) {
case AF_INET:
- return addr->s_addr == htonl(INADDR_ANY);
+ return addr4->s_addr == htonl(INADDR_ANY);
case AF_INET6:
return ipv6_addr_any(addr6);
default:
}
}
-static int addr_port(struct sockaddr_storage *ss)
+static int addr_port(struct ceph_entity_addr *addr)
{
- switch (ss->ss_family) {
+ switch (get_unaligned(&addr->in_addr.ss_family)) {
case AF_INET:
- return ntohs(((struct sockaddr_in *)ss)->sin_port);
+ return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port));
case AF_INET6:
- return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
+ return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port));
}
return 0;
}
-static void addr_set_port(struct sockaddr_storage *ss, int p)
+static void addr_set_port(struct ceph_entity_addr *addr, int p)
{
- switch (ss->ss_family) {
+ switch (get_unaligned(&addr->in_addr.ss_family)) {
case AF_INET:
- ((struct sockaddr_in *)ss)->sin_port = htons(p);
+ put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port);
break;
case AF_INET6:
- ((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
+ put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port);
break;
}
}
/*
* Unlike other *_pton function semantics, zero indicates success.
*/
-static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
+static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr,
char delim, const char **ipend)
{
- struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
- struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
+ memset(&addr->in_addr, 0, sizeof(addr->in_addr));
- memset(ss, 0, sizeof(*ss));
-
- if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) {
- ss->ss_family = AF_INET;
+ if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) {
+ put_unaligned(AF_INET, &addr->in_addr.ss_family);
return 0;
}
- if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) {
- ss->ss_family = AF_INET6;
+ if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) {
+ put_unaligned(AF_INET6, &addr->in_addr.ss_family);
return 0;
}
*/
#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
static int ceph_dns_resolve_name(const char *name, size_t namelen,
- struct sockaddr_storage *ss, char delim, const char **ipend)
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
{
const char *end, *delim_p;
char *colon_p, *ip_addr = NULL;
return -EINVAL;
/* do dns_resolve upcall */
- ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL);
+ ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL, false);
if (ip_len > 0)
- ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL);
+ ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL);
else
ret = -ESRCH;
*ipend = end;
pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
- ret, ret ? "failed" : ceph_pr_addr(ss));
+ ret, ret ? "failed" : ceph_pr_addr(addr));
return ret;
}
#else
static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
- struct sockaddr_storage *ss, char delim, const char **ipend)
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
{
return -EINVAL;
}
* then try to extract a hostname to resolve using userspace DNS upcall.
*/
static int ceph_parse_server_name(const char *name, size_t namelen,
- struct sockaddr_storage *ss, char delim, const char **ipend)
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
{
int ret;
- ret = ceph_pton(name, namelen, ss, delim, ipend);
+ ret = ceph_pton(name, namelen, addr, delim, ipend);
if (ret)
- ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend);
+ ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend);
return ret;
}
dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) {
const char *ipend;
- struct sockaddr_storage *ss = &addr[i].in_addr;
int port;
char delim = ',';
p++;
}
- ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend);
+ ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
if (ret)
goto bad;
ret = -EINVAL;
port = CEPH_MON_PORT;
}
- addr_set_port(ss, port);
+ addr_set_port(&addr[i], port);
- dout("parse_ips got %s\n", ceph_pr_addr(ss));
+ dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
if (p == end)
break;
*/
if (memcmp(&con->peer_addr, &con->actual_peer_addr,
sizeof(con->peer_addr)) != 0 &&
- !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
+ !(addr_is_blank(&con->actual_peer_addr) &&
con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
pr_warn("wrong peer, want %s/%d, got %s/%d\n",
- ceph_pr_addr(&con->peer_addr.in_addr),
+ ceph_pr_addr(&con->peer_addr),
(int)le32_to_cpu(con->peer_addr.nonce),
- ceph_pr_addr(&con->actual_peer_addr.in_addr),
+ ceph_pr_addr(&con->actual_peer_addr),
(int)le32_to_cpu(con->actual_peer_addr.nonce));
con->error_msg = "wrong peer at address";
return -1;
/*
* did we learn our address?
*/
- if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
- int port = addr_port(&con->msgr->inst.addr.in_addr);
+ if (addr_is_blank(&con->msgr->inst.addr)) {
+ int port = addr_port(&con->msgr->inst.addr);
memcpy(&con->msgr->inst.addr.in_addr,
&con->peer_addr_for_me.in_addr,
sizeof(con->peer_addr_for_me.in_addr));
- addr_set_port(&con->msgr->inst.addr.in_addr, port);
+ addr_set_port(&con->msgr->inst.addr, port);
encode_my_addr(con->msgr);
dout("process_banner learned my addr is %s\n",
- ceph_pr_addr(&con->msgr->inst.addr.in_addr));
+ ceph_pr_addr(&con->msgr->inst.addr));
}
return 0;
pr_err("%s%lld %s feature set mismatch,"
" my %llx < server's %llx, missing %llx\n",
ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr),
+ ceph_pr_addr(&con->peer_addr),
sup_feat, server_feat, server_feat & ~sup_feat);
con->error_msg = "missing required protocol features";
reset_connection(con);
pr_err("%s%lld %s protocol version mismatch,"
" my %d != server's %d\n",
ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr),
+ ceph_pr_addr(&con->peer_addr),
le32_to_cpu(con->out_connect.protocol_version),
le32_to_cpu(con->in_reply.protocol_version));
con->error_msg = "protocol version mismatch";
le32_to_cpu(con->in_reply.connect_seq));
pr_err("%s%lld %s connection reset\n",
ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr));
+ ceph_pr_addr(&con->peer_addr));
reset_connection(con);
con_out_kvec_reset(con);
ret = prepare_write_connect(con);
pr_err("%s%lld %s protocol feature mismatch,"
" my required %llx > server's %llx, need %llx\n",
ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr),
+ ceph_pr_addr(&con->peer_addr),
req_feat, server_feat, req_feat & ~server_feat);
con->error_msg = "missing required protocol features";
reset_connection(con);
if ((s64)seq - (s64)con->in_seq < 1) {
pr_info("skipping %s%lld %s seq %lld expected %lld\n",
ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr),
+ ceph_pr_addr(&con->peer_addr),
seq, con->in_seq + 1);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof_footer(con);
static void con_fault(struct ceph_connection *con)
{
dout("fault %p state %lu to peer %s\n",
- con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
+ con, con->state, ceph_pr_addr(&con->peer_addr));
pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
- ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+ ceph_pr_addr(&con->peer_addr), con->error_msg);
con->error_msg = NULL;
WARN_ON(con->state != CON_STATE_CONNECTING &&
m->num_mon);
for (i = 0; i < m->num_mon; i++)
dout("monmap_decode mon%d is %s\n", i,
- ceph_pr_addr(&m->mon_inst[i].addr.in_addr));
+ ceph_pr_addr(&m->mon_inst[i].addr));
return m;
bad:
{
if (!monc->hunting)
pr_info("mon%d %s session lost, hunting for new mon\n",
- monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr.in_addr));
+ monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr));
__close_session(monc);
__open_session(monc);
__resend_generic_request(monc);
pr_info("mon%d %s session established\n", monc->cur_mon,
- ceph_pr_addr(&monc->con.peer_addr.in_addr));
+ ceph_pr_addr(&monc->con.peer_addr));
}
out:
dout("%s %s%llu cookie %llu addr %s\n", __func__,
ENTITY_NAME(item->name), item->cookie,
- ceph_pr_addr(&item->addr.in_addr));
+ ceph_pr_addr(&item->addr));
return 0;
}
* @options: Request options (or NULL if no options)
* @_result: Where to place the returned data (or NULL)
* @_expiry: Where to store the result expiry time (or NULL)
+ * @invalidate: Always invalidate the key after use
*
* The data will be returned in the pointer at *result, if provided, and the
* caller is responsible for freeing it.
* Returns the size of the result on success, -ve error code otherwise.
*/
int dns_query(const char *type, const char *name, size_t namelen,
- const char *options, char **_result, time64_t *_expiry)
+ const char *options, char **_result, time64_t *_expiry,
+ bool invalidate)
{
struct key *rkey;
struct user_key_payload *upayload;
ret = len;
put:
up_read(&rkey->sem);
+ if (invalidate)
+ key_invalidate(rkey);
key_put(rkey);
out:
kleave(" = %d", ret);
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
* @upgrade: Request service upgrade for call
+ * @intr: The call is interruptible
* @debug_id: The debug ID for tracing to be assigned to the call
*
* Allow a kernel service to begin a call on the nominated socket. This just
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
bool upgrade,
+ bool intr,
unsigned int debug_id)
{
struct rxrpc_conn_parameters cp;
memset(&p, 0, sizeof(p));
p.user_call_ID = user_call_ID;
p.tx_total_len = tx_total_len;
+ p.intr = intr;
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
}
EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
+/**
+ * rxrpc_kernel_set_max_life - Set maximum lifespan on a call
+ * @sock: The socket the call is on
+ * @call: The call to configure
+ * @hard_timeout: The maximum lifespan of the call in jiffies
+ *
+ * Set the maximum lifespan of a call. The call will end with ETIME or
+ * ETIMEDOUT if it takes longer than this.
+ */
+void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call,
+ unsigned long hard_timeout)
+{
+ unsigned long now;
+
+ mutex_lock(&call->user_mutex);
+
+ now = jiffies;
+ hard_timeout += now;
+ WRITE_ONCE(call->expect_term_by, hard_timeout);
+ rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard);
+
+ mutex_unlock(&call->user_mutex);
+}
+EXPORT_SYMBOL(rxrpc_kernel_set_max_life);
+
/*
* connect an RxRPC socket
* - this just targets it at a specific destination; no actual connection
RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */
+ RXRPC_CALL_IS_INTR, /* The call is interruptible */
};
/*
u32 normal; /* Max time since last call packet (msec) */
} timeouts;
u8 nr_timeouts; /* Number of timeouts specified */
+ bool intr; /* The call is interruptible */
};
struct rxrpc_send_params {
return call;
}
+ if (p->intr)
+ __set_bit(RXRPC_CALL_IS_INTR, &call->flags);
call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
here, (const void *)p->user_call_ID);
add_wait_queue_exclusive(&call->waitq, &myself);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ if (test_bit(RXRPC_CALL_IS_INTR, &call->flags))
+ set_current_state(TASK_INTERRUPTIBLE);
+ else
+ set_current_state(TASK_UNINTERRUPTIBLE);
if (call->call_id)
break;
- if (signal_pending(current)) {
+ if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
+ signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
if (call->state >= RXRPC_CALL_COMPLETE)
return call->error;
- if (timeout == 0 &&
+ if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
+ timeout == 0 &&
tx_win == tx_start && signal_pending(current))
return -EINTR;
.call.tx_total_len = -1,
.call.user_call_ID = 0,
.call.nr_timeouts = 0,
+ .call.intr = true,
.abort_code = 0,
.command = RXRPC_CMD_SEND_DATA,
.exclusive = false,
- To skip validation of a file, add
- OBJECT_FILES_NON_STANDARD_filename.o := n
+ OBJECT_FILES_NON_STANDARD_filename.o := y
to the Makefile.
#include <linux/hashtable.h>
#include <linux/kernel.h>
+#define FAKE_JUMP_OFFSET -1
+
struct alternative {
struct list_head list;
struct instruction *insn;
insn->type != INSN_JUMP_UNCONDITIONAL)
continue;
- if (insn->ignore)
+ if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET)
continue;
rela = find_rela_by_dest_range(insn->sec, insn->offset,
clear_insn_state(&fake_jump->state);
fake_jump->sec = special_alt->new_sec;
- fake_jump->offset = -1;
+ fake_jump->offset = FAKE_JUMP_OFFSET;
fake_jump->type = INSN_JUMP_UNCONDITIONAL;
fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
- fake_jump->ignore = true;
+ fake_jump->func = orig_insn->func;
}
if (!special_alt->new_len) {
return 1;
}
- func = insn->func ? insn->func->pfunc : NULL;
+ if (insn->func)
+ func = insn->func->pfunc;
if (func && insn->ignore) {
WARN_FUNC("BUG: why am I validating an ignored function?",
-kselftest
gpiogpio-event-mon
gpiogpio-hammer
gpioinclude/
override MAKEFLAGS =
endif
+# Append kselftest to KBUILD_OUTPUT to avoid cluttering
+# KBUILD_OUTPUT with selftest objects and headers installed
+# by selftests Makefile or lib.mk.
ifneq ($(KBUILD_SRC),)
override LDFLAGS =
endif
BUILD := $(O)
else
ifneq ($(KBUILD_OUTPUT),)
- BUILD := $(KBUILD_OUTPUT)
+ BUILD := $(KBUILD_OUTPUT)/kselftest
else
BUILD := $(shell pwd)
DEFAULT_INSTALL_HDR_PATH := 1
endif
endif
-# KSFT_TAP_LEVEL is used from KSFT framework to prevent nested TAP header
-# printing from tests. Applicable to run_tests case where run_tests adds
-# TAP header prior running tests and when a test program invokes another
-# with system() call. Export it here to cover override RUN_TESTS defines.
-export KSFT_TAP_LEVEL=`echo 1`
-
# Prepare for headers install
top_srcdir ?= ../../..
include $(top_srcdir)/scripts/subarch.include
run_pstore_crash:
make -C pstore run_crash
-INSTALL_PATH ?= install
+# Use $BUILD as the default install root. $BUILD points to the
+# right output location for the following cases:
+# 1. output_dir=kernel_src
+# 2. a separate output directory is specified using O= KBUILD_OUTPUT
+# 3. a separate output directory is specified using KBUILD_OUTPUT
+#
+INSTALL_PATH ?= $(BUILD)/install
INSTALL_PATH := $(abspath $(INSTALL_PATH))
ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
-install:
+install: all
ifdef INSTALL_PATH
@# Ask all targets to install their files
- mkdir -p $(INSTALL_PATH)
+ mkdir -p $(INSTALL_PATH)/kselftest
+ install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
+ install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
echo "#!/bin/sh" > $(ALL_SCRIPT)
echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
+ echo ". ./kselftest/runner.sh" >> $(ALL_SCRIPT)
echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
- echo " OUTPUT=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
- echo " cat /dev/null > \$$OUTPUT" >> $(ALL_SCRIPT)
- echo "else" >> $(ALL_SCRIPT)
- echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
+ echo " logfile=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
+ echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
echo "fi" >> $(ALL_SCRIPT)
- echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT)
- echo "export skip=4" >> $(ALL_SCRIPT)
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT); \
- echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
- echo "echo ========================================" >> $(ALL_SCRIPT); \
echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
echo "cd $$TARGET" >> $(ALL_SCRIPT); \
+ echo -n "run_many" >> $(ALL_SCRIPT); \
make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+ echo "" >> $(ALL_SCRIPT); \
echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
done;
#include "../kselftest.h"
+#define COUNT_ISN_BPS 4
+#define COUNT_WPS 4
/* Breakpoint access modes */
enum {
if (!local && !global)
continue;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < COUNT_ISN_BPS; i++) {
dummy_funcs[i]();
check_trapped();
}
{
int i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < COUNT_ISN_BPS; i++) {
set_breakpoint_addr(dummy_funcs[i], i);
toggle_breakpoint(i, BP_X, 1, local, global, 1);
ptrace(PTRACE_CONT, child_pid, NULL, 0);
else
mode_str = "read";
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < COUNT_WPS; i++) {
set_breakpoint_addr(&dummy_var[i], i);
toggle_breakpoint(i, mode, len, local, global, 1);
ptrace(PTRACE_CONT, child_pid, NULL, 0);
static void launch_tests(void)
{
char buf[1024];
+ unsigned int tests = 0;
int len, local, global, i;
+ tests += 3 * COUNT_ISN_BPS;
+ tests += sizeof(long) / 2 * 3 * COUNT_WPS;
+ tests += sizeof(long) / 2 * 3 * COUNT_WPS;
+ tests += 2;
+ ksft_set_plan(tests);
+
/* Instruction breakpoints */
for (local = 0; local < 2; local++) {
for (global = 0; global < 2; global++) {
return false;
}
-static bool run_test(int wr_size, int wp_size, int wr, int wp)
+static bool arun_test(int wr_size, int wp_size, int wr, int wp)
{
int status;
siginfo_t siginfo;
bool result;
ksft_print_header();
+ ksft_set_plan(213);
act.sa_handler = sigalrm;
sigemptyset(&act.sa_mask);
int opt;
bool do_suspend = true;
bool succeeded = true;
+ unsigned int tests = 0;
cpu_set_t available_cpus;
int err;
int cpu;
}
}
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ if (!CPU_ISSET(cpu, &available_cpus))
+ continue;
+ tests++;
+ }
+ ksft_set_plan(tests);
+
if (do_suspend)
suspend();
{
char *tmp1, *tmp2, *our_path;
- ksft_print_header();
-
/* Find our path */
tmp1 = strdup(argv[0]);
if (!tmp1)
mpid = getpid();
if (fork_wait()) {
+ ksft_print_header();
+ ksft_set_plan(12);
ksft_print_msg("[RUN]\t+++ Tests with uid == 0 +++\n");
return do_tests(0, our_path);
}
ksft_print_msg("==================================================\n");
if (fork_wait()) {
+ ksft_print_header();
+ ksft_set_plan(9);
ksft_print_msg("[RUN]\t+++ Tests with uid != 0 +++\n");
return do_tests(1, our_path);
}
--- /dev/null
+/dma-buf/udmabuf
}
ksft_print_header();
+ ksft_set_plan(1);
ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
ksft_print_msg(
"\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
}
ksft_print_header();
+ ksft_set_plan(1);
ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
basename(argv[0]));
}
ksft_print_header();
+ ksft_set_plan(1);
ksft_print_msg("%s: Test signal handling during requeue_pi\n",
basename(argv[0]));
ksft_print_msg("\tArguments: <none>\n");
}
ksft_print_header();
+ ksft_set_plan(1);
ksft_print_msg(
"%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
basename(argv[0]));
}
ksft_print_header();
+ ksft_set_plan(1);
ksft_print_msg("%s: Block on a futex and wait for timeout\n",
basename(argv[0]));
ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
}
ksft_print_header();
+ ksft_set_plan(1);
ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
basename(argv[0]));
}
ksft_print_header();
+ ksft_set_plan(1);
ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
basename(argv[0]));
};
static struct ksft_count ksft_cnt;
+static unsigned int ksft_plan;
static inline int ksft_test_num(void)
{
printf("TAP version 13\n");
}
+static inline void ksft_set_plan(unsigned int plan)
+{
+ ksft_plan = plan;
+ printf("1..%d\n", ksft_plan);
+}
+
static inline void ksft_print_cnts(void)
{
- printf("Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+ if (ksft_plan != ksft_test_num())
+ printf("# Planned tests != run tests (%u != %u)\n",
+ ksft_plan, ksft_test_num());
+ printf("# Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
- printf("1..%d\n", ksft_test_num());
}
static inline void ksft_print_msg(const char *msg, ...)
ksft_cnt.ksft_xskip++;
va_start(args, msg);
- printf("ok %d # skip ", ksft_test_num());
+ printf("not ok %d # SKIP ", ksft_test_num());
vprintf(msg, args);
va_end(args);
}
va_list args;
va_start(args, msg);
- printf("1..%d # Skipped: ", ksft_test_num());
+ printf("not ok %d # SKIP ", 1 + ksft_test_num());
vprintf(msg, args);
va_end(args);
} else {
--- /dev/null
+#!/usr/bin/perl
+# SPDX-License-Identifier: GPL-2.0
+# Prefix all lines with "# ", unbuffered. Command being piped in may need
+# to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd".
+use strict;
+
+binmode STDIN;
+binmode STDOUT;
+
+STDOUT->autoflush(1);
+
+my $needed = 1;
+while (1) {
+ my $char;
+ my $bytes = sysread(STDIN, $char, 1);
+ exit 0 if ($bytes == 0);
+ if ($needed) {
+ print "# ";
+ $needed = 0;
+ }
+ print $char;
+ $needed = 1 if ($char eq "\n");
+}
--- /dev/null
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Runs a set of tests in a given subdirectory.
+export skip_rc=4
+export logfile=/dev/stdout
+export per_test_logging=
+
+# There isn't a shell-agnostic way to find the path of a sourced file,
+# so we must rely on BASE_DIR being set to find other tools.
+if [ -z "$BASE_DIR" ]; then
+ echo "Error: BASE_DIR must be set before sourcing." >&2
+ exit 1
+fi
+
+# If Perl is unavailable, we must fall back to line-at-a-time prefixing
+# with sed instead of unbuffered output.
+tap_prefix()
+{
+ if [ ! -x /usr/bin/perl ]; then
+ sed -e 's/^/# /'
+ else
+ "$BASE_DIR"/kselftest/prefix.pl
+ fi
+}
+
+# If stdbuf is unavailable, we must fall back to line-at-a-time piping.
+tap_unbuffer()
+{
+ if ! which stdbuf >/dev/null ; then
+ "$@"
+ else
+ stdbuf -i0 -o0 -e0 "$@"
+ fi
+}
+
+run_one()
+{
+ DIR="$1"
+ TEST="$2"
+ NUM="$3"
+
+ BASENAME_TEST=$(basename $TEST)
+
+ TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
+ echo "# $TEST_HDR_MSG"
+ if [ ! -x "$TEST" ]; then
+ echo -n "# Warning: file $TEST is "
+ if [ ! -e "$TEST" ]; then
+ echo "missing!"
+ else
+ echo "not executable, correct this."
+ fi
+ echo "not ok $test_num $TEST_HDR_MSG"
+ else
+ cd `dirname $TEST` > /dev/null
+ (((((tap_unbuffer ./$BASENAME_TEST 2>&1; echo $? >&3) |
+ tap_prefix >&4) 3>&1) |
+ (read xs; exit $xs)) 4>>"$logfile" &&
+ echo "ok $test_num $TEST_HDR_MSG") ||
+ (if [ $? -eq $skip_rc ]; then \
+ echo "not ok $test_num $TEST_HDR_MSG # SKIP"
+ else
+ echo "not ok $test_num $TEST_HDR_MSG"
+ fi)
+ cd - >/dev/null
+ fi
+}
+
+run_many()
+{
+ echo "TAP version 13"
+ DIR=$(basename "$PWD")
+ test_num=0
+ total=$(echo "$@" | wc -w)
+ echo "1..$total"
+ for TEST in "$@"; do
+ BASENAME_TEST=$(basename $TEST)
+ test_num=$(( test_num + 1 ))
+ if [ -n "$per_test_logging" ]; then
+ logfile="/tmp/$BASENAME_TEST"
+ cat /dev/null > "$logfile"
+ fi
+ run_one "$DIR" "$TEST" "$test_num"
+ done
+}
CC := $(CROSS_COMPILE)gcc
ifeq (0,$(MAKELEVEL))
- ifneq ($(O),)
- OUTPUT := $(O)
- else
- ifneq ($(KBUILD_OUTPUT),)
- OUTPUT := $(KBUILD_OUTPUT)
- else
- OUTPUT := $(shell pwd)
- DEFAULT_INSTALL_HDR_PATH := 1
- endif
+ ifeq ($(OUTPUT),)
+ OUTPUT := $(shell pwd)
+ DEFAULT_INSTALL_HDR_PATH := 1
endif
endif
+selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
endif
.ONESHELL:
-define RUN_TEST_PRINT_RESULT
- TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
- echo $$TEST_HDR_MSG; \
- echo "========================================"; \
- if [ ! -x $$TEST ]; then \
- echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\
- echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
- else \
- cd `dirname $$TEST` > /dev/null; \
- if [ "X$(summary)" != "X" ]; then \
- (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \
- echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
- (if [ $$? -eq $$skip ]; then \
- echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
- else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
- fi;) \
- else \
- (./$$BASENAME_TEST && \
- echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
- (if [ $$? -eq $$skip ]; then \
- echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
- else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
- fi;) \
- fi; \
- cd - > /dev/null; \
- fi;
-endef
-
define RUN_TESTS
- @export KSFT_TAP_LEVEL=`echo 1`; \
- test_num=`echo 0`; \
- skip=`echo 4`; \
- echo "TAP version 13"; \
- for TEST in $(1); do \
- BASENAME_TEST=`basename $$TEST`; \
- test_num=`echo $$test_num+1 | bc`; \
- $(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip)) \
- done;
+ @BASE_DIR="$(selfdir)"; \
+ . $(selfdir)/kselftest/runner.sh; \
+ if [ "X$(summary)" != "X" ]; then \
+ per_test_logging=1; \
+ fi; \
+ run_many $(1)
endef
run_tests: all
$(error Error: set INSTALL_PATH to use install)
endif
-define EMIT_TESTS
- @test_num=`echo 0`; \
+emit_tests:
for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
BASENAME_TEST=`basename $$TEST`; \
- test_num=`echo $$test_num+1 | bc`; \
- TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
- echo "echo $$TEST_HDR_MSG"; \
- if [ ! -x $$TEST ]; then \
- echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\""; \
- echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \
- else
- echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \
- fi; \
- done;
-endef
-
-emit_tests:
- $(EMIT_TESTS)
+ echo " \\"; \
+ echo -n " \"$$BASENAME_TEST\""; \
+ done; \
# define if isn't already. It is undefined in make O= case.
ifeq ($(RM),)
int main(int argc, char **argv)
{
ksft_print_header();
+ ksft_set_plan(13);
test_membarrier_query();
test_membarrier();
--- /dev/null
+pidfd_test
int main(int argc, char **argv)
{
ksft_print_header();
+ ksft_set_plan(4);
test_pidfd_send_signal_syscall_support();
test_pidfd_send_signal_simple_success();
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
+
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+CLANG_FLAGS += -no-integrated-as
+endif
+
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ \
+ $(CLANG_FLAGS)
LDLIBS += -lpthread
# Own dependencies because we only want to build against 1st prerequisite, but
* (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/
-#define RSEQ_SIG 0x53053053
+/*
+ * RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand
+ * value 0x5de3. This traps if user-space reaches this instruction by mistake,
+ * and the uncommon operand ensures the kernel does not move the instruction
+ * pointer to attacker-controlled code on rseq abort.
+ *
+ * The instruction pattern in the A32 instruction set is:
+ *
+ * e7f5def3 udf #24035 ; 0x5de3
+ *
+ * This translates to the following instruction pattern in the T16 instruction
+ * set:
+ *
+ * little endian:
+ * def3 udf #243 ; 0xf3
+ * e7f5 b.n <7f5>
+ *
+ * pre-ARMv6 big endian code:
+ * e7f5 b.n <7f5>
+ * def3 udf #243 ; 0xf3
+ *
+ * ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian
+ * code and big-endian data. Ensure the RSEQ_SIG data signature matches code
+ * endianness. Prior to ARMv6, -mbig-endian generates big-endian code and data
+ * (which match), so there is no need to reverse the endianness of the data
+ * representation of the signature. However, the choice between BE32 and BE8
+ * is done by the linker, so we cannot know whether code and data endianness
+ * will be mixed before the linker is invoked.
+ */
+
+#define RSEQ_SIG_CODE 0xe7f5def3
+
+#ifndef __ASSEMBLER__
+
+#define RSEQ_SIG_DATA \
+ ({ \
+ int sig; \
+ asm volatile ("b 2f\n\t" \
+ "1: .inst " __rseq_str(RSEQ_SIG_CODE) "\n\t" \
+ "2:\n\t" \
+ "ldr %[sig], 1b\n\t" \
+ : [sig] "=r" (sig)); \
+ sig; \
+ })
+
+#define RSEQ_SIG RSEQ_SIG_DATA
+
+#endif
#define rseq_smp_mb() __asm__ __volatile__ ("dmb" ::: "memory", "cc")
#define rseq_smp_rmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc")
#include "rseq-skip.h"
#else /* !RSEQ_SKIP_FASTPATH */
-#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".word " __rseq_str(label) "b, 0x0\n\t" \
".popsection\n\t"
-#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
- __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) ", 0x0\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"adr r0, " __rseq_str(cs_label) "\n\t" \
__rseq_str(table_label) ":\n\t" \
".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
- ".word " __rseq_str(RSEQ_SIG) "\n\t" \
+ ".arm\n\t" \
+ ".inst " __rseq_str(RSEQ_SIG_CODE) "\n\t" \
__rseq_str(label) ":\n\t" \
teardown \
"b %l[" __rseq_str(abort_label) "]\n\t"
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"str %[src], %[rseq_scratch0]\n\t"
"str %[dst], %[rseq_scratch1]\n\t"
"str %[len], %[rseq_scratch2]\n\t"
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"str %[src], %[rseq_scratch0]\n\t"
"str %[dst], %[rseq_scratch1]\n\t"
"str %[len], %[rseq_scratch2]\n\t"
* (C) Copyright 2018 - Will Deacon <will.deacon@arm.com>
*/
-#define RSEQ_SIG 0xd428bc00 /* BRK #0x45E0 */
+/*
+ * aarch64 -mbig-endian generates mixed endianness code vs data:
+ * little-endian code and big-endian data. Ensure the RSEQ_SIG signature
+ * matches code endianness.
+ */
+#define RSEQ_SIG_CODE 0xd428bc00 /* BRK #0x45E0. */
+
+#ifdef __AARCH64EB__
+#define RSEQ_SIG_DATA 0x00bc28d4 /* BRK #0x45E0. */
+#else
+#define RSEQ_SIG_DATA RSEQ_SIG_CODE
+#endif
+
+#define RSEQ_SIG RSEQ_SIG_DATA
#define rseq_smp_mb() __asm__ __volatile__ ("dmb ish" ::: "memory")
#define rseq_smp_rmb() __asm__ __volatile__ ("dmb ishld" ::: "memory")
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
post_commit_offset, abort_ip) \
- " .pushsection __rseq_table, \"aw\"\n" \
+ " .pushsection __rseq_cs, \"aw\"\n" \
" .balign 32\n" \
__rseq_str(label) ":\n" \
" .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \
" .quad " __rseq_str(start_ip) ", " \
__rseq_str(post_commit_offset) ", " \
__rseq_str(abort_ip) "\n" \
+ " .popsection\n\t" \
+ " .pushsection __rseq_cs_ptr_array, \"aw\"\n" \
+ " .quad " __rseq_str(label) "b\n" \
" .popsection\n"
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ " .pushsection __rseq_exit_point_array, \"aw\"\n" \
+ " .quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n" \
+ " .popsection\n"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
" adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \
#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
" b 222f\n" \
- " .inst " __rseq_str(RSEQ_SIG) "\n" \
+ " .inst " __rseq_str(RSEQ_SIG_CODE) "\n" \
__rseq_str(label) ":\n" \
" b %l[" __rseq_str(abort_label) "]\n" \
"222:\n"
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error3])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
RSEQ_INJECT_ASM(3)
* (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/
-#define RSEQ_SIG 0x53053053
+/*
+ * RSEQ_SIG uses the break instruction. The instruction pattern is:
+ *
+ * On MIPS:
+ * 0350000d break 0x350
+ *
+ * On nanoMIPS:
+ * 00100350 break 0x350
+ *
+ * On microMIPS:
+ * 0000d407 break 0x350
+ *
+ * For nanoMIPS32 and microMIPS, the instruction stream is encoded as 16-bit
+ * halfwords, so the signature halfwords need to be swapped accordingly for
+ * little-endian.
+ */
+#if defined(__nanomips__)
+# ifdef __MIPSEL__
+# define RSEQ_SIG 0x03500010
+# else
+# define RSEQ_SIG 0x00100350
+# endif
+#elif defined(__mips_micromips)
+# ifdef __MIPSEL__
+# define RSEQ_SIG 0xd4070000
+# else
+# define RSEQ_SIG 0x0000d407
+# endif
+#elif defined(__mips__)
+# define RSEQ_SIG 0x0350000d
+#else
+/* Unknown MIPS architecture. */
+#endif
#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory")
#define rseq_smp_rmb() rseq_smp_mb()
# error unsupported _MIPS_SZLONG
#endif
-#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(label) "b") "\n\t" \
".popsection\n\t"
-#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
- __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(exit_ip)) "\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
LONG_S " %[src], %[rseq_scratch0]\n\t"
LONG_S " %[dst], %[rseq_scratch1]\n\t"
LONG_S " %[len], %[rseq_scratch2]\n\t"
rseq_workaround_gcc_asm_size_guess();
__asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
LONG_S " %[src], %[rseq_scratch0]\n\t"
LONG_S " %[dst], %[rseq_scratch1]\n\t"
LONG_S " %[len], %[rseq_scratch2]\n\t"
* (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com>
*/
-#define RSEQ_SIG 0x53053053
+/*
+ * RSEQ_SIG is used with the following trap instruction:
+ *
+ * powerpc-be: 0f e5 00 0b twui r5,11
+ * powerpc64-le: 0b 00 e5 0f twui r5,11
+ * powerpc64-be: 0f e5 00 0b twui r5,11
+ */
+
+#define RSEQ_SIG 0x0fe5000b
#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory", "cc")
#define rseq_smp_lwsync() __asm__ __volatile__ ("lwsync" ::: "memory", "cc")
#else /* !RSEQ_SKIP_FASTPATH */
/*
- * The __rseq_table section can be used by debuggers to better handle
- * single-stepping through the restartable critical sections.
+ * The __rseq_cs_ptr_array and __rseq_cs sections can be used by debuggers to
+ * better handle single-stepping through the restartable critical sections.
*/
#ifdef __PPC64__
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(label) "b\n\t" \
".popsection\n\t"
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
"std %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
__rseq_str(label) ":\n\t"
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \
+ ".popsection\n\t"
+
#else /* #ifdef __PPC64__ */
#define STORE_WORD "stw "
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
/* 32-bit only supported on BE */ \
".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".long 0x0, " __rseq_str(label) "b\n\t" \
+ ".popsection\n\t"
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ /* 32-bit only supported on BE */ \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n\t" \
".popsection\n\t"
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
/* cmp cpuid */
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* setup for mempcy */
"mr %%r19, %[len]\n\t"
"mr %%r20, %[src]\n\t"
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* setup for mempcy */
"mr %%r19, %[len]\n\t"
"mr %%r20, %[src]\n\t"
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(label) "b\n\t" \
+ ".popsection\n\t"
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \
".popsection\n\t"
#elif __s390__
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".long 0x0, " __rseq_str(label) "b\n\t" \
+ ".popsection\n\t"
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n\t" \
".popsection\n\t"
#define LONG_L "l"
".long " __rseq_str(RSEQ_SIG) "\n\t" \
__rseq_str(label) ":\n\t" \
teardown \
- "j %l[" __rseq_str(abort_label) "]\n\t" \
+ "jg %l[" __rseq_str(abort_label) "]\n\t" \
".popsection\n\t"
#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
".pushsection __rseq_failure, \"ax\"\n\t" \
__rseq_str(label) ":\n\t" \
teardown \
- "j %l[" __rseq_str(cmpfail_label) "]\n\t" \
+ "jg %l[" __rseq_str(cmpfail_label) "]\n\t" \
".popsection\n\t"
static inline __attribute__((always_inline))
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
LONG_S " %[src], %[rseq_scratch0]\n\t"
LONG_S " %[dst], %[rseq_scratch1]\n\t"
LONG_S " %[len], %[rseq_scratch2]\n\t"
#include <stdint.h>
+/*
+ * RSEQ_SIG is used with the following reserved undefined instructions, which
+ * trap in user-space:
+ *
+ * x86-32: 0f b9 3d 53 30 05 53 ud1 0x53053053,%edi
+ * x86-64: 0f b9 3d 53 30 05 53 ud1 0x53053053(%rip),%edi
+ */
#define RSEQ_SIG 0x53053053
+/*
+ * Due to a compiler optimization bug in gcc-8 with asm goto and TLS asm input
+ * operands, we cannot use "m" input operands, and rather pass the __rseq_abi
+ * address through a "r" input operand.
+ */
+
+/* Offset of cpu_id and rseq_cs fields in struct rseq. */
+#define RSEQ_CPU_ID_OFFSET 4
+#define RSEQ_CS_OFFSET 8
+
#ifdef __x86_64__
#define rseq_smp_mb() \
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(label) "b\n\t" \
".popsection\n\t"
+
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \
- "movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t" \
+ "movq %%rax, " __rseq_str(rseq_cs) "\n\t" \
__rseq_str(label) ":\n\t"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
- "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \
"jnz " __rseq_str(label) "\n\t"
#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
".pushsection __rseq_failure, \"ax\"\n\t" \
- /* Disassembler-friendly signature: nopl <sig>(%rip). */\
- ".byte 0x0f, 0x1f, 0x05\n\t" \
+ /* Disassembler-friendly signature: ud1 <sig>(%rip),%edi. */ \
+ ".byte 0x0f, 0xb9, 0x3d\n\t" \
".long " __rseq_str(RSEQ_SIG) "\n\t" \
__rseq_str(label) ":\n\t" \
teardown \
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movq %[v], %%rbx\n\t"
"cmpq %%rbx, %[expectnot]\n\t"
"je %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movq %[v], %%rbx\n\t"
"cmpq %%rbx, %[expectnot]\n\t"
"je %l[error2]\n\t"
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
#endif
/* final store */
"addq %[count], %[v]\n\t"
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[count] "er" (count)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
"cmpq %[v2], %[expect2]\n\t"
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"movq %[src], %[rseq_scratch0]\n\t"
"movq %[dst], %[rseq_scratch1]\n\t"
"movq %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"cmpq %[v], %[expect]\n\t"
"jnz 7f\n\t"
#endif
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
*/
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
- ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".pushsection __rseq_cs, \"aw\"\n\t" \
".balign 32\n\t" \
__rseq_str(label) ":\n\t" \
".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" \
+ ".long " __rseq_str(label) "b, 0x0\n\t" \
".popsection\n\t"
#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
(post_commit_ip - start_ip), abort_ip)
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n\t" \
+ ".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) ", 0x0\n\t" \
+ ".popsection\n\t"
+
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
- "movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t" \
+ "movl $" __rseq_str(cs_label) ", " __rseq_str(rseq_cs) "\n\t" \
__rseq_str(label) ":\n\t"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
- "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \
"jnz " __rseq_str(label) "\n\t"
#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
".pushsection __rseq_failure, \"ax\"\n\t" \
- /* Disassembler-friendly signature: nopl <sig>. */ \
- ".byte 0x0f, 0x1f, 0x05\n\t" \
+ /* Disassembler-friendly signature: ud1 <sig>,%edi. */ \
+ ".byte 0x0f, 0xb9, 0x3d\n\t" \
".long " __rseq_str(RSEQ_SIG) "\n\t" \
__rseq_str(label) ":\n\t" \
teardown \
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[v], %%ebx\n\t"
"cmpl %%ebx, %[expectnot]\n\t"
"je %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movl %[v], %%ebx\n\t"
"cmpl %%ebx, %[expectnot]\n\t"
"je %l[error2]\n\t"
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
#endif
/* final store */
"addl %[count], %[v]\n\t"
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[count] "ir" (count)
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "m" (newv2),
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %[v], %%eax\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movl %[expect], %%eax\n\t"
"cmpl %[v], %%eax\n\t"
"jnz %l[error2]\n\t"
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
"cmpl %[expect2], %[v2]\n\t"
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"movl %[src], %[rseq_scratch0]\n\t"
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 7f\n\t"
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "m" (expect),
__asm__ __volatile__ goto (
RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
"movl %[src], %[rseq_scratch0]\n\t"
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 7f\n\t"
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "m" (expect),
#include <syscall.h>
#include <assert.h>
#include <signal.h>
+#include <limits.h>
#include "rseq.h"
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-__attribute__((tls_model("initial-exec"))) __thread
-volatile struct rseq __rseq_abi = {
+__thread volatile struct rseq __rseq_abi = {
.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
};
-static __attribute__((tls_model("initial-exec"))) __thread
-volatile int refcount;
+/*
+ * Shared with other libraries. This library may take rseq ownership if it is
+ * still 0 when executing the library constructor. Set to 1 by library
+ * constructor when handling rseq. Set to 0 in destructor if handling rseq.
+ */
+int __rseq_handled;
+
+/* Whether this library have ownership of rseq registration. */
+static int rseq_ownership;
+
+static __thread volatile uint32_t __rseq_refcount;
static void signal_off_save(sigset_t *oldset)
{
int rc, ret = 0;
sigset_t oldset;
+ if (!rseq_ownership)
+ return 0;
signal_off_save(&oldset);
- if (refcount++)
+ if (__rseq_refcount == UINT_MAX) {
+ ret = -1;
+ goto end;
+ }
+ if (__rseq_refcount++)
goto end;
rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
if (!rc) {
goto end;
}
if (errno != EBUSY)
- __rseq_abi.cpu_id = -2;
+ __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
ret = -1;
- refcount--;
+ __rseq_refcount--;
end:
signal_restore(oldset);
return ret;
int rc, ret = 0;
sigset_t oldset;
+ if (!rseq_ownership)
+ return 0;
signal_off_save(&oldset);
- if (--refcount)
+ if (!__rseq_refcount) {
+ ret = -1;
+ goto end;
+ }
+ if (--__rseq_refcount)
goto end;
rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
if (!rc)
goto end;
+ __rseq_refcount = 1;
ret = -1;
end:
signal_restore(oldset);
}
return cpu;
}
+
+void __attribute__((constructor)) rseq_init(void)
+{
+ /* Check whether rseq is handled by another library. */
+ if (__rseq_handled)
+ return;
+ __rseq_handled = 1;
+ rseq_ownership = 1;
+}
+
+void __attribute__((destructor)) rseq_fini(void)
+{
+ if (!rseq_ownership)
+ return;
+ __rseq_handled = 0;
+ rseq_ownership = 0;
+}
#endif
extern __thread volatile struct rseq __rseq_abi;
+extern int __rseq_handled;
#define rseq_likely(x) __builtin_expect(!!(x), 1)
#define rseq_unlikely(x) __builtin_expect(!!(x), 0)
int err;
ksft_print_header();
+ ksft_set_plan(3);
sigemptyset(&act.sa_mask);
act.sa_flags = SA_ONSTACK | SA_SIGINFO;
int err;
ksft_print_header();
+ ksft_set_plan(3 + 7);
sync_api_supported();